{ "best_metric": null, "best_model_checkpoint": null, "epoch": 5.0, "eval_steps": 500, "global_step": 7210, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0006934812760055479, "grad_norm": 0.7157525954676491, "learning_rate": 1.3869625520110959e-08, "loss": 0.9952, "step": 1 }, { "epoch": 0.0013869625520110957, "grad_norm": 0.7365616469009997, "learning_rate": 2.7739251040221917e-08, "loss": 1.023, "step": 2 }, { "epoch": 0.0020804438280166435, "grad_norm": 0.9607770173503413, "learning_rate": 4.1608876560332874e-08, "loss": 1.0586, "step": 3 }, { "epoch": 0.0027739251040221915, "grad_norm": 0.693573066041827, "learning_rate": 5.5478502080443834e-08, "loss": 1.0157, "step": 4 }, { "epoch": 0.0034674063800277394, "grad_norm": 0.8555541552251046, "learning_rate": 6.93481276005548e-08, "loss": 1.0226, "step": 5 }, { "epoch": 0.004160887656033287, "grad_norm": 0.7857834335732481, "learning_rate": 8.321775312066575e-08, "loss": 1.1396, "step": 6 }, { "epoch": 0.0048543689320388345, "grad_norm": 0.7637361805161516, "learning_rate": 9.70873786407767e-08, "loss": 1.0319, "step": 7 }, { "epoch": 0.005547850208044383, "grad_norm": 0.8106483805629625, "learning_rate": 1.1095700416088767e-07, "loss": 1.0091, "step": 8 }, { "epoch": 0.0062413314840499305, "grad_norm": 0.7027160641608973, "learning_rate": 1.2482662968099862e-07, "loss": 1.0236, "step": 9 }, { "epoch": 0.006934812760055479, "grad_norm": 0.7601501023881959, "learning_rate": 1.386962552011096e-07, "loss": 1.0949, "step": 10 }, { "epoch": 0.0076282940360610264, "grad_norm": 0.7194724240032185, "learning_rate": 1.5256588072122053e-07, "loss": 1.0223, "step": 11 }, { "epoch": 0.008321775312066574, "grad_norm": 0.692926972724246, "learning_rate": 1.664355062413315e-07, "loss": 1.0643, "step": 12 }, { "epoch": 0.009015256588072122, "grad_norm": 0.7059539310308902, "learning_rate": 1.8030513176144244e-07, "loss": 1.0264, "step": 13 }, { "epoch": 0.009708737864077669, "grad_norm": 0.721973293622727, "learning_rate": 1.941747572815534e-07, "loss": 1.084, "step": 14 }, { "epoch": 0.010402219140083218, "grad_norm": 0.7019103873678585, "learning_rate": 2.080443828016644e-07, "loss": 1.0181, "step": 15 }, { "epoch": 0.011095700416088766, "grad_norm": 0.7399949683343555, "learning_rate": 2.2191400832177534e-07, "loss": 1.0885, "step": 16 }, { "epoch": 0.011789181692094313, "grad_norm": 0.6470462981174262, "learning_rate": 2.3578363384188628e-07, "loss": 0.9954, "step": 17 }, { "epoch": 0.012482662968099861, "grad_norm": 0.6664168439771029, "learning_rate": 2.4965325936199724e-07, "loss": 1.011, "step": 18 }, { "epoch": 0.013176144244105409, "grad_norm": 0.7287961223313654, "learning_rate": 2.635228848821082e-07, "loss": 1.0184, "step": 19 }, { "epoch": 0.013869625520110958, "grad_norm": 0.7288020368439174, "learning_rate": 2.773925104022192e-07, "loss": 1.1128, "step": 20 }, { "epoch": 0.014563106796116505, "grad_norm": 0.72738295640996, "learning_rate": 2.9126213592233014e-07, "loss": 1.0416, "step": 21 }, { "epoch": 0.015256588072122053, "grad_norm": 0.7148771515364, "learning_rate": 3.0513176144244106e-07, "loss": 1.0272, "step": 22 }, { "epoch": 0.0159500693481276, "grad_norm": 0.733164316796413, "learning_rate": 3.19001386962552e-07, "loss": 1.0815, "step": 23 }, { "epoch": 0.016643550624133148, "grad_norm": 0.7094878780073887, "learning_rate": 3.32871012482663e-07, "loss": 1.0639, "step": 24 }, { "epoch": 0.017337031900138695, "grad_norm": 0.7432640656740158, 
"learning_rate": 3.4674063800277396e-07, "loss": 1.0609, "step": 25 }, { "epoch": 0.018030513176144243, "grad_norm": 0.6591866553870518, "learning_rate": 3.6061026352288487e-07, "loss": 0.9867, "step": 26 }, { "epoch": 0.01872399445214979, "grad_norm": 0.739928838539585, "learning_rate": 3.7447988904299584e-07, "loss": 1.0453, "step": 27 }, { "epoch": 0.019417475728155338, "grad_norm": 0.6955722959021466, "learning_rate": 3.883495145631068e-07, "loss": 1.0749, "step": 28 }, { "epoch": 0.02011095700416089, "grad_norm": 0.7262106028517052, "learning_rate": 4.022191400832178e-07, "loss": 1.0681, "step": 29 }, { "epoch": 0.020804438280166437, "grad_norm": 0.8561322692646302, "learning_rate": 4.160887656033288e-07, "loss": 0.9954, "step": 30 }, { "epoch": 0.021497919556171984, "grad_norm": 0.6664500793057903, "learning_rate": 4.299583911234397e-07, "loss": 0.9347, "step": 31 }, { "epoch": 0.022191400832177532, "grad_norm": 0.6603222417367025, "learning_rate": 4.4382801664355067e-07, "loss": 0.9633, "step": 32 }, { "epoch": 0.02288488210818308, "grad_norm": 0.6832516318019873, "learning_rate": 4.5769764216366164e-07, "loss": 1.0212, "step": 33 }, { "epoch": 0.023578363384188627, "grad_norm": 0.6729259095802395, "learning_rate": 4.7156726768377255e-07, "loss": 1.0028, "step": 34 }, { "epoch": 0.024271844660194174, "grad_norm": 0.6652382495219267, "learning_rate": 4.854368932038835e-07, "loss": 0.9346, "step": 35 }, { "epoch": 0.024965325936199722, "grad_norm": 0.7912453925739037, "learning_rate": 4.993065187239945e-07, "loss": 1.0298, "step": 36 }, { "epoch": 0.02565880721220527, "grad_norm": 0.6861473122536618, "learning_rate": 5.131761442441055e-07, "loss": 1.0371, "step": 37 }, { "epoch": 0.026352288488210817, "grad_norm": 0.7424079199734541, "learning_rate": 5.270457697642164e-07, "loss": 1.0255, "step": 38 }, { "epoch": 0.027045769764216365, "grad_norm": 0.6702315409020495, "learning_rate": 5.409153952843274e-07, "loss": 0.9796, "step": 39 }, { "epoch": 0.027739251040221916, "grad_norm": 0.6783824210210769, "learning_rate": 5.547850208044384e-07, "loss": 0.999, "step": 40 }, { "epoch": 0.028432732316227463, "grad_norm": 0.7489503428254928, "learning_rate": 5.686546463245493e-07, "loss": 1.0615, "step": 41 }, { "epoch": 0.02912621359223301, "grad_norm": 0.6067754960950416, "learning_rate": 5.825242718446603e-07, "loss": 0.937, "step": 42 }, { "epoch": 0.029819694868238558, "grad_norm": 0.6626081964303163, "learning_rate": 5.963938973647713e-07, "loss": 0.9696, "step": 43 }, { "epoch": 0.030513176144244106, "grad_norm": 0.7780657091944089, "learning_rate": 6.102635228848821e-07, "loss": 1.0647, "step": 44 }, { "epoch": 0.031206657420249653, "grad_norm": 0.7294569731371983, "learning_rate": 6.241331484049931e-07, "loss": 1.0183, "step": 45 }, { "epoch": 0.0319001386962552, "grad_norm": 0.6815932670137421, "learning_rate": 6.38002773925104e-07, "loss": 1.0248, "step": 46 }, { "epoch": 0.03259361997226075, "grad_norm": 0.6449349915814664, "learning_rate": 6.51872399445215e-07, "loss": 0.9586, "step": 47 }, { "epoch": 0.033287101248266296, "grad_norm": 0.6652671386978704, "learning_rate": 6.65742024965326e-07, "loss": 1.0394, "step": 48 }, { "epoch": 0.03398058252427184, "grad_norm": 0.6877408009142694, "learning_rate": 6.79611650485437e-07, "loss": 1.0005, "step": 49 }, { "epoch": 0.03467406380027739, "grad_norm": 0.6882709742577532, "learning_rate": 6.934812760055479e-07, "loss": 1.0294, "step": 50 }, { "epoch": 0.03536754507628294, "grad_norm": 0.6902783516628763, "learning_rate": 
7.073509015256588e-07, "loss": 0.9628, "step": 51 }, { "epoch": 0.036061026352288486, "grad_norm": 0.6488863656818254, "learning_rate": 7.212205270457697e-07, "loss": 1.046, "step": 52 }, { "epoch": 0.036754507628294034, "grad_norm": 0.7047050783633263, "learning_rate": 7.350901525658807e-07, "loss": 1.0596, "step": 53 }, { "epoch": 0.03744798890429958, "grad_norm": 0.6241294357822871, "learning_rate": 7.489597780859917e-07, "loss": 0.9602, "step": 54 }, { "epoch": 0.03814147018030513, "grad_norm": 0.632282875996663, "learning_rate": 7.628294036061026e-07, "loss": 0.9502, "step": 55 }, { "epoch": 0.038834951456310676, "grad_norm": 0.7755548632530835, "learning_rate": 7.766990291262136e-07, "loss": 1.0019, "step": 56 }, { "epoch": 0.03952843273231623, "grad_norm": 0.6816306677937195, "learning_rate": 7.905686546463247e-07, "loss": 1.0584, "step": 57 }, { "epoch": 0.04022191400832178, "grad_norm": 0.6869667996670505, "learning_rate": 8.044382801664357e-07, "loss": 1.0636, "step": 58 }, { "epoch": 0.040915395284327326, "grad_norm": 0.6844077521420684, "learning_rate": 8.183079056865466e-07, "loss": 1.0049, "step": 59 }, { "epoch": 0.04160887656033287, "grad_norm": 0.7295788670307389, "learning_rate": 8.321775312066576e-07, "loss": 0.9699, "step": 60 }, { "epoch": 0.04230235783633842, "grad_norm": 0.6344974379462622, "learning_rate": 8.460471567267684e-07, "loss": 0.981, "step": 61 }, { "epoch": 0.04299583911234397, "grad_norm": 0.6601574198153054, "learning_rate": 8.599167822468794e-07, "loss": 1.0688, "step": 62 }, { "epoch": 0.043689320388349516, "grad_norm": 0.6590855794771459, "learning_rate": 8.737864077669904e-07, "loss": 0.9909, "step": 63 }, { "epoch": 0.044382801664355064, "grad_norm": 0.6588222087234297, "learning_rate": 8.876560332871013e-07, "loss": 1.0405, "step": 64 }, { "epoch": 0.04507628294036061, "grad_norm": 0.670816840796301, "learning_rate": 9.015256588072123e-07, "loss": 1.0596, "step": 65 }, { "epoch": 0.04576976421636616, "grad_norm": 0.7457681485301779, "learning_rate": 9.153952843273233e-07, "loss": 1.058, "step": 66 }, { "epoch": 0.046463245492371706, "grad_norm": 0.6730150097176245, "learning_rate": 9.292649098474342e-07, "loss": 1.0267, "step": 67 }, { "epoch": 0.047156726768377254, "grad_norm": 0.6816232648360951, "learning_rate": 9.431345353675451e-07, "loss": 1.0179, "step": 68 }, { "epoch": 0.0478502080443828, "grad_norm": 0.6806863554772085, "learning_rate": 9.570041608876562e-07, "loss": 1.04, "step": 69 }, { "epoch": 0.04854368932038835, "grad_norm": 0.6722904703306192, "learning_rate": 9.70873786407767e-07, "loss": 1.0269, "step": 70 }, { "epoch": 0.049237170596393896, "grad_norm": 0.6307523759370202, "learning_rate": 9.84743411927878e-07, "loss": 0.963, "step": 71 }, { "epoch": 0.049930651872399444, "grad_norm": 0.690452055960613, "learning_rate": 9.98613037447989e-07, "loss": 0.9781, "step": 72 }, { "epoch": 0.05062413314840499, "grad_norm": 0.6280269384455514, "learning_rate": 1.0124826629680998e-06, "loss": 0.9447, "step": 73 }, { "epoch": 0.05131761442441054, "grad_norm": 0.6192366709339232, "learning_rate": 1.026352288488211e-06, "loss": 0.9516, "step": 74 }, { "epoch": 0.052011095700416086, "grad_norm": 0.6582915392388792, "learning_rate": 1.0402219140083218e-06, "loss": 0.9755, "step": 75 }, { "epoch": 0.052704576976421634, "grad_norm": 0.6247740559571989, "learning_rate": 1.0540915395284328e-06, "loss": 0.9531, "step": 76 }, { "epoch": 0.05339805825242718, "grad_norm": 0.8715757920926887, "learning_rate": 1.0679611650485437e-06, "loss": 0.9983, 
"step": 77 }, { "epoch": 0.05409153952843273, "grad_norm": 0.6230904855613747, "learning_rate": 1.0818307905686548e-06, "loss": 0.953, "step": 78 }, { "epoch": 0.054785020804438284, "grad_norm": 0.6999829088478414, "learning_rate": 1.0957004160887658e-06, "loss": 0.9008, "step": 79 }, { "epoch": 0.05547850208044383, "grad_norm": 0.652430833328779, "learning_rate": 1.1095700416088767e-06, "loss": 0.9974, "step": 80 }, { "epoch": 0.05617198335644938, "grad_norm": 0.6423534831220379, "learning_rate": 1.1234396671289876e-06, "loss": 0.9604, "step": 81 }, { "epoch": 0.056865464632454926, "grad_norm": 0.6038815891519935, "learning_rate": 1.1373092926490986e-06, "loss": 1.0135, "step": 82 }, { "epoch": 0.057558945908460474, "grad_norm": 0.6539199107847871, "learning_rate": 1.1511789181692095e-06, "loss": 0.9841, "step": 83 }, { "epoch": 0.05825242718446602, "grad_norm": 0.624314333182806, "learning_rate": 1.1650485436893206e-06, "loss": 1.0102, "step": 84 }, { "epoch": 0.05894590846047157, "grad_norm": 0.6458015688268974, "learning_rate": 1.1789181692094314e-06, "loss": 0.9964, "step": 85 }, { "epoch": 0.059639389736477116, "grad_norm": 0.6443432282203443, "learning_rate": 1.1927877947295425e-06, "loss": 0.9278, "step": 86 }, { "epoch": 0.060332871012482664, "grad_norm": 0.6562209616139189, "learning_rate": 1.2066574202496534e-06, "loss": 0.9583, "step": 87 }, { "epoch": 0.06102635228848821, "grad_norm": 0.5924884335147939, "learning_rate": 1.2205270457697642e-06, "loss": 0.9561, "step": 88 }, { "epoch": 0.06171983356449376, "grad_norm": 0.6744890114343025, "learning_rate": 1.2343966712898753e-06, "loss": 0.9438, "step": 89 }, { "epoch": 0.06241331484049931, "grad_norm": 0.6378026335633673, "learning_rate": 1.2482662968099862e-06, "loss": 1.007, "step": 90 }, { "epoch": 0.06310679611650485, "grad_norm": 0.597100408771657, "learning_rate": 1.2621359223300972e-06, "loss": 1.0074, "step": 91 }, { "epoch": 0.0638002773925104, "grad_norm": 0.6227995100866369, "learning_rate": 1.276005547850208e-06, "loss": 0.9649, "step": 92 }, { "epoch": 0.06449375866851595, "grad_norm": 0.5989746070664526, "learning_rate": 1.2898751733703192e-06, "loss": 0.9215, "step": 93 }, { "epoch": 0.0651872399445215, "grad_norm": 0.6702674731080163, "learning_rate": 1.30374479889043e-06, "loss": 0.9505, "step": 94 }, { "epoch": 0.06588072122052704, "grad_norm": 0.7085493524866154, "learning_rate": 1.3176144244105409e-06, "loss": 0.9768, "step": 95 }, { "epoch": 0.06657420249653259, "grad_norm": 0.7221883081125735, "learning_rate": 1.331484049930652e-06, "loss": 0.9901, "step": 96 }, { "epoch": 0.06726768377253814, "grad_norm": 0.5582461615156555, "learning_rate": 1.3453536754507628e-06, "loss": 0.9002, "step": 97 }, { "epoch": 0.06796116504854369, "grad_norm": 0.708061679173971, "learning_rate": 1.359223300970874e-06, "loss": 0.9588, "step": 98 }, { "epoch": 0.06865464632454923, "grad_norm": 0.622931134839384, "learning_rate": 1.3730929264909848e-06, "loss": 0.9352, "step": 99 }, { "epoch": 0.06934812760055478, "grad_norm": 0.5849387470895084, "learning_rate": 1.3869625520110958e-06, "loss": 0.9115, "step": 100 }, { "epoch": 0.07004160887656033, "grad_norm": 0.5523372708912899, "learning_rate": 1.4008321775312067e-06, "loss": 0.9002, "step": 101 }, { "epoch": 0.07073509015256588, "grad_norm": 0.5442657802829106, "learning_rate": 1.4147018030513176e-06, "loss": 0.9409, "step": 102 }, { "epoch": 0.07142857142857142, "grad_norm": 0.5167094979882029, "learning_rate": 1.4285714285714286e-06, "loss": 0.8774, "step": 103 }, { 
"epoch": 0.07212205270457697, "grad_norm": 0.4852800648235046, "learning_rate": 1.4424410540915395e-06, "loss": 0.8487, "step": 104 }, { "epoch": 0.07281553398058252, "grad_norm": 0.48008720213910333, "learning_rate": 1.4563106796116506e-06, "loss": 0.8546, "step": 105 }, { "epoch": 0.07350901525658807, "grad_norm": 0.5381970957355515, "learning_rate": 1.4701803051317614e-06, "loss": 0.9464, "step": 106 }, { "epoch": 0.07420249653259361, "grad_norm": 0.5183008486942954, "learning_rate": 1.4840499306518725e-06, "loss": 1.002, "step": 107 }, { "epoch": 0.07489597780859916, "grad_norm": 0.45637453449028764, "learning_rate": 1.4979195561719834e-06, "loss": 0.8192, "step": 108 }, { "epoch": 0.07558945908460471, "grad_norm": 0.4881631488510109, "learning_rate": 1.5117891816920942e-06, "loss": 0.8049, "step": 109 }, { "epoch": 0.07628294036061026, "grad_norm": 0.46223944342917794, "learning_rate": 1.5256588072122053e-06, "loss": 0.8434, "step": 110 }, { "epoch": 0.0769764216366158, "grad_norm": 0.462576284785379, "learning_rate": 1.5395284327323162e-06, "loss": 0.9045, "step": 111 }, { "epoch": 0.07766990291262135, "grad_norm": 0.4620343105037817, "learning_rate": 1.5533980582524272e-06, "loss": 0.8832, "step": 112 }, { "epoch": 0.07836338418862691, "grad_norm": 0.4863508425853165, "learning_rate": 1.5672676837725385e-06, "loss": 0.8998, "step": 113 }, { "epoch": 0.07905686546463246, "grad_norm": 0.4775428016819076, "learning_rate": 1.5811373092926494e-06, "loss": 0.898, "step": 114 }, { "epoch": 0.07975034674063801, "grad_norm": 0.4830885527898184, "learning_rate": 1.5950069348127602e-06, "loss": 0.9021, "step": 115 }, { "epoch": 0.08044382801664356, "grad_norm": 0.4869441989034695, "learning_rate": 1.6088765603328713e-06, "loss": 0.829, "step": 116 }, { "epoch": 0.0811373092926491, "grad_norm": 0.49452604561754526, "learning_rate": 1.6227461858529822e-06, "loss": 0.9164, "step": 117 }, { "epoch": 0.08183079056865465, "grad_norm": 0.4929690520607417, "learning_rate": 1.6366158113730932e-06, "loss": 0.9656, "step": 118 }, { "epoch": 0.0825242718446602, "grad_norm": 0.4534570660489125, "learning_rate": 1.650485436893204e-06, "loss": 0.8346, "step": 119 }, { "epoch": 0.08321775312066575, "grad_norm": 0.44896814698879267, "learning_rate": 1.6643550624133152e-06, "loss": 0.841, "step": 120 }, { "epoch": 0.0839112343966713, "grad_norm": 0.49925693078485434, "learning_rate": 1.678224687933426e-06, "loss": 0.8941, "step": 121 }, { "epoch": 0.08460471567267684, "grad_norm": 0.40632859239809527, "learning_rate": 1.6920943134535369e-06, "loss": 0.8305, "step": 122 }, { "epoch": 0.08529819694868239, "grad_norm": 0.45585401202974063, "learning_rate": 1.705963938973648e-06, "loss": 0.8915, "step": 123 }, { "epoch": 0.08599167822468794, "grad_norm": 0.5016058318337435, "learning_rate": 1.7198335644937588e-06, "loss": 0.8585, "step": 124 }, { "epoch": 0.08668515950069348, "grad_norm": 0.44681488446956685, "learning_rate": 1.73370319001387e-06, "loss": 0.8212, "step": 125 }, { "epoch": 0.08737864077669903, "grad_norm": 0.4294515784157133, "learning_rate": 1.7475728155339808e-06, "loss": 0.897, "step": 126 }, { "epoch": 0.08807212205270458, "grad_norm": 0.46211389282086196, "learning_rate": 1.7614424410540918e-06, "loss": 0.9046, "step": 127 }, { "epoch": 0.08876560332871013, "grad_norm": 0.47423509856518187, "learning_rate": 1.7753120665742027e-06, "loss": 0.958, "step": 128 }, { "epoch": 0.08945908460471567, "grad_norm": 0.40273149375453604, "learning_rate": 1.7891816920943136e-06, "loss": 0.7942, "step": 129 
}, { "epoch": 0.09015256588072122, "grad_norm": 0.46600366567800233, "learning_rate": 1.8030513176144246e-06, "loss": 0.8167, "step": 130 }, { "epoch": 0.09084604715672677, "grad_norm": 0.4927096876158672, "learning_rate": 1.8169209431345355e-06, "loss": 0.8075, "step": 131 }, { "epoch": 0.09153952843273232, "grad_norm": 0.4235794052816225, "learning_rate": 1.8307905686546466e-06, "loss": 0.8225, "step": 132 }, { "epoch": 0.09223300970873786, "grad_norm": 0.42089854766081586, "learning_rate": 1.8446601941747574e-06, "loss": 0.8454, "step": 133 }, { "epoch": 0.09292649098474341, "grad_norm": 0.6510274398754528, "learning_rate": 1.8585298196948685e-06, "loss": 0.9127, "step": 134 }, { "epoch": 0.09361997226074896, "grad_norm": 0.41182494622092, "learning_rate": 1.8723994452149794e-06, "loss": 0.799, "step": 135 }, { "epoch": 0.09431345353675451, "grad_norm": 0.40906146194426646, "learning_rate": 1.8862690707350902e-06, "loss": 0.8329, "step": 136 }, { "epoch": 0.09500693481276005, "grad_norm": 0.417522501030148, "learning_rate": 1.9001386962552013e-06, "loss": 0.7889, "step": 137 }, { "epoch": 0.0957004160887656, "grad_norm": 0.4760702200066777, "learning_rate": 1.9140083217753124e-06, "loss": 0.9148, "step": 138 }, { "epoch": 0.09639389736477115, "grad_norm": 0.4409560943629456, "learning_rate": 1.927877947295423e-06, "loss": 0.841, "step": 139 }, { "epoch": 0.0970873786407767, "grad_norm": 0.4176130976832559, "learning_rate": 1.941747572815534e-06, "loss": 0.8701, "step": 140 }, { "epoch": 0.09778085991678225, "grad_norm": 0.6424846457242896, "learning_rate": 1.955617198335645e-06, "loss": 0.8193, "step": 141 }, { "epoch": 0.09847434119278779, "grad_norm": 0.4642123896065429, "learning_rate": 1.969486823855756e-06, "loss": 0.9056, "step": 142 }, { "epoch": 0.09916782246879334, "grad_norm": 0.4614601263209169, "learning_rate": 1.983356449375867e-06, "loss": 0.8136, "step": 143 }, { "epoch": 0.09986130374479889, "grad_norm": 0.386926521895237, "learning_rate": 1.997226074895978e-06, "loss": 0.7662, "step": 144 }, { "epoch": 0.10055478502080444, "grad_norm": 0.4323327978431683, "learning_rate": 2.011095700416089e-06, "loss": 0.7478, "step": 145 }, { "epoch": 0.10124826629680998, "grad_norm": 0.4048711893378798, "learning_rate": 2.0249653259361997e-06, "loss": 0.8134, "step": 146 }, { "epoch": 0.10194174757281553, "grad_norm": 0.38406656989226945, "learning_rate": 2.0388349514563107e-06, "loss": 0.8047, "step": 147 }, { "epoch": 0.10263522884882108, "grad_norm": 0.5988509236999207, "learning_rate": 2.052704576976422e-06, "loss": 0.8223, "step": 148 }, { "epoch": 0.10332871012482663, "grad_norm": 0.4361669841858213, "learning_rate": 2.066574202496533e-06, "loss": 0.8688, "step": 149 }, { "epoch": 0.10402219140083217, "grad_norm": 0.3824904511899543, "learning_rate": 2.0804438280166435e-06, "loss": 0.8234, "step": 150 }, { "epoch": 0.10471567267683772, "grad_norm": 0.4737732666224156, "learning_rate": 2.0943134535367546e-06, "loss": 0.8359, "step": 151 }, { "epoch": 0.10540915395284327, "grad_norm": 0.3686095534705079, "learning_rate": 2.1081830790568657e-06, "loss": 0.7598, "step": 152 }, { "epoch": 0.10610263522884882, "grad_norm": 0.40975105551135427, "learning_rate": 2.1220527045769763e-06, "loss": 0.7943, "step": 153 }, { "epoch": 0.10679611650485436, "grad_norm": 0.40738122820413203, "learning_rate": 2.1359223300970874e-06, "loss": 0.8312, "step": 154 }, { "epoch": 0.10748959778085991, "grad_norm": 0.41387940078095947, "learning_rate": 2.1497919556171985e-06, "loss": 0.8691, "step": 155 }, 
{ "epoch": 0.10818307905686546, "grad_norm": 0.7368764876690216, "learning_rate": 2.1636615811373096e-06, "loss": 0.8323, "step": 156 }, { "epoch": 0.108876560332871, "grad_norm": 0.5345760890309688, "learning_rate": 2.17753120665742e-06, "loss": 0.9664, "step": 157 }, { "epoch": 0.10957004160887657, "grad_norm": 0.4131720622284066, "learning_rate": 2.1914008321775317e-06, "loss": 0.7368, "step": 158 }, { "epoch": 0.11026352288488211, "grad_norm": 0.37733498699117274, "learning_rate": 2.2052704576976423e-06, "loss": 0.7489, "step": 159 }, { "epoch": 0.11095700416088766, "grad_norm": 0.37270222722580476, "learning_rate": 2.2191400832177534e-06, "loss": 0.8096, "step": 160 }, { "epoch": 0.11165048543689321, "grad_norm": 0.4058821981599631, "learning_rate": 2.2330097087378645e-06, "loss": 0.7868, "step": 161 }, { "epoch": 0.11234396671289876, "grad_norm": 0.37964105090858946, "learning_rate": 2.246879334257975e-06, "loss": 0.7857, "step": 162 }, { "epoch": 0.1130374479889043, "grad_norm": 0.4115195795365264, "learning_rate": 2.2607489597780862e-06, "loss": 0.7654, "step": 163 }, { "epoch": 0.11373092926490985, "grad_norm": 0.45218882365511776, "learning_rate": 2.2746185852981973e-06, "loss": 0.8285, "step": 164 }, { "epoch": 0.1144244105409154, "grad_norm": 0.4283307454414197, "learning_rate": 2.2884882108183084e-06, "loss": 0.8012, "step": 165 }, { "epoch": 0.11511789181692095, "grad_norm": 0.37898471377966636, "learning_rate": 2.302357836338419e-06, "loss": 0.758, "step": 166 }, { "epoch": 0.1158113730929265, "grad_norm": 0.4050126785782291, "learning_rate": 2.31622746185853e-06, "loss": 0.8699, "step": 167 }, { "epoch": 0.11650485436893204, "grad_norm": 0.4475975205170183, "learning_rate": 2.330097087378641e-06, "loss": 0.7779, "step": 168 }, { "epoch": 0.11719833564493759, "grad_norm": 0.4089845343812099, "learning_rate": 2.343966712898752e-06, "loss": 0.6968, "step": 169 }, { "epoch": 0.11789181692094314, "grad_norm": 0.40249580215901204, "learning_rate": 2.357836338418863e-06, "loss": 0.7987, "step": 170 }, { "epoch": 0.11858529819694869, "grad_norm": 0.42775199041769624, "learning_rate": 2.371705963938974e-06, "loss": 0.7305, "step": 171 }, { "epoch": 0.11927877947295423, "grad_norm": 0.4017014413031526, "learning_rate": 2.385575589459085e-06, "loss": 0.7023, "step": 172 }, { "epoch": 0.11997226074895978, "grad_norm": 0.36589722717314527, "learning_rate": 2.3994452149791957e-06, "loss": 0.7588, "step": 173 }, { "epoch": 0.12066574202496533, "grad_norm": 0.6159883968990059, "learning_rate": 2.4133148404993067e-06, "loss": 0.7644, "step": 174 }, { "epoch": 0.12135922330097088, "grad_norm": 0.370659678569985, "learning_rate": 2.427184466019418e-06, "loss": 0.7615, "step": 175 }, { "epoch": 0.12205270457697642, "grad_norm": 0.42840548038986814, "learning_rate": 2.4410540915395285e-06, "loss": 0.7462, "step": 176 }, { "epoch": 0.12274618585298197, "grad_norm": 0.38134854059108925, "learning_rate": 2.4549237170596395e-06, "loss": 0.8004, "step": 177 }, { "epoch": 0.12343966712898752, "grad_norm": 0.44181385340495555, "learning_rate": 2.4687933425797506e-06, "loss": 0.7343, "step": 178 }, { "epoch": 0.12413314840499307, "grad_norm": 0.4224670939920034, "learning_rate": 2.4826629680998617e-06, "loss": 0.7752, "step": 179 }, { "epoch": 0.12482662968099861, "grad_norm": 0.36104530028643944, "learning_rate": 2.4965325936199723e-06, "loss": 0.7381, "step": 180 }, { "epoch": 0.12552011095700416, "grad_norm": 0.44005429051414174, "learning_rate": 2.5104022191400834e-06, "loss": 0.7387, "step": 181 
}, { "epoch": 0.1262135922330097, "grad_norm": 0.5202060042479089, "learning_rate": 2.5242718446601945e-06, "loss": 0.8152, "step": 182 }, { "epoch": 0.12690707350901526, "grad_norm": 0.3859967108361711, "learning_rate": 2.538141470180305e-06, "loss": 0.7155, "step": 183 }, { "epoch": 0.1276005547850208, "grad_norm": 0.35556697652262786, "learning_rate": 2.552011095700416e-06, "loss": 0.7453, "step": 184 }, { "epoch": 0.12829403606102635, "grad_norm": 0.440562926518887, "learning_rate": 2.5658807212205273e-06, "loss": 0.8335, "step": 185 }, { "epoch": 0.1289875173370319, "grad_norm": 0.41641306148148105, "learning_rate": 2.5797503467406383e-06, "loss": 0.8509, "step": 186 }, { "epoch": 0.12968099861303745, "grad_norm": 0.43837368942438204, "learning_rate": 2.593619972260749e-06, "loss": 0.7717, "step": 187 }, { "epoch": 0.130374479889043, "grad_norm": 0.42195900363377553, "learning_rate": 2.60748959778086e-06, "loss": 0.7189, "step": 188 }, { "epoch": 0.13106796116504854, "grad_norm": 0.40185288621909915, "learning_rate": 2.621359223300971e-06, "loss": 0.7455, "step": 189 }, { "epoch": 0.1317614424410541, "grad_norm": 0.42981098774319276, "learning_rate": 2.6352288488210818e-06, "loss": 0.7765, "step": 190 }, { "epoch": 0.13245492371705964, "grad_norm": 0.41750835261774816, "learning_rate": 2.649098474341193e-06, "loss": 0.7922, "step": 191 }, { "epoch": 0.13314840499306518, "grad_norm": 0.4529181884791232, "learning_rate": 2.662968099861304e-06, "loss": 0.8118, "step": 192 }, { "epoch": 0.13384188626907073, "grad_norm": 0.4403920855769716, "learning_rate": 2.676837725381415e-06, "loss": 0.6822, "step": 193 }, { "epoch": 0.13453536754507628, "grad_norm": 0.38722274562520886, "learning_rate": 2.6907073509015257e-06, "loss": 0.7246, "step": 194 }, { "epoch": 0.13522884882108183, "grad_norm": 0.4714787915536026, "learning_rate": 2.7045769764216367e-06, "loss": 0.7957, "step": 195 }, { "epoch": 0.13592233009708737, "grad_norm": 0.42150957107657083, "learning_rate": 2.718446601941748e-06, "loss": 0.7434, "step": 196 }, { "epoch": 0.13661581137309292, "grad_norm": 0.5216718201159719, "learning_rate": 2.7323162274618584e-06, "loss": 0.7357, "step": 197 }, { "epoch": 0.13730929264909847, "grad_norm": 0.3878172010520359, "learning_rate": 2.7461858529819695e-06, "loss": 0.7129, "step": 198 }, { "epoch": 0.13800277392510402, "grad_norm": 0.47665747018323956, "learning_rate": 2.7600554785020806e-06, "loss": 0.7987, "step": 199 }, { "epoch": 0.13869625520110956, "grad_norm": 0.4128992259952167, "learning_rate": 2.7739251040221917e-06, "loss": 0.7241, "step": 200 }, { "epoch": 0.1393897364771151, "grad_norm": 0.4396050055925165, "learning_rate": 2.7877947295423023e-06, "loss": 0.797, "step": 201 }, { "epoch": 0.14008321775312066, "grad_norm": 0.4004272257193986, "learning_rate": 2.8016643550624134e-06, "loss": 0.7595, "step": 202 }, { "epoch": 0.1407766990291262, "grad_norm": 0.3710027468305455, "learning_rate": 2.8155339805825245e-06, "loss": 0.7712, "step": 203 }, { "epoch": 0.14147018030513175, "grad_norm": 0.3844728424521066, "learning_rate": 2.829403606102635e-06, "loss": 0.8086, "step": 204 }, { "epoch": 0.1421636615811373, "grad_norm": 0.4119582628671637, "learning_rate": 2.843273231622746e-06, "loss": 0.6663, "step": 205 }, { "epoch": 0.14285714285714285, "grad_norm": 0.4573212518732153, "learning_rate": 2.8571428571428573e-06, "loss": 0.8165, "step": 206 }, { "epoch": 0.1435506241331484, "grad_norm": 0.4050016525913954, "learning_rate": 2.8710124826629683e-06, "loss": 0.7374, "step": 207 }, { 
"epoch": 0.14424410540915394, "grad_norm": 0.40466688502984693, "learning_rate": 2.884882108183079e-06, "loss": 0.7314, "step": 208 }, { "epoch": 0.1449375866851595, "grad_norm": 0.4178896117001944, "learning_rate": 2.89875173370319e-06, "loss": 0.7978, "step": 209 }, { "epoch": 0.14563106796116504, "grad_norm": 0.3787764219509587, "learning_rate": 2.912621359223301e-06, "loss": 0.7783, "step": 210 }, { "epoch": 0.1463245492371706, "grad_norm": 0.45474506304874823, "learning_rate": 2.9264909847434118e-06, "loss": 0.6931, "step": 211 }, { "epoch": 0.14701803051317613, "grad_norm": 0.4671029494611205, "learning_rate": 2.940360610263523e-06, "loss": 0.7796, "step": 212 }, { "epoch": 0.14771151178918168, "grad_norm": 0.45047096334063835, "learning_rate": 2.954230235783634e-06, "loss": 0.8387, "step": 213 }, { "epoch": 0.14840499306518723, "grad_norm": 0.38803404878012426, "learning_rate": 2.968099861303745e-06, "loss": 0.6888, "step": 214 }, { "epoch": 0.14909847434119278, "grad_norm": 0.3943536793994258, "learning_rate": 2.9819694868238556e-06, "loss": 0.7059, "step": 215 }, { "epoch": 0.14979195561719832, "grad_norm": 0.40804727263899615, "learning_rate": 2.9958391123439667e-06, "loss": 0.7399, "step": 216 }, { "epoch": 0.15048543689320387, "grad_norm": 0.4103653828487261, "learning_rate": 3.0097087378640778e-06, "loss": 0.7439, "step": 217 }, { "epoch": 0.15117891816920942, "grad_norm": 0.4695251055502895, "learning_rate": 3.0235783633841884e-06, "loss": 0.7459, "step": 218 }, { "epoch": 0.15187239944521497, "grad_norm": 0.36981358397849723, "learning_rate": 3.0374479889042995e-06, "loss": 0.6762, "step": 219 }, { "epoch": 0.15256588072122051, "grad_norm": 0.45866020164912186, "learning_rate": 3.0513176144244106e-06, "loss": 0.697, "step": 220 }, { "epoch": 0.15325936199722606, "grad_norm": 0.4250655406061998, "learning_rate": 3.0651872399445217e-06, "loss": 0.7034, "step": 221 }, { "epoch": 0.1539528432732316, "grad_norm": 0.4986501675683246, "learning_rate": 3.0790568654646323e-06, "loss": 0.6957, "step": 222 }, { "epoch": 0.15464632454923716, "grad_norm": 0.4320858977385933, "learning_rate": 3.0929264909847434e-06, "loss": 0.6711, "step": 223 }, { "epoch": 0.1553398058252427, "grad_norm": 0.3905585489893339, "learning_rate": 3.1067961165048544e-06, "loss": 0.6946, "step": 224 }, { "epoch": 0.15603328710124825, "grad_norm": 0.5691385690117634, "learning_rate": 3.120665742024965e-06, "loss": 0.713, "step": 225 }, { "epoch": 0.15672676837725383, "grad_norm": 0.4018553467780068, "learning_rate": 3.134535367545077e-06, "loss": 0.7723, "step": 226 }, { "epoch": 0.15742024965325938, "grad_norm": 0.4341017943701606, "learning_rate": 3.1484049930651877e-06, "loss": 0.7205, "step": 227 }, { "epoch": 0.15811373092926492, "grad_norm": 0.34455071912195556, "learning_rate": 3.1622746185852987e-06, "loss": 0.6464, "step": 228 }, { "epoch": 0.15880721220527047, "grad_norm": 0.4890969754146096, "learning_rate": 3.17614424410541e-06, "loss": 0.7859, "step": 229 }, { "epoch": 0.15950069348127602, "grad_norm": 0.4372650975732535, "learning_rate": 3.1900138696255205e-06, "loss": 0.7823, "step": 230 }, { "epoch": 0.16019417475728157, "grad_norm": 0.3888984885548787, "learning_rate": 3.2038834951456315e-06, "loss": 0.7053, "step": 231 }, { "epoch": 0.1608876560332871, "grad_norm": 0.4441693569558216, "learning_rate": 3.2177531206657426e-06, "loss": 0.8003, "step": 232 }, { "epoch": 0.16158113730929266, "grad_norm": 0.45285413547070325, "learning_rate": 3.2316227461858537e-06, "loss": 0.7537, "step": 233 }, { 
"epoch": 0.1622746185852982, "grad_norm": 0.3764247733055762, "learning_rate": 3.2454923717059643e-06, "loss": 0.6972, "step": 234 }, { "epoch": 0.16296809986130376, "grad_norm": 0.3665788718235755, "learning_rate": 3.2593619972260754e-06, "loss": 0.7249, "step": 235 }, { "epoch": 0.1636615811373093, "grad_norm": 0.3686969545881345, "learning_rate": 3.2732316227461865e-06, "loss": 0.6693, "step": 236 }, { "epoch": 0.16435506241331485, "grad_norm": 0.3766909229645527, "learning_rate": 3.287101248266297e-06, "loss": 0.7325, "step": 237 }, { "epoch": 0.1650485436893204, "grad_norm": 0.42314048885712235, "learning_rate": 3.300970873786408e-06, "loss": 0.7642, "step": 238 }, { "epoch": 0.16574202496532595, "grad_norm": 0.4252000850307808, "learning_rate": 3.3148404993065193e-06, "loss": 0.677, "step": 239 }, { "epoch": 0.1664355062413315, "grad_norm": 0.38912020789020796, "learning_rate": 3.3287101248266303e-06, "loss": 0.7272, "step": 240 }, { "epoch": 0.16712898751733704, "grad_norm": 0.382307745645276, "learning_rate": 3.342579750346741e-06, "loss": 0.6513, "step": 241 }, { "epoch": 0.1678224687933426, "grad_norm": 0.4527453287260334, "learning_rate": 3.356449375866852e-06, "loss": 0.7548, "step": 242 }, { "epoch": 0.16851595006934814, "grad_norm": 0.38643663324749034, "learning_rate": 3.370319001386963e-06, "loss": 0.7054, "step": 243 }, { "epoch": 0.16920943134535368, "grad_norm": 0.434433662005887, "learning_rate": 3.3841886269070738e-06, "loss": 0.7379, "step": 244 }, { "epoch": 0.16990291262135923, "grad_norm": 0.41999529673546193, "learning_rate": 3.398058252427185e-06, "loss": 0.7369, "step": 245 }, { "epoch": 0.17059639389736478, "grad_norm": 0.40543406439208945, "learning_rate": 3.411927877947296e-06, "loss": 0.7346, "step": 246 }, { "epoch": 0.17128987517337033, "grad_norm": 0.44451792117385214, "learning_rate": 3.425797503467407e-06, "loss": 0.6726, "step": 247 }, { "epoch": 0.17198335644937587, "grad_norm": 0.4370238537183958, "learning_rate": 3.4396671289875176e-06, "loss": 0.7119, "step": 248 }, { "epoch": 0.17267683772538142, "grad_norm": 0.3618909161517529, "learning_rate": 3.4535367545076287e-06, "loss": 0.6303, "step": 249 }, { "epoch": 0.17337031900138697, "grad_norm": 0.4362454159660753, "learning_rate": 3.46740638002774e-06, "loss": 0.6872, "step": 250 }, { "epoch": 0.17406380027739252, "grad_norm": 0.40788222654825634, "learning_rate": 3.4812760055478504e-06, "loss": 0.7798, "step": 251 }, { "epoch": 0.17475728155339806, "grad_norm": 0.40646895325408444, "learning_rate": 3.4951456310679615e-06, "loss": 0.7273, "step": 252 }, { "epoch": 0.1754507628294036, "grad_norm": 0.3970649147498973, "learning_rate": 3.5090152565880726e-06, "loss": 0.7415, "step": 253 }, { "epoch": 0.17614424410540916, "grad_norm": 0.8738028260247339, "learning_rate": 3.5228848821081837e-06, "loss": 0.7357, "step": 254 }, { "epoch": 0.1768377253814147, "grad_norm": 0.4383888286551231, "learning_rate": 3.5367545076282943e-06, "loss": 0.7452, "step": 255 }, { "epoch": 0.17753120665742025, "grad_norm": 0.4430590745242103, "learning_rate": 3.5506241331484054e-06, "loss": 0.6778, "step": 256 }, { "epoch": 0.1782246879334258, "grad_norm": 0.39890463053500536, "learning_rate": 3.5644937586685165e-06, "loss": 0.7401, "step": 257 }, { "epoch": 0.17891816920943135, "grad_norm": 0.39432385105373763, "learning_rate": 3.578363384188627e-06, "loss": 0.6875, "step": 258 }, { "epoch": 0.1796116504854369, "grad_norm": 0.4614461680532051, "learning_rate": 3.592233009708738e-06, "loss": 0.7385, "step": 259 }, { 
"epoch": 0.18030513176144244, "grad_norm": 0.46226064481570694, "learning_rate": 3.6061026352288493e-06, "loss": 0.6958, "step": 260 }, { "epoch": 0.180998613037448, "grad_norm": 0.37990529350392716, "learning_rate": 3.6199722607489603e-06, "loss": 0.7773, "step": 261 }, { "epoch": 0.18169209431345354, "grad_norm": 0.40180373286019033, "learning_rate": 3.633841886269071e-06, "loss": 0.7238, "step": 262 }, { "epoch": 0.1823855755894591, "grad_norm": 0.40427696266686086, "learning_rate": 3.647711511789182e-06, "loss": 0.7307, "step": 263 }, { "epoch": 0.18307905686546463, "grad_norm": 0.5184525439199673, "learning_rate": 3.661581137309293e-06, "loss": 0.7426, "step": 264 }, { "epoch": 0.18377253814147018, "grad_norm": 0.43717853016028557, "learning_rate": 3.6754507628294038e-06, "loss": 0.7125, "step": 265 }, { "epoch": 0.18446601941747573, "grad_norm": 0.3735704789665749, "learning_rate": 3.689320388349515e-06, "loss": 0.6825, "step": 266 }, { "epoch": 0.18515950069348128, "grad_norm": 0.3950750781681265, "learning_rate": 3.703190013869626e-06, "loss": 0.6577, "step": 267 }, { "epoch": 0.18585298196948682, "grad_norm": 0.40969662640678584, "learning_rate": 3.717059639389737e-06, "loss": 0.6832, "step": 268 }, { "epoch": 0.18654646324549237, "grad_norm": 0.8433752708079951, "learning_rate": 3.7309292649098476e-06, "loss": 0.7512, "step": 269 }, { "epoch": 0.18723994452149792, "grad_norm": 0.37305785462119795, "learning_rate": 3.7447988904299587e-06, "loss": 0.74, "step": 270 }, { "epoch": 0.18793342579750347, "grad_norm": 0.3789588688093465, "learning_rate": 3.7586685159500698e-06, "loss": 0.7348, "step": 271 }, { "epoch": 0.18862690707350901, "grad_norm": 0.3616849761320114, "learning_rate": 3.7725381414701804e-06, "loss": 0.6887, "step": 272 }, { "epoch": 0.18932038834951456, "grad_norm": 0.5018112098723796, "learning_rate": 3.7864077669902915e-06, "loss": 0.6886, "step": 273 }, { "epoch": 0.1900138696255201, "grad_norm": 0.36718542081499966, "learning_rate": 3.8002773925104026e-06, "loss": 0.6481, "step": 274 }, { "epoch": 0.19070735090152566, "grad_norm": 0.3923365942952953, "learning_rate": 3.8141470180305136e-06, "loss": 0.733, "step": 275 }, { "epoch": 0.1914008321775312, "grad_norm": 0.3735527871806657, "learning_rate": 3.828016643550625e-06, "loss": 0.7018, "step": 276 }, { "epoch": 0.19209431345353675, "grad_norm": 0.4386730270594253, "learning_rate": 3.841886269070735e-06, "loss": 0.76, "step": 277 }, { "epoch": 0.1927877947295423, "grad_norm": 0.37686535934554893, "learning_rate": 3.855755894590846e-06, "loss": 0.6579, "step": 278 }, { "epoch": 0.19348127600554785, "grad_norm": 0.3866861443374644, "learning_rate": 3.8696255201109575e-06, "loss": 0.7, "step": 279 }, { "epoch": 0.1941747572815534, "grad_norm": 0.5290966775461156, "learning_rate": 3.883495145631068e-06, "loss": 0.7431, "step": 280 }, { "epoch": 0.19486823855755894, "grad_norm": 0.3962785445821349, "learning_rate": 3.897364771151179e-06, "loss": 0.7803, "step": 281 }, { "epoch": 0.1955617198335645, "grad_norm": 0.3879033584112287, "learning_rate": 3.91123439667129e-06, "loss": 0.6738, "step": 282 }, { "epoch": 0.19625520110957004, "grad_norm": 0.38041620346715294, "learning_rate": 3.925104022191401e-06, "loss": 0.6293, "step": 283 }, { "epoch": 0.19694868238557559, "grad_norm": 0.4648867288637665, "learning_rate": 3.938973647711512e-06, "loss": 0.7474, "step": 284 }, { "epoch": 0.19764216366158113, "grad_norm": 0.7660847778048131, "learning_rate": 3.952843273231623e-06, "loss": 0.7282, "step": 285 }, { "epoch": 
0.19833564493758668, "grad_norm": 0.4086871296085051, "learning_rate": 3.966712898751734e-06, "loss": 0.6426, "step": 286 }, { "epoch": 0.19902912621359223, "grad_norm": 0.4164392495091013, "learning_rate": 3.980582524271845e-06, "loss": 0.7051, "step": 287 }, { "epoch": 0.19972260748959778, "grad_norm": 0.41106476170361106, "learning_rate": 3.994452149791956e-06, "loss": 0.6931, "step": 288 }, { "epoch": 0.20041608876560332, "grad_norm": 0.4077108909855236, "learning_rate": 4.0083217753120665e-06, "loss": 0.7315, "step": 289 }, { "epoch": 0.20110957004160887, "grad_norm": 0.3482064528393862, "learning_rate": 4.022191400832178e-06, "loss": 0.5911, "step": 290 }, { "epoch": 0.20180305131761442, "grad_norm": 0.37430326552593113, "learning_rate": 4.036061026352289e-06, "loss": 0.7335, "step": 291 }, { "epoch": 0.20249653259361997, "grad_norm": 0.40684380286537025, "learning_rate": 4.049930651872399e-06, "loss": 0.6855, "step": 292 }, { "epoch": 0.2031900138696255, "grad_norm": 0.360212933581968, "learning_rate": 4.063800277392511e-06, "loss": 0.6422, "step": 293 }, { "epoch": 0.20388349514563106, "grad_norm": 0.39792381205101135, "learning_rate": 4.0776699029126215e-06, "loss": 0.721, "step": 294 }, { "epoch": 0.2045769764216366, "grad_norm": 0.4453439910657429, "learning_rate": 4.091539528432732e-06, "loss": 0.7601, "step": 295 }, { "epoch": 0.20527045769764216, "grad_norm": 0.34831754314449653, "learning_rate": 4.105409153952844e-06, "loss": 0.7018, "step": 296 }, { "epoch": 0.2059639389736477, "grad_norm": 0.3898897307469491, "learning_rate": 4.119278779472954e-06, "loss": 0.6699, "step": 297 }, { "epoch": 0.20665742024965325, "grad_norm": 0.40060497596979705, "learning_rate": 4.133148404993066e-06, "loss": 0.7531, "step": 298 }, { "epoch": 0.2073509015256588, "grad_norm": 0.40441239042015287, "learning_rate": 4.1470180305131764e-06, "loss": 0.7485, "step": 299 }, { "epoch": 0.20804438280166435, "grad_norm": 0.4237334246726315, "learning_rate": 4.160887656033287e-06, "loss": 0.6922, "step": 300 }, { "epoch": 0.2087378640776699, "grad_norm": 0.3741007592381228, "learning_rate": 4.1747572815533986e-06, "loss": 0.6715, "step": 301 }, { "epoch": 0.20943134535367544, "grad_norm": 0.5389226606185195, "learning_rate": 4.188626907073509e-06, "loss": 0.6939, "step": 302 }, { "epoch": 0.210124826629681, "grad_norm": 0.3862773522692115, "learning_rate": 4.20249653259362e-06, "loss": 0.7199, "step": 303 }, { "epoch": 0.21081830790568654, "grad_norm": 0.39457682805996036, "learning_rate": 4.216366158113731e-06, "loss": 0.7211, "step": 304 }, { "epoch": 0.21151178918169208, "grad_norm": 0.3924237038966114, "learning_rate": 4.230235783633842e-06, "loss": 0.6458, "step": 305 }, { "epoch": 0.21220527045769763, "grad_norm": 0.41825071907435474, "learning_rate": 4.244105409153953e-06, "loss": 0.754, "step": 306 }, { "epoch": 0.21289875173370318, "grad_norm": 0.4297642196475537, "learning_rate": 4.257975034674064e-06, "loss": 0.6654, "step": 307 }, { "epoch": 0.21359223300970873, "grad_norm": 0.3520622437956551, "learning_rate": 4.271844660194175e-06, "loss": 0.643, "step": 308 }, { "epoch": 0.21428571428571427, "grad_norm": 0.4044597557522868, "learning_rate": 4.2857142857142855e-06, "loss": 0.7518, "step": 309 }, { "epoch": 0.21497919556171982, "grad_norm": 0.3713455415699211, "learning_rate": 4.299583911234397e-06, "loss": 0.6956, "step": 310 }, { "epoch": 0.21567267683772537, "grad_norm": 0.42093578154492384, "learning_rate": 4.313453536754508e-06, "loss": 0.6999, "step": 311 }, { "epoch": 
0.21636615811373092, "grad_norm": 0.3948856956442142, "learning_rate": 4.327323162274619e-06, "loss": 0.7453, "step": 312 }, { "epoch": 0.21705963938973646, "grad_norm": 0.36924587597247616, "learning_rate": 4.34119278779473e-06, "loss": 0.7046, "step": 313 }, { "epoch": 0.217753120665742, "grad_norm": 0.37060128569933265, "learning_rate": 4.35506241331484e-06, "loss": 0.6666, "step": 314 }, { "epoch": 0.21844660194174756, "grad_norm": 0.3817264949494108, "learning_rate": 4.368932038834952e-06, "loss": 0.7588, "step": 315 }, { "epoch": 0.21914008321775313, "grad_norm": 0.44279359077385344, "learning_rate": 4.382801664355063e-06, "loss": 0.7247, "step": 316 }, { "epoch": 0.21983356449375868, "grad_norm": 0.41286292621557696, "learning_rate": 4.396671289875174e-06, "loss": 0.722, "step": 317 }, { "epoch": 0.22052704576976423, "grad_norm": 0.3665446610582214, "learning_rate": 4.410540915395285e-06, "loss": 0.7042, "step": 318 }, { "epoch": 0.22122052704576978, "grad_norm": 0.4064325674582958, "learning_rate": 4.424410540915396e-06, "loss": 0.6911, "step": 319 }, { "epoch": 0.22191400832177532, "grad_norm": 0.3527947659626731, "learning_rate": 4.438280166435507e-06, "loss": 0.6782, "step": 320 }, { "epoch": 0.22260748959778087, "grad_norm": 0.4122497012373624, "learning_rate": 4.4521497919556175e-06, "loss": 0.6382, "step": 321 }, { "epoch": 0.22330097087378642, "grad_norm": 0.4171792992825532, "learning_rate": 4.466019417475729e-06, "loss": 0.7077, "step": 322 }, { "epoch": 0.22399445214979197, "grad_norm": 0.4009641540334108, "learning_rate": 4.47988904299584e-06, "loss": 0.6752, "step": 323 }, { "epoch": 0.22468793342579751, "grad_norm": 0.3683855640327117, "learning_rate": 4.49375866851595e-06, "loss": 0.6811, "step": 324 }, { "epoch": 0.22538141470180306, "grad_norm": 0.33393299149542643, "learning_rate": 4.507628294036062e-06, "loss": 0.6586, "step": 325 }, { "epoch": 0.2260748959778086, "grad_norm": 0.6062995504813962, "learning_rate": 4.5214979195561724e-06, "loss": 0.6465, "step": 326 }, { "epoch": 0.22676837725381416, "grad_norm": 0.3973099576565524, "learning_rate": 4.535367545076284e-06, "loss": 0.7261, "step": 327 }, { "epoch": 0.2274618585298197, "grad_norm": 0.3908901589205917, "learning_rate": 4.5492371705963946e-06, "loss": 0.684, "step": 328 }, { "epoch": 0.22815533980582525, "grad_norm": 0.3775681435948479, "learning_rate": 4.563106796116505e-06, "loss": 0.673, "step": 329 }, { "epoch": 0.2288488210818308, "grad_norm": 0.3900280063158446, "learning_rate": 4.576976421636617e-06, "loss": 0.6897, "step": 330 }, { "epoch": 0.22954230235783635, "grad_norm": 0.37078725700628873, "learning_rate": 4.590846047156727e-06, "loss": 0.6874, "step": 331 }, { "epoch": 0.2302357836338419, "grad_norm": 0.38792361557953897, "learning_rate": 4.604715672676838e-06, "loss": 0.7129, "step": 332 }, { "epoch": 0.23092926490984744, "grad_norm": 0.38616002138886846, "learning_rate": 4.6185852981969495e-06, "loss": 0.6745, "step": 333 }, { "epoch": 0.231622746185853, "grad_norm": 0.32852732250748046, "learning_rate": 4.63245492371706e-06, "loss": 0.6635, "step": 334 }, { "epoch": 0.23231622746185854, "grad_norm": 0.4457611585675984, "learning_rate": 4.646324549237171e-06, "loss": 0.7158, "step": 335 }, { "epoch": 0.23300970873786409, "grad_norm": 0.40236756258934586, "learning_rate": 4.660194174757282e-06, "loss": 0.7616, "step": 336 }, { "epoch": 0.23370319001386963, "grad_norm": 0.3599443137364842, "learning_rate": 4.674063800277393e-06, "loss": 0.6883, "step": 337 }, { "epoch": 0.23439667128987518, 
"grad_norm": 0.41547414287949047, "learning_rate": 4.687933425797504e-06, "loss": 0.7555, "step": 338 }, { "epoch": 0.23509015256588073, "grad_norm": 0.37873565331028053, "learning_rate": 4.701803051317615e-06, "loss": 0.6402, "step": 339 }, { "epoch": 0.23578363384188628, "grad_norm": 0.39684279481484297, "learning_rate": 4.715672676837726e-06, "loss": 0.6315, "step": 340 }, { "epoch": 0.23647711511789182, "grad_norm": 0.6091528195070264, "learning_rate": 4.729542302357837e-06, "loss": 0.7367, "step": 341 }, { "epoch": 0.23717059639389737, "grad_norm": 0.6825847378059929, "learning_rate": 4.743411927877948e-06, "loss": 0.6655, "step": 342 }, { "epoch": 0.23786407766990292, "grad_norm": 0.43983874334583867, "learning_rate": 4.7572815533980585e-06, "loss": 0.6389, "step": 343 }, { "epoch": 0.23855755894590847, "grad_norm": 0.39966578241418427, "learning_rate": 4.77115117891817e-06, "loss": 0.7489, "step": 344 }, { "epoch": 0.239251040221914, "grad_norm": 0.43569735103122237, "learning_rate": 4.785020804438281e-06, "loss": 0.668, "step": 345 }, { "epoch": 0.23994452149791956, "grad_norm": 0.42173264369566943, "learning_rate": 4.798890429958391e-06, "loss": 0.6116, "step": 346 }, { "epoch": 0.2406380027739251, "grad_norm": 0.4233628116294417, "learning_rate": 4.812760055478503e-06, "loss": 0.6558, "step": 347 }, { "epoch": 0.24133148404993066, "grad_norm": 0.37286416211015233, "learning_rate": 4.8266296809986135e-06, "loss": 0.6448, "step": 348 }, { "epoch": 0.2420249653259362, "grad_norm": 0.5540498950332101, "learning_rate": 4.840499306518724e-06, "loss": 0.6845, "step": 349 }, { "epoch": 0.24271844660194175, "grad_norm": 0.39698146940563805, "learning_rate": 4.854368932038836e-06, "loss": 0.658, "step": 350 }, { "epoch": 0.2434119278779473, "grad_norm": 0.3760647404205541, "learning_rate": 4.868238557558946e-06, "loss": 0.6862, "step": 351 }, { "epoch": 0.24410540915395285, "grad_norm": 0.3821808346275461, "learning_rate": 4.882108183079057e-06, "loss": 0.6398, "step": 352 }, { "epoch": 0.2447988904299584, "grad_norm": 0.5129475677657719, "learning_rate": 4.895977808599168e-06, "loss": 0.6635, "step": 353 }, { "epoch": 0.24549237170596394, "grad_norm": 0.4342552380988674, "learning_rate": 4.909847434119279e-06, "loss": 0.6356, "step": 354 }, { "epoch": 0.2461858529819695, "grad_norm": 0.5138925585228701, "learning_rate": 4.9237170596393906e-06, "loss": 0.6717, "step": 355 }, { "epoch": 0.24687933425797504, "grad_norm": 0.3604123159012774, "learning_rate": 4.937586685159501e-06, "loss": 0.6659, "step": 356 }, { "epoch": 0.24757281553398058, "grad_norm": 0.3908145023887402, "learning_rate": 4.951456310679612e-06, "loss": 0.6907, "step": 357 }, { "epoch": 0.24826629680998613, "grad_norm": 0.49042164821212136, "learning_rate": 4.965325936199723e-06, "loss": 0.7564, "step": 358 }, { "epoch": 0.24895977808599168, "grad_norm": 0.3501434572941209, "learning_rate": 4.979195561719834e-06, "loss": 0.6338, "step": 359 }, { "epoch": 0.24965325936199723, "grad_norm": 0.3830884675531958, "learning_rate": 4.993065187239945e-06, "loss": 0.7155, "step": 360 }, { "epoch": 0.2503467406380028, "grad_norm": 0.3725694539787639, "learning_rate": 5.006934812760056e-06, "loss": 0.6294, "step": 361 }, { "epoch": 0.2510402219140083, "grad_norm": 0.34617132963619734, "learning_rate": 5.020804438280167e-06, "loss": 0.6415, "step": 362 }, { "epoch": 0.25173370319001387, "grad_norm": 0.3683478684346332, "learning_rate": 5.0346740638002775e-06, "loss": 0.6448, "step": 363 }, { "epoch": 0.2524271844660194, "grad_norm": 
0.8710952742326363, "learning_rate": 5.048543689320389e-06, "loss": 0.6435, "step": 364 }, { "epoch": 0.25312066574202496, "grad_norm": 0.439240482058905, "learning_rate": 5.0624133148405e-06, "loss": 0.7665, "step": 365 }, { "epoch": 0.2538141470180305, "grad_norm": 0.33216127526038036, "learning_rate": 5.07628294036061e-06, "loss": 0.6297, "step": 366 }, { "epoch": 0.25450762829403606, "grad_norm": 0.5775981024418212, "learning_rate": 5.090152565880722e-06, "loss": 0.6666, "step": 367 }, { "epoch": 0.2552011095700416, "grad_norm": 0.38882938462702377, "learning_rate": 5.104022191400832e-06, "loss": 0.7085, "step": 368 }, { "epoch": 0.25589459084604715, "grad_norm": 0.33482058623909816, "learning_rate": 5.117891816920944e-06, "loss": 0.6187, "step": 369 }, { "epoch": 0.2565880721220527, "grad_norm": 0.33162870234823194, "learning_rate": 5.1317614424410545e-06, "loss": 0.6328, "step": 370 }, { "epoch": 0.25728155339805825, "grad_norm": 0.3896860210583244, "learning_rate": 5.145631067961165e-06, "loss": 0.6681, "step": 371 }, { "epoch": 0.2579750346740638, "grad_norm": 0.7454938688896714, "learning_rate": 5.159500693481277e-06, "loss": 0.6656, "step": 372 }, { "epoch": 0.25866851595006934, "grad_norm": 0.3747009984960349, "learning_rate": 5.173370319001387e-06, "loss": 0.6885, "step": 373 }, { "epoch": 0.2593619972260749, "grad_norm": 0.3963573968649995, "learning_rate": 5.187239944521498e-06, "loss": 0.7195, "step": 374 }, { "epoch": 0.26005547850208044, "grad_norm": 0.3238548069264673, "learning_rate": 5.2011095700416095e-06, "loss": 0.6326, "step": 375 }, { "epoch": 0.260748959778086, "grad_norm": 0.6022470252979756, "learning_rate": 5.21497919556172e-06, "loss": 0.7065, "step": 376 }, { "epoch": 0.26144244105409153, "grad_norm": 0.37864127639998046, "learning_rate": 5.228848821081831e-06, "loss": 0.6971, "step": 377 }, { "epoch": 0.2621359223300971, "grad_norm": 0.40581994582978925, "learning_rate": 5.242718446601942e-06, "loss": 0.6546, "step": 378 }, { "epoch": 0.26282940360610263, "grad_norm": 0.43523343135296255, "learning_rate": 5.256588072122053e-06, "loss": 0.6614, "step": 379 }, { "epoch": 0.2635228848821082, "grad_norm": 0.4429279022870644, "learning_rate": 5.2704576976421636e-06, "loss": 0.6547, "step": 380 }, { "epoch": 0.2642163661581137, "grad_norm": 0.34918946570697734, "learning_rate": 5.284327323162275e-06, "loss": 0.5975, "step": 381 }, { "epoch": 0.26490984743411927, "grad_norm": 0.3744351134584379, "learning_rate": 5.298196948682386e-06, "loss": 0.5928, "step": 382 }, { "epoch": 0.2656033287101248, "grad_norm": 0.3854516733573264, "learning_rate": 5.312066574202497e-06, "loss": 0.6574, "step": 383 }, { "epoch": 0.26629680998613037, "grad_norm": 0.3650426392099008, "learning_rate": 5.325936199722608e-06, "loss": 0.6325, "step": 384 }, { "epoch": 0.2669902912621359, "grad_norm": 0.3663888776279793, "learning_rate": 5.3398058252427185e-06, "loss": 0.6125, "step": 385 }, { "epoch": 0.26768377253814146, "grad_norm": 0.4243630833933148, "learning_rate": 5.35367545076283e-06, "loss": 0.6597, "step": 386 }, { "epoch": 0.268377253814147, "grad_norm": 0.3744910129430353, "learning_rate": 5.367545076282941e-06, "loss": 0.6109, "step": 387 }, { "epoch": 0.26907073509015256, "grad_norm": 0.34465466744243556, "learning_rate": 5.381414701803051e-06, "loss": 0.6416, "step": 388 }, { "epoch": 0.2697642163661581, "grad_norm": 0.41674605693511635, "learning_rate": 5.395284327323163e-06, "loss": 0.6233, "step": 389 }, { "epoch": 0.27045769764216365, "grad_norm": 0.38060760426150736, 
"learning_rate": 5.4091539528432735e-06, "loss": 0.7213, "step": 390 }, { "epoch": 0.2711511789181692, "grad_norm": 0.40123837020401165, "learning_rate": 5.423023578363384e-06, "loss": 0.6753, "step": 391 }, { "epoch": 0.27184466019417475, "grad_norm": 0.36451925116080663, "learning_rate": 5.436893203883496e-06, "loss": 0.6328, "step": 392 }, { "epoch": 0.2725381414701803, "grad_norm": 0.36749822505626917, "learning_rate": 5.450762829403606e-06, "loss": 0.6341, "step": 393 }, { "epoch": 0.27323162274618584, "grad_norm": 0.4096310577786831, "learning_rate": 5.464632454923717e-06, "loss": 0.6723, "step": 394 }, { "epoch": 0.2739251040221914, "grad_norm": 0.4237347819620758, "learning_rate": 5.478502080443828e-06, "loss": 0.7211, "step": 395 }, { "epoch": 0.27461858529819694, "grad_norm": 0.33936564676239156, "learning_rate": 5.492371705963939e-06, "loss": 0.6453, "step": 396 }, { "epoch": 0.2753120665742025, "grad_norm": 0.3658780802205608, "learning_rate": 5.5062413314840505e-06, "loss": 0.7085, "step": 397 }, { "epoch": 0.27600554785020803, "grad_norm": 0.3730632292217037, "learning_rate": 5.520110957004161e-06, "loss": 0.6712, "step": 398 }, { "epoch": 0.2766990291262136, "grad_norm": 0.3795485729408459, "learning_rate": 5.533980582524272e-06, "loss": 0.717, "step": 399 }, { "epoch": 0.27739251040221913, "grad_norm": 0.3774321745578124, "learning_rate": 5.547850208044383e-06, "loss": 0.6526, "step": 400 }, { "epoch": 0.2780859916782247, "grad_norm": 0.3746065542373678, "learning_rate": 5.561719833564494e-06, "loss": 0.654, "step": 401 }, { "epoch": 0.2787794729542302, "grad_norm": 0.39011429530147207, "learning_rate": 5.575589459084605e-06, "loss": 0.7052, "step": 402 }, { "epoch": 0.27947295423023577, "grad_norm": 0.4119729248892106, "learning_rate": 5.589459084604716e-06, "loss": 0.6616, "step": 403 }, { "epoch": 0.2801664355062413, "grad_norm": 0.4143740013594362, "learning_rate": 5.603328710124827e-06, "loss": 0.7257, "step": 404 }, { "epoch": 0.28085991678224687, "grad_norm": 0.41908972692371427, "learning_rate": 5.6171983356449374e-06, "loss": 0.7525, "step": 405 }, { "epoch": 0.2815533980582524, "grad_norm": 0.4165549426702378, "learning_rate": 5.631067961165049e-06, "loss": 0.6743, "step": 406 }, { "epoch": 0.28224687933425796, "grad_norm": 0.4154829042919571, "learning_rate": 5.6449375866851596e-06, "loss": 0.6862, "step": 407 }, { "epoch": 0.2829403606102635, "grad_norm": 0.5072686293952215, "learning_rate": 5.65880721220527e-06, "loss": 0.6137, "step": 408 }, { "epoch": 0.28363384188626906, "grad_norm": 0.37917909862888977, "learning_rate": 5.672676837725382e-06, "loss": 0.6845, "step": 409 }, { "epoch": 0.2843273231622746, "grad_norm": 0.3613478033568616, "learning_rate": 5.686546463245492e-06, "loss": 0.6435, "step": 410 }, { "epoch": 0.28502080443828015, "grad_norm": 0.4034578650552871, "learning_rate": 5.700416088765604e-06, "loss": 0.6732, "step": 411 }, { "epoch": 0.2857142857142857, "grad_norm": 0.3779341136244697, "learning_rate": 5.7142857142857145e-06, "loss": 0.6785, "step": 412 }, { "epoch": 0.28640776699029125, "grad_norm": 0.3793816313097279, "learning_rate": 5.728155339805825e-06, "loss": 0.5901, "step": 413 }, { "epoch": 0.2871012482662968, "grad_norm": 0.3964948080285677, "learning_rate": 5.742024965325937e-06, "loss": 0.7356, "step": 414 }, { "epoch": 0.28779472954230234, "grad_norm": 0.35077158308234324, "learning_rate": 5.755894590846047e-06, "loss": 0.6466, "step": 415 }, { "epoch": 0.2884882108183079, "grad_norm": 0.3737881400105352, "learning_rate": 
5.769764216366158e-06, "loss": 0.7318, "step": 416 }, { "epoch": 0.28918169209431344, "grad_norm": 0.3420061085007688, "learning_rate": 5.7836338418862695e-06, "loss": 0.6436, "step": 417 }, { "epoch": 0.289875173370319, "grad_norm": 0.3835060764229847, "learning_rate": 5.79750346740638e-06, "loss": 0.7166, "step": 418 }, { "epoch": 0.29056865464632453, "grad_norm": 0.4041934642032845, "learning_rate": 5.811373092926491e-06, "loss": 0.6385, "step": 419 }, { "epoch": 0.2912621359223301, "grad_norm": 0.38980872524931653, "learning_rate": 5.825242718446602e-06, "loss": 0.6625, "step": 420 }, { "epoch": 0.2919556171983356, "grad_norm": 0.3926345259523608, "learning_rate": 5.839112343966713e-06, "loss": 0.6889, "step": 421 }, { "epoch": 0.2926490984743412, "grad_norm": 0.387207170404907, "learning_rate": 5.8529819694868235e-06, "loss": 0.6491, "step": 422 }, { "epoch": 0.2933425797503467, "grad_norm": 0.47073473437328256, "learning_rate": 5.866851595006935e-06, "loss": 0.7384, "step": 423 }, { "epoch": 0.29403606102635227, "grad_norm": 0.3829161705241466, "learning_rate": 5.880721220527046e-06, "loss": 0.6238, "step": 424 }, { "epoch": 0.2947295423023578, "grad_norm": 0.3749217356284979, "learning_rate": 5.894590846047157e-06, "loss": 0.6202, "step": 425 }, { "epoch": 0.29542302357836336, "grad_norm": 0.3795656266908337, "learning_rate": 5.908460471567268e-06, "loss": 0.5814, "step": 426 }, { "epoch": 0.2961165048543689, "grad_norm": 0.3352327571349482, "learning_rate": 5.9223300970873785e-06, "loss": 0.6219, "step": 427 }, { "epoch": 0.29680998613037446, "grad_norm": 0.45680581115880275, "learning_rate": 5.93619972260749e-06, "loss": 0.6776, "step": 428 }, { "epoch": 0.29750346740638, "grad_norm": 0.35900628684279645, "learning_rate": 5.950069348127601e-06, "loss": 0.5489, "step": 429 }, { "epoch": 0.29819694868238555, "grad_norm": 0.36206425800547715, "learning_rate": 5.963938973647711e-06, "loss": 0.659, "step": 430 }, { "epoch": 0.2988904299583911, "grad_norm": 0.37646322689185413, "learning_rate": 5.977808599167823e-06, "loss": 0.6916, "step": 431 }, { "epoch": 0.29958391123439665, "grad_norm": 0.37249710905483346, "learning_rate": 5.9916782246879334e-06, "loss": 0.6882, "step": 432 }, { "epoch": 0.3002773925104022, "grad_norm": 0.34689884606211574, "learning_rate": 6.005547850208044e-06, "loss": 0.6101, "step": 433 }, { "epoch": 0.30097087378640774, "grad_norm": 0.34496297242635743, "learning_rate": 6.0194174757281556e-06, "loss": 0.6395, "step": 434 }, { "epoch": 0.3016643550624133, "grad_norm": 0.3837297230084832, "learning_rate": 6.033287101248266e-06, "loss": 0.6713, "step": 435 }, { "epoch": 0.30235783633841884, "grad_norm": 0.3765465485638321, "learning_rate": 6.047156726768377e-06, "loss": 0.6289, "step": 436 }, { "epoch": 0.3030513176144244, "grad_norm": 0.38880642643366503, "learning_rate": 6.061026352288488e-06, "loss": 0.6224, "step": 437 }, { "epoch": 0.30374479889042993, "grad_norm": 0.3426690228325314, "learning_rate": 6.074895977808599e-06, "loss": 0.6254, "step": 438 }, { "epoch": 0.3044382801664355, "grad_norm": 0.36772812499968494, "learning_rate": 6.0887656033287105e-06, "loss": 0.5951, "step": 439 }, { "epoch": 0.30513176144244103, "grad_norm": 0.49899171788191177, "learning_rate": 6.102635228848821e-06, "loss": 0.6773, "step": 440 }, { "epoch": 0.3058252427184466, "grad_norm": 0.3834180695542531, "learning_rate": 6.116504854368932e-06, "loss": 0.7215, "step": 441 }, { "epoch": 0.3065187239944521, "grad_norm": 0.36106603703063916, "learning_rate": 
6.130374479889043e-06, "loss": 0.6376, "step": 442 }, { "epoch": 0.30721220527045767, "grad_norm": 0.3710802833985127, "learning_rate": 6.144244105409154e-06, "loss": 0.6756, "step": 443 }, { "epoch": 0.3079056865464632, "grad_norm": 0.3823448067450143, "learning_rate": 6.158113730929265e-06, "loss": 0.6882, "step": 444 }, { "epoch": 0.30859916782246877, "grad_norm": 0.3625031250425025, "learning_rate": 6.171983356449376e-06, "loss": 0.5832, "step": 445 }, { "epoch": 0.3092926490984743, "grad_norm": 0.328308299367241, "learning_rate": 6.185852981969487e-06, "loss": 0.5977, "step": 446 }, { "epoch": 0.30998613037447986, "grad_norm": 0.34092776179197676, "learning_rate": 6.199722607489597e-06, "loss": 0.5933, "step": 447 }, { "epoch": 0.3106796116504854, "grad_norm": 0.42115580709489114, "learning_rate": 6.213592233009709e-06, "loss": 0.6959, "step": 448 }, { "epoch": 0.31137309292649096, "grad_norm": 0.38627872596079266, "learning_rate": 6.2274618585298195e-06, "loss": 0.6929, "step": 449 }, { "epoch": 0.3120665742024965, "grad_norm": 0.3259695951514646, "learning_rate": 6.24133148404993e-06, "loss": 0.6951, "step": 450 }, { "epoch": 0.3127600554785021, "grad_norm": 0.37187630418518997, "learning_rate": 6.2552011095700425e-06, "loss": 0.6646, "step": 451 }, { "epoch": 0.31345353675450766, "grad_norm": 0.3397739273074188, "learning_rate": 6.269070735090154e-06, "loss": 0.6664, "step": 452 }, { "epoch": 0.3141470180305132, "grad_norm": 0.3907456233974298, "learning_rate": 6.282940360610265e-06, "loss": 0.6513, "step": 453 }, { "epoch": 0.31484049930651875, "grad_norm": 0.33685206683928487, "learning_rate": 6.296809986130375e-06, "loss": 0.6131, "step": 454 }, { "epoch": 0.3155339805825243, "grad_norm": 0.5047864328561893, "learning_rate": 6.310679611650487e-06, "loss": 0.5541, "step": 455 }, { "epoch": 0.31622746185852985, "grad_norm": 0.3622958618519615, "learning_rate": 6.3245492371705975e-06, "loss": 0.6889, "step": 456 }, { "epoch": 0.3169209431345354, "grad_norm": 0.3706772455463886, "learning_rate": 6.338418862690708e-06, "loss": 0.7271, "step": 457 }, { "epoch": 0.31761442441054094, "grad_norm": 0.35327107215079073, "learning_rate": 6.35228848821082e-06, "loss": 0.6879, "step": 458 }, { "epoch": 0.3183079056865465, "grad_norm": 0.43921428412042124, "learning_rate": 6.36615811373093e-06, "loss": 0.6329, "step": 459 }, { "epoch": 0.31900138696255204, "grad_norm": 0.38468429534426213, "learning_rate": 6.380027739251041e-06, "loss": 0.6341, "step": 460 }, { "epoch": 0.3196948682385576, "grad_norm": 0.34535486919600455, "learning_rate": 6.393897364771152e-06, "loss": 0.6254, "step": 461 }, { "epoch": 0.32038834951456313, "grad_norm": 0.3635102675515314, "learning_rate": 6.407766990291263e-06, "loss": 0.6085, "step": 462 }, { "epoch": 0.3210818307905687, "grad_norm": 0.3638395408480024, "learning_rate": 6.421636615811374e-06, "loss": 0.6342, "step": 463 }, { "epoch": 0.3217753120665742, "grad_norm": 0.4335637077465101, "learning_rate": 6.435506241331485e-06, "loss": 0.6225, "step": 464 }, { "epoch": 0.3224687933425798, "grad_norm": 0.45472390117370814, "learning_rate": 6.449375866851596e-06, "loss": 0.6208, "step": 465 }, { "epoch": 0.3231622746185853, "grad_norm": 0.38973500636958547, "learning_rate": 6.463245492371707e-06, "loss": 0.6756, "step": 466 }, { "epoch": 0.32385575589459087, "grad_norm": 0.4103960632318212, "learning_rate": 6.477115117891818e-06, "loss": 0.6527, "step": 467 }, { "epoch": 0.3245492371705964, "grad_norm": 0.3870530980825968, "learning_rate": 6.490984743411929e-06, 
"loss": 0.6946, "step": 468 }, { "epoch": 0.32524271844660196, "grad_norm": 0.3788150859492285, "learning_rate": 6.50485436893204e-06, "loss": 0.659, "step": 469 }, { "epoch": 0.3259361997226075, "grad_norm": 0.36576904727712745, "learning_rate": 6.518723994452151e-06, "loss": 0.6075, "step": 470 }, { "epoch": 0.32662968099861306, "grad_norm": 0.38197827075641066, "learning_rate": 6.5325936199722614e-06, "loss": 0.6796, "step": 471 }, { "epoch": 0.3273231622746186, "grad_norm": 0.35198341424500457, "learning_rate": 6.546463245492373e-06, "loss": 0.6248, "step": 472 }, { "epoch": 0.32801664355062415, "grad_norm": 0.3545850515599311, "learning_rate": 6.560332871012484e-06, "loss": 0.6155, "step": 473 }, { "epoch": 0.3287101248266297, "grad_norm": 0.34892181852229914, "learning_rate": 6.574202496532594e-06, "loss": 0.6337, "step": 474 }, { "epoch": 0.32940360610263525, "grad_norm": 0.3499097210355433, "learning_rate": 6.588072122052706e-06, "loss": 0.609, "step": 475 }, { "epoch": 0.3300970873786408, "grad_norm": 0.3670643965397277, "learning_rate": 6.601941747572816e-06, "loss": 0.6147, "step": 476 }, { "epoch": 0.33079056865464634, "grad_norm": 0.36863552844334313, "learning_rate": 6.615811373092927e-06, "loss": 0.6348, "step": 477 }, { "epoch": 0.3314840499306519, "grad_norm": 0.47263933356334575, "learning_rate": 6.6296809986130385e-06, "loss": 0.6608, "step": 478 }, { "epoch": 0.33217753120665744, "grad_norm": 0.3895357643648736, "learning_rate": 6.643550624133149e-06, "loss": 0.6418, "step": 479 }, { "epoch": 0.332871012482663, "grad_norm": 0.48141877638096237, "learning_rate": 6.657420249653261e-06, "loss": 0.6115, "step": 480 }, { "epoch": 0.33356449375866853, "grad_norm": 0.36426459634017727, "learning_rate": 6.671289875173371e-06, "loss": 0.6569, "step": 481 }, { "epoch": 0.3342579750346741, "grad_norm": 0.35593652995273567, "learning_rate": 6.685159500693482e-06, "loss": 0.5888, "step": 482 }, { "epoch": 0.33495145631067963, "grad_norm": 0.3406668588484618, "learning_rate": 6.6990291262135935e-06, "loss": 0.6684, "step": 483 }, { "epoch": 0.3356449375866852, "grad_norm": 0.39969595043710154, "learning_rate": 6.712898751733704e-06, "loss": 0.5995, "step": 484 }, { "epoch": 0.3363384188626907, "grad_norm": 0.39264539382428926, "learning_rate": 6.726768377253815e-06, "loss": 0.6629, "step": 485 }, { "epoch": 0.33703190013869627, "grad_norm": 0.3546116714856907, "learning_rate": 6.740638002773926e-06, "loss": 0.7072, "step": 486 }, { "epoch": 0.3377253814147018, "grad_norm": 0.33241502234836434, "learning_rate": 6.754507628294037e-06, "loss": 0.6037, "step": 487 }, { "epoch": 0.33841886269070737, "grad_norm": 0.4155281461865311, "learning_rate": 6.7683772538141476e-06, "loss": 0.667, "step": 488 }, { "epoch": 0.3391123439667129, "grad_norm": 0.49872886700308955, "learning_rate": 6.782246879334259e-06, "loss": 0.6361, "step": 489 }, { "epoch": 0.33980582524271846, "grad_norm": 0.3648031387476405, "learning_rate": 6.79611650485437e-06, "loss": 0.6393, "step": 490 }, { "epoch": 0.340499306518724, "grad_norm": 0.3823274123017525, "learning_rate": 6.80998613037448e-06, "loss": 0.6614, "step": 491 }, { "epoch": 0.34119278779472956, "grad_norm": 0.36274873250507716, "learning_rate": 6.823855755894592e-06, "loss": 0.6473, "step": 492 }, { "epoch": 0.3418862690707351, "grad_norm": 0.3318657324302781, "learning_rate": 6.8377253814147025e-06, "loss": 0.6305, "step": 493 }, { "epoch": 0.34257975034674065, "grad_norm": 0.3737790320555523, "learning_rate": 6.851595006934814e-06, "loss": 0.6664, 
"step": 494 }, { "epoch": 0.3432732316227462, "grad_norm": 0.38558426487726943, "learning_rate": 6.865464632454925e-06, "loss": 0.638, "step": 495 }, { "epoch": 0.34396671289875175, "grad_norm": 0.3506520197301319, "learning_rate": 6.879334257975035e-06, "loss": 0.6817, "step": 496 }, { "epoch": 0.3446601941747573, "grad_norm": 0.3392313412226792, "learning_rate": 6.893203883495147e-06, "loss": 0.5512, "step": 497 }, { "epoch": 0.34535367545076284, "grad_norm": 0.43644671710710203, "learning_rate": 6.9070735090152574e-06, "loss": 0.6812, "step": 498 }, { "epoch": 0.3460471567267684, "grad_norm": 0.37686842175908253, "learning_rate": 6.920943134535368e-06, "loss": 0.6731, "step": 499 }, { "epoch": 0.34674063800277394, "grad_norm": 0.3750595228107398, "learning_rate": 6.93481276005548e-06, "loss": 0.6621, "step": 500 }, { "epoch": 0.3474341192787795, "grad_norm": 0.4494243935110913, "learning_rate": 6.94868238557559e-06, "loss": 0.6918, "step": 501 }, { "epoch": 0.34812760055478503, "grad_norm": 0.37332066624967647, "learning_rate": 6.962552011095701e-06, "loss": 0.6737, "step": 502 }, { "epoch": 0.3488210818307906, "grad_norm": 0.40971393810899, "learning_rate": 6.976421636615812e-06, "loss": 0.6566, "step": 503 }, { "epoch": 0.34951456310679613, "grad_norm": 0.3997993832631874, "learning_rate": 6.990291262135923e-06, "loss": 0.6523, "step": 504 }, { "epoch": 0.3502080443828017, "grad_norm": 0.3368593714753437, "learning_rate": 7.004160887656034e-06, "loss": 0.6393, "step": 505 }, { "epoch": 0.3509015256588072, "grad_norm": 0.39120053297204876, "learning_rate": 7.018030513176145e-06, "loss": 0.6642, "step": 506 }, { "epoch": 0.35159500693481277, "grad_norm": 0.40169017314000155, "learning_rate": 7.031900138696256e-06, "loss": 0.6212, "step": 507 }, { "epoch": 0.3522884882108183, "grad_norm": 0.35031468554255296, "learning_rate": 7.045769764216367e-06, "loss": 0.6441, "step": 508 }, { "epoch": 0.35298196948682387, "grad_norm": 0.35425545147392234, "learning_rate": 7.059639389736478e-06, "loss": 0.5838, "step": 509 }, { "epoch": 0.3536754507628294, "grad_norm": 0.349257127183559, "learning_rate": 7.073509015256589e-06, "loss": 0.5501, "step": 510 }, { "epoch": 0.35436893203883496, "grad_norm": 0.4173550075217959, "learning_rate": 7.0873786407767e-06, "loss": 0.653, "step": 511 }, { "epoch": 0.3550624133148405, "grad_norm": 0.35310911339201523, "learning_rate": 7.101248266296811e-06, "loss": 0.6148, "step": 512 }, { "epoch": 0.35575589459084606, "grad_norm": 0.3724106105149326, "learning_rate": 7.115117891816921e-06, "loss": 0.6372, "step": 513 }, { "epoch": 0.3564493758668516, "grad_norm": 0.3746910839190923, "learning_rate": 7.128987517337033e-06, "loss": 0.6611, "step": 514 }, { "epoch": 0.35714285714285715, "grad_norm": 0.5758133722145943, "learning_rate": 7.1428571428571436e-06, "loss": 0.6705, "step": 515 }, { "epoch": 0.3578363384188627, "grad_norm": 0.36184393180889357, "learning_rate": 7.156726768377254e-06, "loss": 0.6169, "step": 516 }, { "epoch": 0.35852981969486825, "grad_norm": 0.3890621880842859, "learning_rate": 7.170596393897366e-06, "loss": 0.6163, "step": 517 }, { "epoch": 0.3592233009708738, "grad_norm": 0.4163638890746787, "learning_rate": 7.184466019417476e-06, "loss": 0.6984, "step": 518 }, { "epoch": 0.35991678224687934, "grad_norm": 0.3678646271128726, "learning_rate": 7.198335644937587e-06, "loss": 0.5918, "step": 519 }, { "epoch": 0.3606102635228849, "grad_norm": 0.33526320878213023, "learning_rate": 7.2122052704576985e-06, "loss": 0.5814, "step": 520 }, { "epoch": 
0.36130374479889044, "grad_norm": 0.3955745590406837, "learning_rate": 7.226074895977809e-06, "loss": 0.6317, "step": 521 }, { "epoch": 0.361997226074896, "grad_norm": 0.36053174111861014, "learning_rate": 7.239944521497921e-06, "loss": 0.6554, "step": 522 }, { "epoch": 0.36269070735090153, "grad_norm": 0.3564894704917171, "learning_rate": 7.253814147018031e-06, "loss": 0.6125, "step": 523 }, { "epoch": 0.3633841886269071, "grad_norm": 0.33316077155749546, "learning_rate": 7.267683772538142e-06, "loss": 0.6448, "step": 524 }, { "epoch": 0.3640776699029126, "grad_norm": 0.34744648435203684, "learning_rate": 7.2815533980582534e-06, "loss": 0.6217, "step": 525 }, { "epoch": 0.3647711511789182, "grad_norm": 0.33563035223656806, "learning_rate": 7.295423023578364e-06, "loss": 0.5824, "step": 526 }, { "epoch": 0.3654646324549237, "grad_norm": 0.3864415802342526, "learning_rate": 7.309292649098475e-06, "loss": 0.6363, "step": 527 }, { "epoch": 0.36615811373092927, "grad_norm": 0.3716995633111904, "learning_rate": 7.323162274618586e-06, "loss": 0.568, "step": 528 }, { "epoch": 0.3668515950069348, "grad_norm": 0.330666339054274, "learning_rate": 7.337031900138697e-06, "loss": 0.6534, "step": 529 }, { "epoch": 0.36754507628294036, "grad_norm": 0.3580854730202074, "learning_rate": 7.3509015256588075e-06, "loss": 0.7107, "step": 530 }, { "epoch": 0.3682385575589459, "grad_norm": 0.3263473830256869, "learning_rate": 7.364771151178919e-06, "loss": 0.6434, "step": 531 }, { "epoch": 0.36893203883495146, "grad_norm": 0.4165092917597925, "learning_rate": 7.37864077669903e-06, "loss": 0.6386, "step": 532 }, { "epoch": 0.369625520110957, "grad_norm": 0.3623391989288637, "learning_rate": 7.39251040221914e-06, "loss": 0.7066, "step": 533 }, { "epoch": 0.37031900138696255, "grad_norm": 0.4316033729520409, "learning_rate": 7.406380027739252e-06, "loss": 0.6199, "step": 534 }, { "epoch": 0.3710124826629681, "grad_norm": 0.39532364646227364, "learning_rate": 7.4202496532593625e-06, "loss": 0.6207, "step": 535 }, { "epoch": 0.37170596393897365, "grad_norm": 0.3705591524094307, "learning_rate": 7.434119278779474e-06, "loss": 0.6097, "step": 536 }, { "epoch": 0.3723994452149792, "grad_norm": 0.4032004372577973, "learning_rate": 7.447988904299585e-06, "loss": 0.6089, "step": 537 }, { "epoch": 0.37309292649098474, "grad_norm": 0.35382613007991376, "learning_rate": 7.461858529819695e-06, "loss": 0.5554, "step": 538 }, { "epoch": 0.3737864077669903, "grad_norm": 0.40564277826124845, "learning_rate": 7.475728155339807e-06, "loss": 0.6621, "step": 539 }, { "epoch": 0.37447988904299584, "grad_norm": 0.39302305957432315, "learning_rate": 7.489597780859917e-06, "loss": 0.6141, "step": 540 }, { "epoch": 0.3751733703190014, "grad_norm": 0.4214478728911209, "learning_rate": 7.503467406380028e-06, "loss": 0.6199, "step": 541 }, { "epoch": 0.37586685159500693, "grad_norm": 0.3628049824721709, "learning_rate": 7.5173370319001396e-06, "loss": 0.6865, "step": 542 }, { "epoch": 0.3765603328710125, "grad_norm": 0.4012988276021495, "learning_rate": 7.53120665742025e-06, "loss": 0.6015, "step": 543 }, { "epoch": 0.37725381414701803, "grad_norm": 0.35129563570548594, "learning_rate": 7.545076282940361e-06, "loss": 0.6366, "step": 544 }, { "epoch": 0.3779472954230236, "grad_norm": 0.3233070882668223, "learning_rate": 7.558945908460472e-06, "loss": 0.5471, "step": 545 }, { "epoch": 0.3786407766990291, "grad_norm": 0.369766794917211, "learning_rate": 7.572815533980583e-06, "loss": 0.6021, "step": 546 }, { "epoch": 0.37933425797503467, 
"grad_norm": 0.7027124293210516, "learning_rate": 7.586685159500694e-06, "loss": 0.5974, "step": 547 }, { "epoch": 0.3800277392510402, "grad_norm": 0.3508078980491497, "learning_rate": 7.600554785020805e-06, "loss": 0.6159, "step": 548 }, { "epoch": 0.38072122052704577, "grad_norm": 0.3447122332376924, "learning_rate": 7.614424410540916e-06, "loss": 0.625, "step": 549 }, { "epoch": 0.3814147018030513, "grad_norm": 0.36742652699902334, "learning_rate": 7.628294036061027e-06, "loss": 0.6799, "step": 550 }, { "epoch": 0.38210818307905686, "grad_norm": 0.39029782442341576, "learning_rate": 7.642163661581138e-06, "loss": 0.6576, "step": 551 }, { "epoch": 0.3828016643550624, "grad_norm": 0.3694327119768504, "learning_rate": 7.65603328710125e-06, "loss": 0.595, "step": 552 }, { "epoch": 0.38349514563106796, "grad_norm": 0.38181457868583935, "learning_rate": 7.66990291262136e-06, "loss": 0.6731, "step": 553 }, { "epoch": 0.3841886269070735, "grad_norm": 0.3585947759237626, "learning_rate": 7.68377253814147e-06, "loss": 0.5964, "step": 554 }, { "epoch": 0.38488210818307905, "grad_norm": 0.3711436288734461, "learning_rate": 7.697642163661582e-06, "loss": 0.6104, "step": 555 }, { "epoch": 0.3855755894590846, "grad_norm": 0.32718425674116086, "learning_rate": 7.711511789181692e-06, "loss": 0.6054, "step": 556 }, { "epoch": 0.38626907073509015, "grad_norm": 0.40139732659377303, "learning_rate": 7.725381414701804e-06, "loss": 0.6481, "step": 557 }, { "epoch": 0.3869625520110957, "grad_norm": 0.3373681258759682, "learning_rate": 7.739251040221915e-06, "loss": 0.6284, "step": 558 }, { "epoch": 0.38765603328710124, "grad_norm": 0.34892409173412364, "learning_rate": 7.753120665742025e-06, "loss": 0.6358, "step": 559 }, { "epoch": 0.3883495145631068, "grad_norm": 0.32381651149847956, "learning_rate": 7.766990291262136e-06, "loss": 0.614, "step": 560 }, { "epoch": 0.38904299583911234, "grad_norm": 0.5387017088611541, "learning_rate": 7.780859916782248e-06, "loss": 0.658, "step": 561 }, { "epoch": 0.3897364771151179, "grad_norm": 0.3644482405436353, "learning_rate": 7.794729542302358e-06, "loss": 0.567, "step": 562 }, { "epoch": 0.39042995839112343, "grad_norm": 0.3563311439881521, "learning_rate": 7.808599167822469e-06, "loss": 0.6288, "step": 563 }, { "epoch": 0.391123439667129, "grad_norm": 0.3647926377245987, "learning_rate": 7.82246879334258e-06, "loss": 0.6083, "step": 564 }, { "epoch": 0.39181692094313453, "grad_norm": 0.38784353311841147, "learning_rate": 7.83633841886269e-06, "loss": 0.6638, "step": 565 }, { "epoch": 0.3925104022191401, "grad_norm": 0.3627516423040043, "learning_rate": 7.850208044382802e-06, "loss": 0.629, "step": 566 }, { "epoch": 0.3932038834951456, "grad_norm": 0.3709618601859805, "learning_rate": 7.864077669902913e-06, "loss": 0.652, "step": 567 }, { "epoch": 0.39389736477115117, "grad_norm": 0.367595653098122, "learning_rate": 7.877947295423023e-06, "loss": 0.6287, "step": 568 }, { "epoch": 0.3945908460471567, "grad_norm": 0.37530796936263455, "learning_rate": 7.891816920943135e-06, "loss": 0.6012, "step": 569 }, { "epoch": 0.39528432732316227, "grad_norm": 0.3630061585167657, "learning_rate": 7.905686546463246e-06, "loss": 0.6399, "step": 570 }, { "epoch": 0.3959778085991678, "grad_norm": 0.4505300922846165, "learning_rate": 7.919556171983358e-06, "loss": 0.5954, "step": 571 }, { "epoch": 0.39667128987517336, "grad_norm": 0.5241355468709938, "learning_rate": 7.933425797503468e-06, "loss": 0.5999, "step": 572 }, { "epoch": 0.3973647711511789, "grad_norm": 0.6596590261106783, 
"learning_rate": 7.947295423023579e-06, "loss": 0.6565, "step": 573 }, { "epoch": 0.39805825242718446, "grad_norm": 0.3421029721387191, "learning_rate": 7.96116504854369e-06, "loss": 0.6338, "step": 574 }, { "epoch": 0.39875173370319, "grad_norm": 0.3818309929008366, "learning_rate": 7.9750346740638e-06, "loss": 0.5922, "step": 575 }, { "epoch": 0.39944521497919555, "grad_norm": 0.3370977908591438, "learning_rate": 7.988904299583912e-06, "loss": 0.5702, "step": 576 }, { "epoch": 0.4001386962552011, "grad_norm": 0.4148378425411659, "learning_rate": 8.002773925104023e-06, "loss": 0.6824, "step": 577 }, { "epoch": 0.40083217753120665, "grad_norm": 0.3559257403836171, "learning_rate": 8.016643550624133e-06, "loss": 0.5815, "step": 578 }, { "epoch": 0.4015256588072122, "grad_norm": 0.390233012997757, "learning_rate": 8.030513176144245e-06, "loss": 0.6534, "step": 579 }, { "epoch": 0.40221914008321774, "grad_norm": 0.35742287491923425, "learning_rate": 8.044382801664356e-06, "loss": 0.6017, "step": 580 }, { "epoch": 0.4029126213592233, "grad_norm": 0.37159797793736915, "learning_rate": 8.058252427184466e-06, "loss": 0.6613, "step": 581 }, { "epoch": 0.40360610263522884, "grad_norm": 0.3610419889811722, "learning_rate": 8.072122052704577e-06, "loss": 0.606, "step": 582 }, { "epoch": 0.4042995839112344, "grad_norm": 0.3213564416582794, "learning_rate": 8.085991678224689e-06, "loss": 0.6015, "step": 583 }, { "epoch": 0.40499306518723993, "grad_norm": 0.45625536503963865, "learning_rate": 8.099861303744799e-06, "loss": 0.6629, "step": 584 }, { "epoch": 0.4056865464632455, "grad_norm": 0.3453465437928662, "learning_rate": 8.11373092926491e-06, "loss": 0.5674, "step": 585 }, { "epoch": 0.406380027739251, "grad_norm": 0.38035315799004166, "learning_rate": 8.127600554785022e-06, "loss": 0.584, "step": 586 }, { "epoch": 0.4070735090152566, "grad_norm": 0.38026533204245244, "learning_rate": 8.141470180305131e-06, "loss": 0.6458, "step": 587 }, { "epoch": 0.4077669902912621, "grad_norm": 0.505652936690654, "learning_rate": 8.155339805825243e-06, "loss": 0.5777, "step": 588 }, { "epoch": 0.40846047156726767, "grad_norm": 0.3630153463847811, "learning_rate": 8.169209431345354e-06, "loss": 0.6462, "step": 589 }, { "epoch": 0.4091539528432732, "grad_norm": 0.44266297377838537, "learning_rate": 8.183079056865464e-06, "loss": 0.5683, "step": 590 }, { "epoch": 0.40984743411927876, "grad_norm": 0.3106147483563408, "learning_rate": 8.196948682385576e-06, "loss": 0.5945, "step": 591 }, { "epoch": 0.4105409153952843, "grad_norm": 0.35094025241047616, "learning_rate": 8.210818307905687e-06, "loss": 0.5985, "step": 592 }, { "epoch": 0.41123439667128986, "grad_norm": 0.3603207466662012, "learning_rate": 8.224687933425797e-06, "loss": 0.633, "step": 593 }, { "epoch": 0.4119278779472954, "grad_norm": 0.3955033347968271, "learning_rate": 8.238557558945909e-06, "loss": 0.6413, "step": 594 }, { "epoch": 0.41262135922330095, "grad_norm": 0.36000565915117894, "learning_rate": 8.25242718446602e-06, "loss": 0.6554, "step": 595 }, { "epoch": 0.4133148404993065, "grad_norm": 0.34329202409414755, "learning_rate": 8.266296809986132e-06, "loss": 0.6067, "step": 596 }, { "epoch": 0.41400832177531205, "grad_norm": 0.3128773773505845, "learning_rate": 8.280166435506241e-06, "loss": 0.59, "step": 597 }, { "epoch": 0.4147018030513176, "grad_norm": 0.4023227741583138, "learning_rate": 8.294036061026353e-06, "loss": 0.5882, "step": 598 }, { "epoch": 0.41539528432732314, "grad_norm": 0.3891397827002362, "learning_rate": 
8.307905686546464e-06, "loss": 0.681, "step": 599 }, { "epoch": 0.4160887656033287, "grad_norm": 0.3720371127317372, "learning_rate": 8.321775312066574e-06, "loss": 0.618, "step": 600 }, { "epoch": 0.41678224687933424, "grad_norm": 0.35999608094557894, "learning_rate": 8.335644937586686e-06, "loss": 0.7464, "step": 601 }, { "epoch": 0.4174757281553398, "grad_norm": 0.3574131985513801, "learning_rate": 8.349514563106797e-06, "loss": 0.643, "step": 602 }, { "epoch": 0.41816920943134533, "grad_norm": 0.3901567629810148, "learning_rate": 8.363384188626907e-06, "loss": 0.6128, "step": 603 }, { "epoch": 0.4188626907073509, "grad_norm": 0.3121366927797027, "learning_rate": 8.377253814147018e-06, "loss": 0.5881, "step": 604 }, { "epoch": 0.41955617198335643, "grad_norm": 7.777784187646424, "learning_rate": 8.39112343966713e-06, "loss": 0.6798, "step": 605 }, { "epoch": 0.420249653259362, "grad_norm": 0.37283542003408443, "learning_rate": 8.40499306518724e-06, "loss": 0.5584, "step": 606 }, { "epoch": 0.4209431345353675, "grad_norm": 0.4133622213604055, "learning_rate": 8.418862690707351e-06, "loss": 0.6643, "step": 607 }, { "epoch": 0.42163661581137307, "grad_norm": 0.3683511948530525, "learning_rate": 8.432732316227463e-06, "loss": 0.6531, "step": 608 }, { "epoch": 0.4223300970873786, "grad_norm": 0.3424953294175224, "learning_rate": 8.446601941747573e-06, "loss": 0.6101, "step": 609 }, { "epoch": 0.42302357836338417, "grad_norm": 0.33224537868688725, "learning_rate": 8.460471567267684e-06, "loss": 0.6157, "step": 610 }, { "epoch": 0.4237170596393897, "grad_norm": 0.36205005650227956, "learning_rate": 8.474341192787796e-06, "loss": 0.5699, "step": 611 }, { "epoch": 0.42441054091539526, "grad_norm": 0.3583343067676532, "learning_rate": 8.488210818307905e-06, "loss": 0.6227, "step": 612 }, { "epoch": 0.4251040221914008, "grad_norm": 0.34724699542050486, "learning_rate": 8.502080443828017e-06, "loss": 0.618, "step": 613 }, { "epoch": 0.42579750346740636, "grad_norm": 0.3588354342662689, "learning_rate": 8.515950069348128e-06, "loss": 0.6174, "step": 614 }, { "epoch": 0.4264909847434119, "grad_norm": 0.3275400430150551, "learning_rate": 8.529819694868238e-06, "loss": 0.5574, "step": 615 }, { "epoch": 0.42718446601941745, "grad_norm": 0.3925014990832987, "learning_rate": 8.54368932038835e-06, "loss": 0.5617, "step": 616 }, { "epoch": 0.427877947295423, "grad_norm": 0.37400960767024355, "learning_rate": 8.557558945908461e-06, "loss": 0.644, "step": 617 }, { "epoch": 0.42857142857142855, "grad_norm": 0.33813596564574294, "learning_rate": 8.571428571428571e-06, "loss": 0.55, "step": 618 }, { "epoch": 0.4292649098474341, "grad_norm": 0.32360010608792594, "learning_rate": 8.585298196948682e-06, "loss": 0.6387, "step": 619 }, { "epoch": 0.42995839112343964, "grad_norm": 0.3525534513240279, "learning_rate": 8.599167822468794e-06, "loss": 0.6646, "step": 620 }, { "epoch": 0.4306518723994452, "grad_norm": 0.41679474612151624, "learning_rate": 8.613037447988904e-06, "loss": 0.636, "step": 621 }, { "epoch": 0.43134535367545074, "grad_norm": 0.3684984554461236, "learning_rate": 8.626907073509015e-06, "loss": 0.6316, "step": 622 }, { "epoch": 0.4320388349514563, "grad_norm": 0.3168038897942362, "learning_rate": 8.640776699029127e-06, "loss": 0.5389, "step": 623 }, { "epoch": 0.43273231622746183, "grad_norm": 0.3490477207986355, "learning_rate": 8.654646324549238e-06, "loss": 0.6288, "step": 624 }, { "epoch": 0.4334257975034674, "grad_norm": 0.305919050156397, "learning_rate": 8.668515950069348e-06, "loss": 0.5392, 
"step": 625 }, { "epoch": 0.43411927877947293, "grad_norm": 0.3309731211114467, "learning_rate": 8.68238557558946e-06, "loss": 0.6231, "step": 626 }, { "epoch": 0.4348127600554785, "grad_norm": 0.36591059473372156, "learning_rate": 8.696255201109571e-06, "loss": 0.6402, "step": 627 }, { "epoch": 0.435506241331484, "grad_norm": 0.38059866948975574, "learning_rate": 8.71012482662968e-06, "loss": 0.6703, "step": 628 }, { "epoch": 0.43619972260748957, "grad_norm": 0.3523777807232137, "learning_rate": 8.723994452149792e-06, "loss": 0.5613, "step": 629 }, { "epoch": 0.4368932038834951, "grad_norm": 0.5448638090461266, "learning_rate": 8.737864077669904e-06, "loss": 0.6361, "step": 630 }, { "epoch": 0.4375866851595007, "grad_norm": 0.3586127060142806, "learning_rate": 8.751733703190015e-06, "loss": 0.5749, "step": 631 }, { "epoch": 0.43828016643550627, "grad_norm": 0.3747722318686593, "learning_rate": 8.765603328710127e-06, "loss": 0.6181, "step": 632 }, { "epoch": 0.4389736477115118, "grad_norm": 0.3920628828663998, "learning_rate": 8.779472954230237e-06, "loss": 0.6673, "step": 633 }, { "epoch": 0.43966712898751736, "grad_norm": 0.37818417962654277, "learning_rate": 8.793342579750348e-06, "loss": 0.6165, "step": 634 }, { "epoch": 0.4403606102635229, "grad_norm": 0.349001623054722, "learning_rate": 8.80721220527046e-06, "loss": 0.578, "step": 635 }, { "epoch": 0.44105409153952846, "grad_norm": 0.3473644950842794, "learning_rate": 8.82108183079057e-06, "loss": 0.6422, "step": 636 }, { "epoch": 0.441747572815534, "grad_norm": 0.3842969319738716, "learning_rate": 8.834951456310681e-06, "loss": 0.6711, "step": 637 }, { "epoch": 0.44244105409153955, "grad_norm": 0.37756291569586337, "learning_rate": 8.848821081830792e-06, "loss": 0.6555, "step": 638 }, { "epoch": 0.4431345353675451, "grad_norm": 0.40117967766113527, "learning_rate": 8.862690707350902e-06, "loss": 0.6397, "step": 639 }, { "epoch": 0.44382801664355065, "grad_norm": 0.3405515190628938, "learning_rate": 8.876560332871014e-06, "loss": 0.6025, "step": 640 }, { "epoch": 0.4445214979195562, "grad_norm": 0.3725640263256581, "learning_rate": 8.890429958391125e-06, "loss": 0.5874, "step": 641 }, { "epoch": 0.44521497919556174, "grad_norm": 0.3428200244683828, "learning_rate": 8.904299583911235e-06, "loss": 0.5893, "step": 642 }, { "epoch": 0.4459084604715673, "grad_norm": 0.33252775268279633, "learning_rate": 8.918169209431346e-06, "loss": 0.6661, "step": 643 }, { "epoch": 0.44660194174757284, "grad_norm": 0.3830520211600166, "learning_rate": 8.932038834951458e-06, "loss": 0.5683, "step": 644 }, { "epoch": 0.4472954230235784, "grad_norm": 0.3715958039866768, "learning_rate": 8.945908460471568e-06, "loss": 0.6138, "step": 645 }, { "epoch": 0.44798890429958393, "grad_norm": 0.35265837994315496, "learning_rate": 8.95977808599168e-06, "loss": 0.5994, "step": 646 }, { "epoch": 0.4486823855755895, "grad_norm": 0.33039197619618926, "learning_rate": 8.97364771151179e-06, "loss": 0.6264, "step": 647 }, { "epoch": 0.44937586685159503, "grad_norm": 0.384641530384446, "learning_rate": 8.9875173370319e-06, "loss": 0.642, "step": 648 }, { "epoch": 0.4500693481276006, "grad_norm": 0.3607597153002498, "learning_rate": 9.001386962552012e-06, "loss": 0.6395, "step": 649 }, { "epoch": 0.4507628294036061, "grad_norm": 0.35606092385216564, "learning_rate": 9.015256588072124e-06, "loss": 0.6401, "step": 650 }, { "epoch": 0.45145631067961167, "grad_norm": 0.4090353440808489, "learning_rate": 9.029126213592233e-06, "loss": 0.5907, "step": 651 }, { "epoch": 
0.4521497919556172, "grad_norm": 0.37308131460336813, "learning_rate": 9.042995839112345e-06, "loss": 0.633, "step": 652 }, { "epoch": 0.45284327323162277, "grad_norm": 0.3430242542135983, "learning_rate": 9.056865464632456e-06, "loss": 0.6001, "step": 653 }, { "epoch": 0.4535367545076283, "grad_norm": 0.36017796963933435, "learning_rate": 9.070735090152568e-06, "loss": 0.6468, "step": 654 }, { "epoch": 0.45423023578363386, "grad_norm": 0.4294104559681809, "learning_rate": 9.084604715672678e-06, "loss": 0.6611, "step": 655 }, { "epoch": 0.4549237170596394, "grad_norm": 0.37074717444942756, "learning_rate": 9.098474341192789e-06, "loss": 0.6274, "step": 656 }, { "epoch": 0.45561719833564496, "grad_norm": 0.34001423039867223, "learning_rate": 9.1123439667129e-06, "loss": 0.5995, "step": 657 }, { "epoch": 0.4563106796116505, "grad_norm": 0.3601282007392672, "learning_rate": 9.12621359223301e-06, "loss": 0.6339, "step": 658 }, { "epoch": 0.45700416088765605, "grad_norm": 0.5888944383430742, "learning_rate": 9.140083217753122e-06, "loss": 0.6437, "step": 659 }, { "epoch": 0.4576976421636616, "grad_norm": 0.3412602724562163, "learning_rate": 9.153952843273233e-06, "loss": 0.599, "step": 660 }, { "epoch": 0.45839112343966715, "grad_norm": 0.37562759117690714, "learning_rate": 9.167822468793343e-06, "loss": 0.6625, "step": 661 }, { "epoch": 0.4590846047156727, "grad_norm": 0.31646784457755045, "learning_rate": 9.181692094313455e-06, "loss": 0.6401, "step": 662 }, { "epoch": 0.45977808599167824, "grad_norm": 0.3100306138668391, "learning_rate": 9.195561719833566e-06, "loss": 0.6041, "step": 663 }, { "epoch": 0.4604715672676838, "grad_norm": 0.35226961678368174, "learning_rate": 9.209431345353676e-06, "loss": 0.5334, "step": 664 }, { "epoch": 0.46116504854368934, "grad_norm": 0.3410210807636282, "learning_rate": 9.223300970873788e-06, "loss": 0.5999, "step": 665 }, { "epoch": 0.4618585298196949, "grad_norm": 0.3801675232094619, "learning_rate": 9.237170596393899e-06, "loss": 0.6375, "step": 666 }, { "epoch": 0.46255201109570043, "grad_norm": 0.364125012519612, "learning_rate": 9.251040221914009e-06, "loss": 0.6122, "step": 667 }, { "epoch": 0.463245492371706, "grad_norm": 0.3461524800556262, "learning_rate": 9.26490984743412e-06, "loss": 0.6194, "step": 668 }, { "epoch": 0.46393897364771153, "grad_norm": 0.3660251738461281, "learning_rate": 9.278779472954232e-06, "loss": 0.6357, "step": 669 }, { "epoch": 0.4646324549237171, "grad_norm": 0.3402496059048011, "learning_rate": 9.292649098474342e-06, "loss": 0.643, "step": 670 }, { "epoch": 0.4653259361997226, "grad_norm": 0.40563360025585726, "learning_rate": 9.306518723994453e-06, "loss": 0.5717, "step": 671 }, { "epoch": 0.46601941747572817, "grad_norm": 0.36925076136939955, "learning_rate": 9.320388349514565e-06, "loss": 0.6217, "step": 672 }, { "epoch": 0.4667128987517337, "grad_norm": 0.3478622265674262, "learning_rate": 9.334257975034674e-06, "loss": 0.581, "step": 673 }, { "epoch": 0.46740638002773927, "grad_norm": 0.3317012135835466, "learning_rate": 9.348127600554786e-06, "loss": 0.5265, "step": 674 }, { "epoch": 0.4680998613037448, "grad_norm": 0.36481458204878303, "learning_rate": 9.361997226074897e-06, "loss": 0.6234, "step": 675 }, { "epoch": 0.46879334257975036, "grad_norm": 0.3447916072991981, "learning_rate": 9.375866851595007e-06, "loss": 0.5516, "step": 676 }, { "epoch": 0.4694868238557559, "grad_norm": 0.3133773887900021, "learning_rate": 9.389736477115119e-06, "loss": 0.5596, "step": 677 }, { "epoch": 0.47018030513176146, 
"grad_norm": 0.32308806295093884, "learning_rate": 9.40360610263523e-06, "loss": 0.5802, "step": 678 }, { "epoch": 0.470873786407767, "grad_norm": 0.3481099319108013, "learning_rate": 9.41747572815534e-06, "loss": 0.669, "step": 679 }, { "epoch": 0.47156726768377255, "grad_norm": 0.4351725032471424, "learning_rate": 9.431345353675451e-06, "loss": 0.7111, "step": 680 }, { "epoch": 0.4722607489597781, "grad_norm": 0.36114338628781123, "learning_rate": 9.445214979195563e-06, "loss": 0.6631, "step": 681 }, { "epoch": 0.47295423023578365, "grad_norm": 0.3800252832289555, "learning_rate": 9.459084604715674e-06, "loss": 0.6906, "step": 682 }, { "epoch": 0.4736477115117892, "grad_norm": 0.38194549464555544, "learning_rate": 9.472954230235784e-06, "loss": 0.603, "step": 683 }, { "epoch": 0.47434119278779474, "grad_norm": 0.36617573291203553, "learning_rate": 9.486823855755896e-06, "loss": 0.6271, "step": 684 }, { "epoch": 0.4750346740638003, "grad_norm": 0.3264948505149436, "learning_rate": 9.500693481276007e-06, "loss": 0.5752, "step": 685 }, { "epoch": 0.47572815533980584, "grad_norm": 0.3501432474989948, "learning_rate": 9.514563106796117e-06, "loss": 0.6722, "step": 686 }, { "epoch": 0.4764216366158114, "grad_norm": 0.35169803665934024, "learning_rate": 9.528432732316229e-06, "loss": 0.5869, "step": 687 }, { "epoch": 0.47711511789181693, "grad_norm": 0.31858527501959116, "learning_rate": 9.54230235783634e-06, "loss": 0.5992, "step": 688 }, { "epoch": 0.4778085991678225, "grad_norm": 0.3300810353931968, "learning_rate": 9.55617198335645e-06, "loss": 0.5881, "step": 689 }, { "epoch": 0.478502080443828, "grad_norm": 0.38880033996386565, "learning_rate": 9.570041608876561e-06, "loss": 0.6189, "step": 690 }, { "epoch": 0.4791955617198336, "grad_norm": 0.36418060142418335, "learning_rate": 9.583911234396673e-06, "loss": 0.5857, "step": 691 }, { "epoch": 0.4798890429958391, "grad_norm": 0.3540058535907166, "learning_rate": 9.597780859916783e-06, "loss": 0.5957, "step": 692 }, { "epoch": 0.48058252427184467, "grad_norm": 0.31340466492363656, "learning_rate": 9.611650485436894e-06, "loss": 0.6161, "step": 693 }, { "epoch": 0.4812760055478502, "grad_norm": 0.4557610208993212, "learning_rate": 9.625520110957006e-06, "loss": 0.7067, "step": 694 }, { "epoch": 0.48196948682385576, "grad_norm": 0.38295004302091745, "learning_rate": 9.639389736477115e-06, "loss": 0.5717, "step": 695 }, { "epoch": 0.4826629680998613, "grad_norm": 0.37190722678317617, "learning_rate": 9.653259361997227e-06, "loss": 0.6014, "step": 696 }, { "epoch": 0.48335644937586686, "grad_norm": 0.36611389398482685, "learning_rate": 9.667128987517338e-06, "loss": 0.6137, "step": 697 }, { "epoch": 0.4840499306518724, "grad_norm": 0.32538737533209905, "learning_rate": 9.680998613037448e-06, "loss": 0.5112, "step": 698 }, { "epoch": 0.48474341192787795, "grad_norm": 0.3463470381288639, "learning_rate": 9.69486823855756e-06, "loss": 0.6678, "step": 699 }, { "epoch": 0.4854368932038835, "grad_norm": 0.3824644240082401, "learning_rate": 9.708737864077671e-06, "loss": 0.7011, "step": 700 }, { "epoch": 0.48613037447988905, "grad_norm": 0.34319878679914073, "learning_rate": 9.722607489597781e-06, "loss": 0.6133, "step": 701 }, { "epoch": 0.4868238557558946, "grad_norm": 0.33961240170897133, "learning_rate": 9.736477115117893e-06, "loss": 0.617, "step": 702 }, { "epoch": 0.48751733703190014, "grad_norm": 0.36115468346237317, "learning_rate": 9.750346740638004e-06, "loss": 0.5648, "step": 703 }, { "epoch": 0.4882108183079057, "grad_norm": 
0.34936958202921586, "learning_rate": 9.764216366158114e-06, "loss": 0.6263, "step": 704 }, { "epoch": 0.48890429958391124, "grad_norm": 0.4050860259334066, "learning_rate": 9.778085991678225e-06, "loss": 0.6111, "step": 705 }, { "epoch": 0.4895977808599168, "grad_norm": 0.32637207500367693, "learning_rate": 9.791955617198337e-06, "loss": 0.5298, "step": 706 }, { "epoch": 0.49029126213592233, "grad_norm": 0.32587264169952956, "learning_rate": 9.805825242718447e-06, "loss": 0.5618, "step": 707 }, { "epoch": 0.4909847434119279, "grad_norm": 0.34778623114061646, "learning_rate": 9.819694868238558e-06, "loss": 0.6204, "step": 708 }, { "epoch": 0.49167822468793343, "grad_norm": 0.36103341538436834, "learning_rate": 9.83356449375867e-06, "loss": 0.6517, "step": 709 }, { "epoch": 0.492371705963939, "grad_norm": 0.33128531808851164, "learning_rate": 9.847434119278781e-06, "loss": 0.5713, "step": 710 }, { "epoch": 0.4930651872399445, "grad_norm": 0.3784980712716657, "learning_rate": 9.861303744798891e-06, "loss": 0.6977, "step": 711 }, { "epoch": 0.49375866851595007, "grad_norm": 0.3402060319209778, "learning_rate": 9.875173370319002e-06, "loss": 0.6149, "step": 712 }, { "epoch": 0.4944521497919556, "grad_norm": 0.3382292644840488, "learning_rate": 9.889042995839114e-06, "loss": 0.5593, "step": 713 }, { "epoch": 0.49514563106796117, "grad_norm": 0.33045995267051376, "learning_rate": 9.902912621359224e-06, "loss": 0.595, "step": 714 }, { "epoch": 0.4958391123439667, "grad_norm": 0.3522171989161278, "learning_rate": 9.916782246879335e-06, "loss": 0.6228, "step": 715 }, { "epoch": 0.49653259361997226, "grad_norm": 0.3477928080599467, "learning_rate": 9.930651872399447e-06, "loss": 0.6339, "step": 716 }, { "epoch": 0.4972260748959778, "grad_norm": 0.38447654259666586, "learning_rate": 9.944521497919557e-06, "loss": 0.6673, "step": 717 }, { "epoch": 0.49791955617198336, "grad_norm": 0.32543806149067384, "learning_rate": 9.958391123439668e-06, "loss": 0.624, "step": 718 }, { "epoch": 0.4986130374479889, "grad_norm": 0.36357628229093936, "learning_rate": 9.97226074895978e-06, "loss": 0.6236, "step": 719 }, { "epoch": 0.49930651872399445, "grad_norm": 0.3518223270174587, "learning_rate": 9.98613037447989e-06, "loss": 0.6528, "step": 720 }, { "epoch": 0.5, "grad_norm": 0.30952998333257387, "learning_rate": 1e-05, "loss": 0.54, "step": 721 }, { "epoch": 0.5006934812760055, "grad_norm": 0.41501604220152166, "learning_rate": 9.999999414018107e-06, "loss": 0.5692, "step": 722 }, { "epoch": 0.5013869625520111, "grad_norm": 0.32514562243010514, "learning_rate": 9.999997656072562e-06, "loss": 0.5872, "step": 723 }, { "epoch": 0.5020804438280166, "grad_norm": 0.3470627802984313, "learning_rate": 9.999994726163778e-06, "loss": 0.6359, "step": 724 }, { "epoch": 0.5027739251040222, "grad_norm": 0.3745973618825542, "learning_rate": 9.999990624292442e-06, "loss": 0.647, "step": 725 }, { "epoch": 0.5034674063800277, "grad_norm": 0.34450891072001916, "learning_rate": 9.999985350459514e-06, "loss": 0.6046, "step": 726 }, { "epoch": 0.5041608876560333, "grad_norm": 0.33360430874820624, "learning_rate": 9.999978904666233e-06, "loss": 0.5942, "step": 727 }, { "epoch": 0.5048543689320388, "grad_norm": 0.33409116101469366, "learning_rate": 9.999971286914108e-06, "loss": 0.5498, "step": 728 }, { "epoch": 0.5055478502080444, "grad_norm": 0.33690900277471875, "learning_rate": 9.999962497204925e-06, "loss": 0.6787, "step": 729 }, { "epoch": 0.5062413314840499, "grad_norm": 0.3299105111409337, "learning_rate": 9.999952535540743e-06, 
"loss": 0.5169, "step": 730 }, { "epoch": 0.5069348127600555, "grad_norm": 0.35733261526077775, "learning_rate": 9.999941401923899e-06, "loss": 0.5994, "step": 731 }, { "epoch": 0.507628294036061, "grad_norm": 0.37167083547332896, "learning_rate": 9.999929096357001e-06, "loss": 0.5934, "step": 732 }, { "epoch": 0.5083217753120666, "grad_norm": 0.3410947793093965, "learning_rate": 9.999915618842935e-06, "loss": 0.6686, "step": 733 }, { "epoch": 0.5090152565880721, "grad_norm": 0.35813602667713473, "learning_rate": 9.99990096938486e-06, "loss": 0.5926, "step": 734 }, { "epoch": 0.5097087378640777, "grad_norm": 0.34850411155772887, "learning_rate": 9.999885147986207e-06, "loss": 0.6172, "step": 735 }, { "epoch": 0.5104022191400832, "grad_norm": 0.3997462176758978, "learning_rate": 9.999868154650686e-06, "loss": 0.6658, "step": 736 }, { "epoch": 0.5110957004160888, "grad_norm": 0.3581097909925207, "learning_rate": 9.99984998938228e-06, "loss": 0.5888, "step": 737 }, { "epoch": 0.5117891816920943, "grad_norm": 0.328277957660063, "learning_rate": 9.999830652185248e-06, "loss": 0.5651, "step": 738 }, { "epoch": 0.5124826629680999, "grad_norm": 0.46818075793680175, "learning_rate": 9.999810143064122e-06, "loss": 0.6999, "step": 739 }, { "epoch": 0.5131761442441054, "grad_norm": 0.3522552661114412, "learning_rate": 9.999788462023707e-06, "loss": 0.6181, "step": 740 }, { "epoch": 0.513869625520111, "grad_norm": 0.33841693670191736, "learning_rate": 9.99976560906909e-06, "loss": 0.5506, "step": 741 }, { "epoch": 0.5145631067961165, "grad_norm": 0.32448542256857915, "learning_rate": 9.999741584205621e-06, "loss": 0.5528, "step": 742 }, { "epoch": 0.515256588072122, "grad_norm": 0.4015374961363589, "learning_rate": 9.999716387438935e-06, "loss": 0.6397, "step": 743 }, { "epoch": 0.5159500693481276, "grad_norm": 0.35333372732488916, "learning_rate": 9.999690018774939e-06, "loss": 0.6324, "step": 744 }, { "epoch": 0.5166435506241331, "grad_norm": 0.3118523478259235, "learning_rate": 9.99966247821981e-06, "loss": 0.6094, "step": 745 }, { "epoch": 0.5173370319001387, "grad_norm": 0.3623486446023633, "learning_rate": 9.999633765780008e-06, "loss": 0.5759, "step": 746 }, { "epoch": 0.5180305131761442, "grad_norm": 0.35080946017865067, "learning_rate": 9.999603881462258e-06, "loss": 0.6108, "step": 747 }, { "epoch": 0.5187239944521498, "grad_norm": 0.34531983652521164, "learning_rate": 9.999572825273569e-06, "loss": 0.6456, "step": 748 }, { "epoch": 0.5194174757281553, "grad_norm": 0.3568512706854189, "learning_rate": 9.999540597221217e-06, "loss": 0.591, "step": 749 }, { "epoch": 0.5201109570041609, "grad_norm": 0.3872941250866843, "learning_rate": 9.999507197312756e-06, "loss": 0.6462, "step": 750 }, { "epoch": 0.5208044382801664, "grad_norm": 0.36623504971829873, "learning_rate": 9.999472625556019e-06, "loss": 0.6365, "step": 751 }, { "epoch": 0.521497919556172, "grad_norm": 0.3594091145576425, "learning_rate": 9.999436881959105e-06, "loss": 0.5919, "step": 752 }, { "epoch": 0.5221914008321775, "grad_norm": 0.3270076408023314, "learning_rate": 9.999399966530394e-06, "loss": 0.56, "step": 753 }, { "epoch": 0.5228848821081831, "grad_norm": 0.34133621043017787, "learning_rate": 9.999361879278537e-06, "loss": 0.5661, "step": 754 }, { "epoch": 0.5235783633841886, "grad_norm": 0.37079359026140135, "learning_rate": 9.999322620212463e-06, "loss": 0.5884, "step": 755 }, { "epoch": 0.5242718446601942, "grad_norm": 0.3413876553175361, "learning_rate": 9.999282189341374e-06, "loss": 0.6294, "step": 756 }, { "epoch": 
0.5249653259361997, "grad_norm": 0.32037776440650106, "learning_rate": 9.999240586674749e-06, "loss": 0.5893, "step": 757 }, { "epoch": 0.5256588072122053, "grad_norm": 0.372701778382164, "learning_rate": 9.999197812222332e-06, "loss": 0.6726, "step": 758 }, { "epoch": 0.5263522884882108, "grad_norm": 0.34301272895701906, "learning_rate": 9.999153865994156e-06, "loss": 0.5923, "step": 759 }, { "epoch": 0.5270457697642164, "grad_norm": 0.36779244991125787, "learning_rate": 9.999108748000519e-06, "loss": 0.6126, "step": 760 }, { "epoch": 0.5277392510402219, "grad_norm": 0.3594133655125482, "learning_rate": 9.999062458251999e-06, "loss": 0.5625, "step": 761 }, { "epoch": 0.5284327323162274, "grad_norm": 0.3760775731146585, "learning_rate": 9.99901499675944e-06, "loss": 0.5998, "step": 762 }, { "epoch": 0.529126213592233, "grad_norm": 0.3508720821753457, "learning_rate": 9.998966363533972e-06, "loss": 0.6119, "step": 763 }, { "epoch": 0.5298196948682385, "grad_norm": 0.3121906419870047, "learning_rate": 9.998916558586992e-06, "loss": 0.5977, "step": 764 }, { "epoch": 0.5305131761442441, "grad_norm": 0.31578410096983867, "learning_rate": 9.998865581930176e-06, "loss": 0.5755, "step": 765 }, { "epoch": 0.5312066574202496, "grad_norm": 0.34650022010727266, "learning_rate": 9.99881343357547e-06, "loss": 0.6215, "step": 766 }, { "epoch": 0.5319001386962552, "grad_norm": 0.3871096605321635, "learning_rate": 9.998760113535097e-06, "loss": 0.6004, "step": 767 }, { "epoch": 0.5325936199722607, "grad_norm": 0.35318927332741834, "learning_rate": 9.998705621821559e-06, "loss": 0.6363, "step": 768 }, { "epoch": 0.5332871012482663, "grad_norm": 0.32787154942007024, "learning_rate": 9.998649958447624e-06, "loss": 0.6026, "step": 769 }, { "epoch": 0.5339805825242718, "grad_norm": 0.31997094021759737, "learning_rate": 9.99859312342634e-06, "loss": 0.5623, "step": 770 }, { "epoch": 0.5346740638002774, "grad_norm": 0.34604820086773813, "learning_rate": 9.99853511677103e-06, "loss": 0.6085, "step": 771 }, { "epoch": 0.5353675450762829, "grad_norm": 0.32553598125538497, "learning_rate": 9.99847593849529e-06, "loss": 0.5923, "step": 772 }, { "epoch": 0.5360610263522885, "grad_norm": 0.40036028972077475, "learning_rate": 9.99841558861299e-06, "loss": 0.567, "step": 773 }, { "epoch": 0.536754507628294, "grad_norm": 0.33160374278228255, "learning_rate": 9.998354067138276e-06, "loss": 0.6293, "step": 774 }, { "epoch": 0.5374479889042996, "grad_norm": 0.36155830359485086, "learning_rate": 9.99829137408557e-06, "loss": 0.5709, "step": 775 }, { "epoch": 0.5381414701803051, "grad_norm": 0.3915838316467222, "learning_rate": 9.998227509469565e-06, "loss": 0.5708, "step": 776 }, { "epoch": 0.5388349514563107, "grad_norm": 0.3425169869927215, "learning_rate": 9.998162473305229e-06, "loss": 0.606, "step": 777 }, { "epoch": 0.5395284327323162, "grad_norm": 0.35487253854695056, "learning_rate": 9.99809626560781e-06, "loss": 0.6234, "step": 778 }, { "epoch": 0.5402219140083218, "grad_norm": 0.3916036489836279, "learning_rate": 9.998028886392821e-06, "loss": 0.6292, "step": 779 }, { "epoch": 0.5409153952843273, "grad_norm": 0.35658352394717213, "learning_rate": 9.997960335676062e-06, "loss": 0.5864, "step": 780 }, { "epoch": 0.5416088765603329, "grad_norm": 0.33151812149335985, "learning_rate": 9.997890613473596e-06, "loss": 0.5677, "step": 781 }, { "epoch": 0.5423023578363384, "grad_norm": 0.33911034555027275, "learning_rate": 9.997819719801766e-06, "loss": 0.6162, "step": 782 }, { "epoch": 0.542995839112344, "grad_norm": 
0.5511777461489347, "learning_rate": 9.99774765467719e-06, "loss": 0.6809, "step": 783 }, { "epoch": 0.5436893203883495, "grad_norm": 0.309334809552995, "learning_rate": 9.997674418116759e-06, "loss": 0.517, "step": 784 }, { "epoch": 0.544382801664355, "grad_norm": 0.3434360109327229, "learning_rate": 9.997600010137638e-06, "loss": 0.5611, "step": 785 }, { "epoch": 0.5450762829403606, "grad_norm": 0.3665740879948818, "learning_rate": 9.99752443075727e-06, "loss": 0.5807, "step": 786 }, { "epoch": 0.5457697642163661, "grad_norm": 0.3651661888224723, "learning_rate": 9.99744767999337e-06, "loss": 0.5799, "step": 787 }, { "epoch": 0.5464632454923717, "grad_norm": 0.3480518332197476, "learning_rate": 9.997369757863926e-06, "loss": 0.6049, "step": 788 }, { "epoch": 0.5471567267683772, "grad_norm": 0.353988081230821, "learning_rate": 9.997290664387205e-06, "loss": 0.657, "step": 789 }, { "epoch": 0.5478502080443828, "grad_norm": 0.39297584753933945, "learning_rate": 9.997210399581742e-06, "loss": 0.6452, "step": 790 }, { "epoch": 0.5485436893203883, "grad_norm": 0.3933270411166608, "learning_rate": 9.997128963466355e-06, "loss": 0.6874, "step": 791 }, { "epoch": 0.5492371705963939, "grad_norm": 0.3843851610970679, "learning_rate": 9.99704635606013e-06, "loss": 0.6197, "step": 792 }, { "epoch": 0.5499306518723994, "grad_norm": 0.3563726960771155, "learning_rate": 9.996962577382428e-06, "loss": 0.6123, "step": 793 }, { "epoch": 0.550624133148405, "grad_norm": 0.36131278518348864, "learning_rate": 9.996877627452888e-06, "loss": 0.5633, "step": 794 }, { "epoch": 0.5513176144244105, "grad_norm": 0.38944813314700005, "learning_rate": 9.99679150629142e-06, "loss": 0.6196, "step": 795 }, { "epoch": 0.5520110957004161, "grad_norm": 0.39273645679432406, "learning_rate": 9.996704213918213e-06, "loss": 0.593, "step": 796 }, { "epoch": 0.5527045769764216, "grad_norm": 0.4213437548828512, "learning_rate": 9.996615750353726e-06, "loss": 0.5541, "step": 797 }, { "epoch": 0.5533980582524272, "grad_norm": 0.3376625421997148, "learning_rate": 9.996526115618694e-06, "loss": 0.5734, "step": 798 }, { "epoch": 0.5540915395284327, "grad_norm": 0.35609183352280943, "learning_rate": 9.996435309734127e-06, "loss": 0.6038, "step": 799 }, { "epoch": 0.5547850208044383, "grad_norm": 0.34194422202558405, "learning_rate": 9.996343332721308e-06, "loss": 0.6384, "step": 800 }, { "epoch": 0.5554785020804438, "grad_norm": 0.45980807415441105, "learning_rate": 9.9962501846018e-06, "loss": 0.6478, "step": 801 }, { "epoch": 0.5561719833564494, "grad_norm": 0.356131120895226, "learning_rate": 9.99615586539743e-06, "loss": 0.5836, "step": 802 }, { "epoch": 0.5568654646324549, "grad_norm": 0.3094389747187915, "learning_rate": 9.99606037513031e-06, "loss": 0.5792, "step": 803 }, { "epoch": 0.5575589459084604, "grad_norm": 0.35424906911747067, "learning_rate": 9.995963713822823e-06, "loss": 0.5552, "step": 804 }, { "epoch": 0.558252427184466, "grad_norm": 0.37134850928462093, "learning_rate": 9.995865881497621e-06, "loss": 0.6395, "step": 805 }, { "epoch": 0.5589459084604715, "grad_norm": 0.3396083482831853, "learning_rate": 9.995766878177641e-06, "loss": 0.6559, "step": 806 }, { "epoch": 0.5596393897364771, "grad_norm": 0.349721990866471, "learning_rate": 9.995666703886084e-06, "loss": 0.5718, "step": 807 }, { "epoch": 0.5603328710124826, "grad_norm": 0.35772687569475387, "learning_rate": 9.995565358646432e-06, "loss": 0.5923, "step": 808 }, { "epoch": 0.5610263522884882, "grad_norm": 0.3824059071857025, "learning_rate": 
9.995462842482441e-06, "loss": 0.5519, "step": 809 }, { "epoch": 0.5617198335644937, "grad_norm": 0.3284979543478124, "learning_rate": 9.995359155418139e-06, "loss": 0.5794, "step": 810 }, { "epoch": 0.5624133148404993, "grad_norm": 0.34818162862155966, "learning_rate": 9.995254297477827e-06, "loss": 0.5904, "step": 811 }, { "epoch": 0.5631067961165048, "grad_norm": 0.378188504715066, "learning_rate": 9.995148268686086e-06, "loss": 0.5819, "step": 812 }, { "epoch": 0.5638002773925104, "grad_norm": 0.46087643944959833, "learning_rate": 9.995041069067767e-06, "loss": 0.5456, "step": 813 }, { "epoch": 0.5644937586685159, "grad_norm": 0.3563574311634951, "learning_rate": 9.994932698647997e-06, "loss": 0.5933, "step": 814 }, { "epoch": 0.5651872399445215, "grad_norm": 0.3641945948169763, "learning_rate": 9.994823157452179e-06, "loss": 0.5946, "step": 815 }, { "epoch": 0.565880721220527, "grad_norm": 0.552909363074712, "learning_rate": 9.994712445505985e-06, "loss": 0.6261, "step": 816 }, { "epoch": 0.5665742024965326, "grad_norm": 0.6516575100223244, "learning_rate": 9.994600562835368e-06, "loss": 0.6152, "step": 817 }, { "epoch": 0.5672676837725381, "grad_norm": 0.3454900158635478, "learning_rate": 9.99448750946655e-06, "loss": 0.6046, "step": 818 }, { "epoch": 0.5679611650485437, "grad_norm": 0.35071766339724286, "learning_rate": 9.994373285426034e-06, "loss": 0.6314, "step": 819 }, { "epoch": 0.5686546463245492, "grad_norm": 0.43416563446791145, "learning_rate": 9.99425789074059e-06, "loss": 0.5519, "step": 820 }, { "epoch": 0.5693481276005548, "grad_norm": 0.375679142764231, "learning_rate": 9.994141325437269e-06, "loss": 0.6496, "step": 821 }, { "epoch": 0.5700416088765603, "grad_norm": 0.37883621605024626, "learning_rate": 9.994023589543387e-06, "loss": 0.5996, "step": 822 }, { "epoch": 0.5707350901525658, "grad_norm": 0.3147504750589824, "learning_rate": 9.993904683086544e-06, "loss": 0.5832, "step": 823 }, { "epoch": 0.5714285714285714, "grad_norm": 0.38615339122548875, "learning_rate": 9.993784606094612e-06, "loss": 0.6072, "step": 824 }, { "epoch": 0.5721220527045769, "grad_norm": 0.34055537994616036, "learning_rate": 9.993663358595736e-06, "loss": 0.5793, "step": 825 }, { "epoch": 0.5728155339805825, "grad_norm": 0.3479708413936868, "learning_rate": 9.993540940618334e-06, "loss": 0.5993, "step": 826 }, { "epoch": 0.573509015256588, "grad_norm": 0.3430681485698919, "learning_rate": 9.9934173521911e-06, "loss": 0.5673, "step": 827 }, { "epoch": 0.5742024965325936, "grad_norm": 0.40031566863708545, "learning_rate": 9.993292593343003e-06, "loss": 0.7237, "step": 828 }, { "epoch": 0.5748959778085991, "grad_norm": 0.3324760719689896, "learning_rate": 9.993166664103283e-06, "loss": 0.608, "step": 829 }, { "epoch": 0.5755894590846047, "grad_norm": 0.35885140959685163, "learning_rate": 9.993039564501463e-06, "loss": 0.6594, "step": 830 }, { "epoch": 0.5762829403606102, "grad_norm": 0.399186315746986, "learning_rate": 9.992911294567328e-06, "loss": 0.5741, "step": 831 }, { "epoch": 0.5769764216366158, "grad_norm": 0.35090334605835627, "learning_rate": 9.992781854330946e-06, "loss": 0.6422, "step": 832 }, { "epoch": 0.5776699029126213, "grad_norm": 0.31889923872767023, "learning_rate": 9.992651243822658e-06, "loss": 0.5733, "step": 833 }, { "epoch": 0.5783633841886269, "grad_norm": 0.3670958465002502, "learning_rate": 9.992519463073077e-06, "loss": 0.5851, "step": 834 }, { "epoch": 0.5790568654646324, "grad_norm": 0.3338916412965299, "learning_rate": 9.992386512113089e-06, "loss": 0.5857, 
"step": 835 }, { "epoch": 0.579750346740638, "grad_norm": 0.3583288330461265, "learning_rate": 9.99225239097386e-06, "loss": 0.6352, "step": 836 }, { "epoch": 0.5804438280166435, "grad_norm": 0.3378780438558911, "learning_rate": 9.992117099686828e-06, "loss": 0.5691, "step": 837 }, { "epoch": 0.5811373092926491, "grad_norm": 0.3256947131008986, "learning_rate": 9.9919806382837e-06, "loss": 0.608, "step": 838 }, { "epoch": 0.5818307905686546, "grad_norm": 0.3332284083468561, "learning_rate": 9.991843006796466e-06, "loss": 0.5455, "step": 839 }, { "epoch": 0.5825242718446602, "grad_norm": 0.34167001170988187, "learning_rate": 9.991704205257383e-06, "loss": 0.5673, "step": 840 }, { "epoch": 0.5832177531206657, "grad_norm": 0.30566608826041025, "learning_rate": 9.991564233698986e-06, "loss": 0.5875, "step": 841 }, { "epoch": 0.5839112343966713, "grad_norm": 0.329418583332585, "learning_rate": 9.991423092154083e-06, "loss": 0.5327, "step": 842 }, { "epoch": 0.5846047156726768, "grad_norm": 0.30507560845133674, "learning_rate": 9.991280780655757e-06, "loss": 0.5691, "step": 843 }, { "epoch": 0.5852981969486823, "grad_norm": 0.3617280149148349, "learning_rate": 9.991137299237366e-06, "loss": 0.6003, "step": 844 }, { "epoch": 0.5859916782246879, "grad_norm": 0.3671582873180128, "learning_rate": 9.990992647932537e-06, "loss": 0.5819, "step": 845 }, { "epoch": 0.5866851595006934, "grad_norm": 0.34613317062835486, "learning_rate": 9.990846826775179e-06, "loss": 0.5913, "step": 846 }, { "epoch": 0.587378640776699, "grad_norm": 0.36465833562542455, "learning_rate": 9.99069983579947e-06, "loss": 0.5707, "step": 847 }, { "epoch": 0.5880721220527045, "grad_norm": 0.380488480865335, "learning_rate": 9.990551675039863e-06, "loss": 0.6105, "step": 848 }, { "epoch": 0.5887656033287101, "grad_norm": 0.3669644067681118, "learning_rate": 9.990402344531089e-06, "loss": 0.6314, "step": 849 }, { "epoch": 0.5894590846047156, "grad_norm": 0.3634871716474428, "learning_rate": 9.990251844308145e-06, "loss": 0.5301, "step": 850 }, { "epoch": 0.5901525658807212, "grad_norm": 0.38484347585083906, "learning_rate": 9.990100174406313e-06, "loss": 0.6124, "step": 851 }, { "epoch": 0.5908460471567267, "grad_norm": 0.6158101712237795, "learning_rate": 9.989947334861136e-06, "loss": 0.6281, "step": 852 }, { "epoch": 0.5915395284327323, "grad_norm": 0.33260041145785296, "learning_rate": 9.989793325708446e-06, "loss": 0.5399, "step": 853 }, { "epoch": 0.5922330097087378, "grad_norm": 0.29013409357770487, "learning_rate": 9.989638146984337e-06, "loss": 0.507, "step": 854 }, { "epoch": 0.5929264909847434, "grad_norm": 0.33762953372103877, "learning_rate": 9.989481798725182e-06, "loss": 0.6313, "step": 855 }, { "epoch": 0.5936199722607489, "grad_norm": 0.3222219974052387, "learning_rate": 9.98932428096763e-06, "loss": 0.537, "step": 856 }, { "epoch": 0.5943134535367545, "grad_norm": 0.35485383095369083, "learning_rate": 9.989165593748602e-06, "loss": 0.54, "step": 857 }, { "epoch": 0.59500693481276, "grad_norm": 0.3414735268209781, "learning_rate": 9.98900573710529e-06, "loss": 0.5761, "step": 858 }, { "epoch": 0.5957004160887656, "grad_norm": 0.3407146971721609, "learning_rate": 9.988844711075166e-06, "loss": 0.6076, "step": 859 }, { "epoch": 0.5963938973647711, "grad_norm": 0.42069437792125053, "learning_rate": 9.988682515695973e-06, "loss": 0.6642, "step": 860 }, { "epoch": 0.5970873786407767, "grad_norm": 0.3347845063179168, "learning_rate": 9.988519151005728e-06, "loss": 0.6106, "step": 861 }, { "epoch": 0.5977808599167822, 
"grad_norm": 0.3579141738253978, "learning_rate": 9.988354617042723e-06, "loss": 0.5761, "step": 862 }, { "epoch": 0.5984743411927878, "grad_norm": 0.3460153846615427, "learning_rate": 9.988188913845523e-06, "loss": 0.5772, "step": 863 }, { "epoch": 0.5991678224687933, "grad_norm": 0.3222264974453905, "learning_rate": 9.988022041452968e-06, "loss": 0.598, "step": 864 }, { "epoch": 0.5998613037447988, "grad_norm": 0.41823344334298795, "learning_rate": 9.987853999904169e-06, "loss": 0.6456, "step": 865 }, { "epoch": 0.6005547850208044, "grad_norm": 0.3750284085110268, "learning_rate": 9.98768478923852e-06, "loss": 0.6575, "step": 866 }, { "epoch": 0.6012482662968099, "grad_norm": 0.31564041331946846, "learning_rate": 9.987514409495675e-06, "loss": 0.5677, "step": 867 }, { "epoch": 0.6019417475728155, "grad_norm": 0.37227726250887766, "learning_rate": 9.987342860715575e-06, "loss": 0.6333, "step": 868 }, { "epoch": 0.602635228848821, "grad_norm": 0.4952523103716889, "learning_rate": 9.987170142938429e-06, "loss": 0.6417, "step": 869 }, { "epoch": 0.6033287101248266, "grad_norm": 0.37715662413807755, "learning_rate": 9.98699625620472e-06, "loss": 0.5638, "step": 870 }, { "epoch": 0.6040221914008321, "grad_norm": 0.33870170954321344, "learning_rate": 9.986821200555206e-06, "loss": 0.5748, "step": 871 }, { "epoch": 0.6047156726768377, "grad_norm": 0.447394695448431, "learning_rate": 9.98664497603092e-06, "loss": 0.5667, "step": 872 }, { "epoch": 0.6054091539528432, "grad_norm": 0.31993512669500684, "learning_rate": 9.986467582673166e-06, "loss": 0.6104, "step": 873 }, { "epoch": 0.6061026352288488, "grad_norm": 0.34121670021324146, "learning_rate": 9.986289020523525e-06, "loss": 0.6298, "step": 874 }, { "epoch": 0.6067961165048543, "grad_norm": 0.3352939710398173, "learning_rate": 9.986109289623848e-06, "loss": 0.5684, "step": 875 }, { "epoch": 0.6074895977808599, "grad_norm": 0.32570600275910855, "learning_rate": 9.985928390016267e-06, "loss": 0.6172, "step": 876 }, { "epoch": 0.6081830790568654, "grad_norm": 0.3132585321745443, "learning_rate": 9.985746321743179e-06, "loss": 0.5781, "step": 877 }, { "epoch": 0.608876560332871, "grad_norm": 0.3334371299742966, "learning_rate": 9.985563084847263e-06, "loss": 0.5267, "step": 878 }, { "epoch": 0.6095700416088765, "grad_norm": 0.37735940500200976, "learning_rate": 9.985378679371465e-06, "loss": 0.585, "step": 879 }, { "epoch": 0.6102635228848821, "grad_norm": 0.3307596514634534, "learning_rate": 9.985193105359013e-06, "loss": 0.612, "step": 880 }, { "epoch": 0.6109570041608876, "grad_norm": 0.3661289923865791, "learning_rate": 9.9850063628534e-06, "loss": 0.6442, "step": 881 }, { "epoch": 0.6116504854368932, "grad_norm": 0.42036366886329213, "learning_rate": 9.984818451898399e-06, "loss": 0.6403, "step": 882 }, { "epoch": 0.6123439667128987, "grad_norm": 0.3686093860629796, "learning_rate": 9.984629372538055e-06, "loss": 0.5862, "step": 883 }, { "epoch": 0.6130374479889042, "grad_norm": 0.33930001299808243, "learning_rate": 9.984439124816687e-06, "loss": 0.5689, "step": 884 }, { "epoch": 0.6137309292649098, "grad_norm": 0.3243308830978303, "learning_rate": 9.984247708778887e-06, "loss": 0.559, "step": 885 }, { "epoch": 0.6144244105409153, "grad_norm": 0.32935726192149983, "learning_rate": 9.98405512446952e-06, "loss": 0.5722, "step": 886 }, { "epoch": 0.6151178918169209, "grad_norm": 0.34674025215944076, "learning_rate": 9.98386137193373e-06, "loss": 0.5535, "step": 887 }, { "epoch": 0.6158113730929264, "grad_norm": 0.31388258320678464, 
"learning_rate": 9.983666451216927e-06, "loss": 0.5374, "step": 888 }, { "epoch": 0.616504854368932, "grad_norm": 0.33331461141805196, "learning_rate": 9.983470362364803e-06, "loss": 0.5992, "step": 889 }, { "epoch": 0.6171983356449375, "grad_norm": 0.3377370185432441, "learning_rate": 9.983273105423317e-06, "loss": 0.5967, "step": 890 }, { "epoch": 0.6178918169209431, "grad_norm": 0.33479539583423634, "learning_rate": 9.983074680438707e-06, "loss": 0.5522, "step": 891 }, { "epoch": 0.6185852981969486, "grad_norm": 0.3126860064909479, "learning_rate": 9.98287508745748e-06, "loss": 0.5624, "step": 892 }, { "epoch": 0.6192787794729542, "grad_norm": 0.3946057135855722, "learning_rate": 9.98267432652642e-06, "loss": 0.6693, "step": 893 }, { "epoch": 0.6199722607489597, "grad_norm": 0.32483191440023923, "learning_rate": 9.982472397692585e-06, "loss": 0.6319, "step": 894 }, { "epoch": 0.6206657420249653, "grad_norm": 0.33232060016498727, "learning_rate": 9.982269301003305e-06, "loss": 0.5309, "step": 895 }, { "epoch": 0.6213592233009708, "grad_norm": 0.3147782800255537, "learning_rate": 9.982065036506183e-06, "loss": 0.5672, "step": 896 }, { "epoch": 0.6220527045769764, "grad_norm": 0.33633547096338834, "learning_rate": 9.981859604249098e-06, "loss": 0.5898, "step": 897 }, { "epoch": 0.6227461858529819, "grad_norm": 0.3918801038501328, "learning_rate": 9.981653004280203e-06, "loss": 0.5986, "step": 898 }, { "epoch": 0.6234396671289875, "grad_norm": 0.38464214514703654, "learning_rate": 9.981445236647923e-06, "loss": 0.5878, "step": 899 }, { "epoch": 0.624133148404993, "grad_norm": 0.30436655993558426, "learning_rate": 9.981236301400955e-06, "loss": 0.5737, "step": 900 }, { "epoch": 0.6248266296809986, "grad_norm": 0.3189560160759198, "learning_rate": 9.981026198588276e-06, "loss": 0.6132, "step": 901 }, { "epoch": 0.6255201109570042, "grad_norm": 0.3550589009893856, "learning_rate": 9.980814928259129e-06, "loss": 0.6255, "step": 902 }, { "epoch": 0.6262135922330098, "grad_norm": 0.4997772712303221, "learning_rate": 9.980602490463037e-06, "loss": 0.5564, "step": 903 }, { "epoch": 0.6269070735090153, "grad_norm": 0.34593571571772425, "learning_rate": 9.98038888524979e-06, "loss": 0.5459, "step": 904 }, { "epoch": 0.6276005547850209, "grad_norm": 0.35195237449428995, "learning_rate": 9.98017411266946e-06, "loss": 0.5999, "step": 905 }, { "epoch": 0.6282940360610264, "grad_norm": 0.3494946695679501, "learning_rate": 9.979958172772386e-06, "loss": 0.6094, "step": 906 }, { "epoch": 0.628987517337032, "grad_norm": 0.32944296945437185, "learning_rate": 9.979741065609182e-06, "loss": 0.6239, "step": 907 }, { "epoch": 0.6296809986130375, "grad_norm": 0.343188892969881, "learning_rate": 9.979522791230739e-06, "loss": 0.5749, "step": 908 }, { "epoch": 0.630374479889043, "grad_norm": 0.3431693932435067, "learning_rate": 9.979303349688214e-06, "loss": 0.6418, "step": 909 }, { "epoch": 0.6310679611650486, "grad_norm": 0.6260916997900723, "learning_rate": 9.979082741033047e-06, "loss": 0.6348, "step": 910 }, { "epoch": 0.6317614424410541, "grad_norm": 0.3459150533249548, "learning_rate": 9.978860965316945e-06, "loss": 0.5591, "step": 911 }, { "epoch": 0.6324549237170597, "grad_norm": 0.33333027810277965, "learning_rate": 9.978638022591894e-06, "loss": 0.5787, "step": 912 }, { "epoch": 0.6331484049930652, "grad_norm": 0.32706243660827083, "learning_rate": 9.978413912910145e-06, "loss": 0.598, "step": 913 }, { "epoch": 0.6338418862690708, "grad_norm": 0.3190908490026014, "learning_rate": 9.978188636324231e-06, 
"loss": 0.5783, "step": 914 }, { "epoch": 0.6345353675450763, "grad_norm": 0.3449967775841397, "learning_rate": 9.977962192886954e-06, "loss": 0.5929, "step": 915 }, { "epoch": 0.6352288488210819, "grad_norm": 0.3760267285165008, "learning_rate": 9.977734582651393e-06, "loss": 0.6492, "step": 916 }, { "epoch": 0.6359223300970874, "grad_norm": 0.33146825505524, "learning_rate": 9.977505805670895e-06, "loss": 0.6276, "step": 917 }, { "epoch": 0.636615811373093, "grad_norm": 0.3225866570301169, "learning_rate": 9.977275861999084e-06, "loss": 0.524, "step": 918 }, { "epoch": 0.6373092926490985, "grad_norm": 0.3413315589556036, "learning_rate": 9.977044751689858e-06, "loss": 0.6302, "step": 919 }, { "epoch": 0.6380027739251041, "grad_norm": 0.316767161688686, "learning_rate": 9.976812474797388e-06, "loss": 0.5139, "step": 920 }, { "epoch": 0.6386962552011096, "grad_norm": 0.336423549371417, "learning_rate": 9.97657903137612e-06, "loss": 0.603, "step": 921 }, { "epoch": 0.6393897364771152, "grad_norm": 0.3590292032224001, "learning_rate": 9.976344421480766e-06, "loss": 0.6633, "step": 922 }, { "epoch": 0.6400832177531207, "grad_norm": 0.3409483030153749, "learning_rate": 9.976108645166322e-06, "loss": 0.6064, "step": 923 }, { "epoch": 0.6407766990291263, "grad_norm": 0.3424195023715099, "learning_rate": 9.97587170248805e-06, "loss": 0.5987, "step": 924 }, { "epoch": 0.6414701803051318, "grad_norm": 0.33700917472951303, "learning_rate": 9.975633593501485e-06, "loss": 0.6875, "step": 925 }, { "epoch": 0.6421636615811374, "grad_norm": 0.32836587584052596, "learning_rate": 9.975394318262443e-06, "loss": 0.6266, "step": 926 }, { "epoch": 0.6428571428571429, "grad_norm": 0.33101502685906026, "learning_rate": 9.975153876827008e-06, "loss": 0.6597, "step": 927 }, { "epoch": 0.6435506241331485, "grad_norm": 0.33255835528358707, "learning_rate": 9.974912269251534e-06, "loss": 0.536, "step": 928 }, { "epoch": 0.644244105409154, "grad_norm": 0.3603313792246961, "learning_rate": 9.974669495592655e-06, "loss": 0.594, "step": 929 }, { "epoch": 0.6449375866851595, "grad_norm": 0.4947418665609005, "learning_rate": 9.974425555907275e-06, "loss": 0.5974, "step": 930 }, { "epoch": 0.6456310679611651, "grad_norm": 0.3081307295348536, "learning_rate": 9.97418045025257e-06, "loss": 0.5338, "step": 931 }, { "epoch": 0.6463245492371706, "grad_norm": 0.34525440992902945, "learning_rate": 9.973934178685992e-06, "loss": 0.5841, "step": 932 }, { "epoch": 0.6470180305131762, "grad_norm": 0.3685839827243698, "learning_rate": 9.973686741265265e-06, "loss": 0.5864, "step": 933 }, { "epoch": 0.6477115117891817, "grad_norm": 0.35534408188736244, "learning_rate": 9.973438138048389e-06, "loss": 0.5422, "step": 934 }, { "epoch": 0.6484049930651873, "grad_norm": 0.3906368162822919, "learning_rate": 9.973188369093631e-06, "loss": 0.5768, "step": 935 }, { "epoch": 0.6490984743411928, "grad_norm": 0.36012023338893784, "learning_rate": 9.972937434459538e-06, "loss": 0.545, "step": 936 }, { "epoch": 0.6497919556171984, "grad_norm": 0.4037319051285458, "learning_rate": 9.972685334204926e-06, "loss": 0.6142, "step": 937 }, { "epoch": 0.6504854368932039, "grad_norm": 0.3562140193168272, "learning_rate": 9.972432068388885e-06, "loss": 0.6215, "step": 938 }, { "epoch": 0.6511789181692095, "grad_norm": 0.3334502163629595, "learning_rate": 9.972177637070779e-06, "loss": 0.624, "step": 939 }, { "epoch": 0.651872399445215, "grad_norm": 0.3391506641330937, "learning_rate": 9.971922040310244e-06, "loss": 0.5857, "step": 940 }, { "epoch": 
0.6525658807212206, "grad_norm": 0.31304795665423457, "learning_rate": 9.971665278167193e-06, "loss": 0.5541, "step": 941 }, { "epoch": 0.6532593619972261, "grad_norm": 0.3470678124095459, "learning_rate": 9.971407350701808e-06, "loss": 0.6141, "step": 942 }, { "epoch": 0.6539528432732317, "grad_norm": 0.3640580816601225, "learning_rate": 9.971148257974543e-06, "loss": 0.5779, "step": 943 }, { "epoch": 0.6546463245492372, "grad_norm": 0.36381434024896536, "learning_rate": 9.97088800004613e-06, "loss": 0.549, "step": 944 }, { "epoch": 0.6553398058252428, "grad_norm": 0.40933415568980586, "learning_rate": 9.97062657697757e-06, "loss": 0.5656, "step": 945 }, { "epoch": 0.6560332871012483, "grad_norm": 0.3588854690529746, "learning_rate": 9.970363988830138e-06, "loss": 0.597, "step": 946 }, { "epoch": 0.6567267683772539, "grad_norm": 0.36464656084080477, "learning_rate": 9.970100235665386e-06, "loss": 0.5743, "step": 947 }, { "epoch": 0.6574202496532594, "grad_norm": 0.32076356849363635, "learning_rate": 9.969835317545133e-06, "loss": 0.613, "step": 948 }, { "epoch": 0.658113730929265, "grad_norm": 0.35441860473056175, "learning_rate": 9.969569234531475e-06, "loss": 0.6019, "step": 949 }, { "epoch": 0.6588072122052705, "grad_norm": 0.3494071986318162, "learning_rate": 9.969301986686782e-06, "loss": 0.5707, "step": 950 }, { "epoch": 0.659500693481276, "grad_norm": 0.34668012733349474, "learning_rate": 9.969033574073689e-06, "loss": 0.5333, "step": 951 }, { "epoch": 0.6601941747572816, "grad_norm": 0.3338094515703168, "learning_rate": 9.968763996755115e-06, "loss": 0.5743, "step": 952 }, { "epoch": 0.6608876560332871, "grad_norm": 0.33161620384603135, "learning_rate": 9.968493254794247e-06, "loss": 0.5493, "step": 953 }, { "epoch": 0.6615811373092927, "grad_norm": 0.34568897389513337, "learning_rate": 9.968221348254543e-06, "loss": 0.601, "step": 954 }, { "epoch": 0.6622746185852982, "grad_norm": 0.34684248566121006, "learning_rate": 9.967948277199736e-06, "loss": 0.5742, "step": 955 }, { "epoch": 0.6629680998613038, "grad_norm": 0.3425555990494531, "learning_rate": 9.967674041693831e-06, "loss": 0.6075, "step": 956 }, { "epoch": 0.6636615811373093, "grad_norm": 0.3336947049702203, "learning_rate": 9.967398641801111e-06, "loss": 0.5625, "step": 957 }, { "epoch": 0.6643550624133149, "grad_norm": 0.3729854187748714, "learning_rate": 9.967122077586124e-06, "loss": 0.5725, "step": 958 }, { "epoch": 0.6650485436893204, "grad_norm": 0.3357350735840092, "learning_rate": 9.966844349113695e-06, "loss": 0.5786, "step": 959 }, { "epoch": 0.665742024965326, "grad_norm": 0.33270854302770486, "learning_rate": 9.966565456448923e-06, "loss": 0.5685, "step": 960 }, { "epoch": 0.6664355062413315, "grad_norm": 0.31271771303028706, "learning_rate": 9.966285399657175e-06, "loss": 0.5746, "step": 961 }, { "epoch": 0.6671289875173371, "grad_norm": 0.3367176285583568, "learning_rate": 9.9660041788041e-06, "loss": 0.5687, "step": 962 }, { "epoch": 0.6678224687933426, "grad_norm": 0.31678615811257615, "learning_rate": 9.965721793955609e-06, "loss": 0.5883, "step": 963 }, { "epoch": 0.6685159500693482, "grad_norm": 0.34291902832443727, "learning_rate": 9.965438245177895e-06, "loss": 0.6012, "step": 964 }, { "epoch": 0.6692094313453537, "grad_norm": 0.3809367221460459, "learning_rate": 9.965153532537416e-06, "loss": 0.6233, "step": 965 }, { "epoch": 0.6699029126213593, "grad_norm": 0.3547760519179594, "learning_rate": 9.96486765610091e-06, "loss": 0.5778, "step": 966 }, { "epoch": 0.6705963938973648, "grad_norm": 
0.36668523892323834, "learning_rate": 9.96458061593538e-06, "loss": 0.5746, "step": 967 }, { "epoch": 0.6712898751733704, "grad_norm": 0.328016965053142, "learning_rate": 9.964292412108109e-06, "loss": 0.5915, "step": 968 }, { "epoch": 0.6719833564493759, "grad_norm": 0.34407606317022954, "learning_rate": 9.964003044686653e-06, "loss": 0.5501, "step": 969 }, { "epoch": 0.6726768377253814, "grad_norm": 0.35674597332836255, "learning_rate": 9.963712513738832e-06, "loss": 0.5614, "step": 970 }, { "epoch": 0.673370319001387, "grad_norm": 0.341599707279666, "learning_rate": 9.963420819332747e-06, "loss": 0.5648, "step": 971 }, { "epoch": 0.6740638002773925, "grad_norm": 0.32935977963907553, "learning_rate": 9.963127961536769e-06, "loss": 0.527, "step": 972 }, { "epoch": 0.6747572815533981, "grad_norm": 0.4838100570879444, "learning_rate": 9.96283394041954e-06, "loss": 0.6332, "step": 973 }, { "epoch": 0.6754507628294036, "grad_norm": 0.32862581119192874, "learning_rate": 9.96253875604998e-06, "loss": 0.5536, "step": 974 }, { "epoch": 0.6761442441054092, "grad_norm": 0.3912141034292813, "learning_rate": 9.962242408497274e-06, "loss": 0.5508, "step": 975 }, { "epoch": 0.6768377253814147, "grad_norm": 0.33381614494541395, "learning_rate": 9.961944897830886e-06, "loss": 0.5854, "step": 976 }, { "epoch": 0.6775312066574203, "grad_norm": 0.36732923327552414, "learning_rate": 9.961646224120551e-06, "loss": 0.6146, "step": 977 }, { "epoch": 0.6782246879334258, "grad_norm": 0.33845756731512416, "learning_rate": 9.961346387436275e-06, "loss": 0.6295, "step": 978 }, { "epoch": 0.6789181692094314, "grad_norm": 0.34261448107799614, "learning_rate": 9.961045387848338e-06, "loss": 0.5808, "step": 979 }, { "epoch": 0.6796116504854369, "grad_norm": 0.3187785442319545, "learning_rate": 9.96074322542729e-06, "loss": 0.5365, "step": 980 }, { "epoch": 0.6803051317614425, "grad_norm": 0.32141773690416076, "learning_rate": 9.960439900243959e-06, "loss": 0.5913, "step": 981 }, { "epoch": 0.680998613037448, "grad_norm": 0.3447109074751309, "learning_rate": 9.960135412369441e-06, "loss": 0.5398, "step": 982 }, { "epoch": 0.6816920943134536, "grad_norm": 0.3505630074917437, "learning_rate": 9.959829761875104e-06, "loss": 0.6408, "step": 983 }, { "epoch": 0.6823855755894591, "grad_norm": 0.3002610446520616, "learning_rate": 9.959522948832591e-06, "loss": 0.564, "step": 984 }, { "epoch": 0.6830790568654647, "grad_norm": 0.3442848128163253, "learning_rate": 9.959214973313818e-06, "loss": 0.505, "step": 985 }, { "epoch": 0.6837725381414702, "grad_norm": 0.3261013165861402, "learning_rate": 9.958905835390972e-06, "loss": 0.6332, "step": 986 }, { "epoch": 0.6844660194174758, "grad_norm": 0.47876384543350736, "learning_rate": 9.958595535136511e-06, "loss": 0.5859, "step": 987 }, { "epoch": 0.6851595006934813, "grad_norm": 0.34263392662042647, "learning_rate": 9.95828407262317e-06, "loss": 0.6131, "step": 988 }, { "epoch": 0.6858529819694869, "grad_norm": 0.2971681507927114, "learning_rate": 9.95797144792395e-06, "loss": 0.5417, "step": 989 }, { "epoch": 0.6865464632454924, "grad_norm": 0.36103153363073187, "learning_rate": 9.957657661112133e-06, "loss": 0.6176, "step": 990 }, { "epoch": 0.687239944521498, "grad_norm": 0.35769346631114884, "learning_rate": 9.957342712261263e-06, "loss": 0.6555, "step": 991 }, { "epoch": 0.6879334257975035, "grad_norm": 0.40510050967209915, "learning_rate": 9.957026601445166e-06, "loss": 0.63, "step": 992 }, { "epoch": 0.688626907073509, "grad_norm": 0.32464645725429453, "learning_rate": 
9.95670932873793e-06, "loss": 0.5737, "step": 993 }, { "epoch": 0.6893203883495146, "grad_norm": 0.3400254370527942, "learning_rate": 9.95639089421393e-06, "loss": 0.5676, "step": 994 }, { "epoch": 0.6900138696255201, "grad_norm": 0.4165921946357827, "learning_rate": 9.956071297947798e-06, "loss": 0.5453, "step": 995 }, { "epoch": 0.6907073509015257, "grad_norm": 0.37321196629047915, "learning_rate": 9.955750540014448e-06, "loss": 0.5338, "step": 996 }, { "epoch": 0.6914008321775312, "grad_norm": 0.33921327437833365, "learning_rate": 9.955428620489062e-06, "loss": 0.5379, "step": 997 }, { "epoch": 0.6920943134535368, "grad_norm": 0.35440041687724594, "learning_rate": 9.955105539447096e-06, "loss": 0.6165, "step": 998 }, { "epoch": 0.6927877947295423, "grad_norm": 0.3285590688117563, "learning_rate": 9.954781296964279e-06, "loss": 0.6196, "step": 999 }, { "epoch": 0.6934812760055479, "grad_norm": 0.371636361595271, "learning_rate": 9.95445589311661e-06, "loss": 0.6161, "step": 1000 }, { "epoch": 0.6941747572815534, "grad_norm": 0.3326524858879347, "learning_rate": 9.954129327980362e-06, "loss": 0.5515, "step": 1001 }, { "epoch": 0.694868238557559, "grad_norm": 0.36226624672883995, "learning_rate": 9.953801601632079e-06, "loss": 0.5183, "step": 1002 }, { "epoch": 0.6955617198335645, "grad_norm": 0.32761164833965783, "learning_rate": 9.953472714148576e-06, "loss": 0.5383, "step": 1003 }, { "epoch": 0.6962552011095701, "grad_norm": 0.3464952058046187, "learning_rate": 9.953142665606945e-06, "loss": 0.629, "step": 1004 }, { "epoch": 0.6969486823855756, "grad_norm": 0.3074158681596855, "learning_rate": 9.952811456084546e-06, "loss": 0.5031, "step": 1005 }, { "epoch": 0.6976421636615812, "grad_norm": 0.338496489557684, "learning_rate": 9.95247908565901e-06, "loss": 0.6012, "step": 1006 }, { "epoch": 0.6983356449375867, "grad_norm": 0.38832683416829106, "learning_rate": 9.952145554408245e-06, "loss": 0.594, "step": 1007 }, { "epoch": 0.6990291262135923, "grad_norm": 0.3231626607418699, "learning_rate": 9.951810862410426e-06, "loss": 0.5732, "step": 1008 }, { "epoch": 0.6997226074895978, "grad_norm": 0.3526993233542111, "learning_rate": 9.951475009744006e-06, "loss": 0.6082, "step": 1009 }, { "epoch": 0.7004160887656034, "grad_norm": 0.3329768741425063, "learning_rate": 9.951137996487703e-06, "loss": 0.5693, "step": 1010 }, { "epoch": 0.7011095700416089, "grad_norm": 0.33207539616272197, "learning_rate": 9.95079982272051e-06, "loss": 0.5939, "step": 1011 }, { "epoch": 0.7018030513176144, "grad_norm": 0.3510554259780628, "learning_rate": 9.950460488521695e-06, "loss": 0.5877, "step": 1012 }, { "epoch": 0.70249653259362, "grad_norm": 0.3461173179627518, "learning_rate": 9.950119993970794e-06, "loss": 0.5952, "step": 1013 }, { "epoch": 0.7031900138696255, "grad_norm": 0.3487381221556316, "learning_rate": 9.949778339147617e-06, "loss": 0.5232, "step": 1014 }, { "epoch": 0.7038834951456311, "grad_norm": 0.3271125852802924, "learning_rate": 9.949435524132245e-06, "loss": 0.5609, "step": 1015 }, { "epoch": 0.7045769764216366, "grad_norm": 0.3665837812231856, "learning_rate": 9.949091549005033e-06, "loss": 0.5713, "step": 1016 }, { "epoch": 0.7052704576976422, "grad_norm": 0.34228569768066913, "learning_rate": 9.948746413846604e-06, "loss": 0.6004, "step": 1017 }, { "epoch": 0.7059639389736477, "grad_norm": 0.3356409857094149, "learning_rate": 9.948400118737856e-06, "loss": 0.6336, "step": 1018 }, { "epoch": 0.7066574202496533, "grad_norm": 0.342166029096126, "learning_rate": 9.948052663759957e-06, "loss": 
0.5944, "step": 1019 }, { "epoch": 0.7073509015256588, "grad_norm": 0.36601108993914533, "learning_rate": 9.947704048994351e-06, "loss": 0.5713, "step": 1020 }, { "epoch": 0.7080443828016644, "grad_norm": 0.33386006159619147, "learning_rate": 9.947354274522748e-06, "loss": 0.5633, "step": 1021 }, { "epoch": 0.7087378640776699, "grad_norm": 0.30572169905213814, "learning_rate": 9.947003340427134e-06, "loss": 0.5985, "step": 1022 }, { "epoch": 0.7094313453536755, "grad_norm": 0.31640757934649083, "learning_rate": 9.946651246789765e-06, "loss": 0.564, "step": 1023 }, { "epoch": 0.710124826629681, "grad_norm": 0.3670258161330911, "learning_rate": 9.946297993693168e-06, "loss": 0.5787, "step": 1024 }, { "epoch": 0.7108183079056866, "grad_norm": 0.34322370570001765, "learning_rate": 9.945943581220144e-06, "loss": 0.5503, "step": 1025 }, { "epoch": 0.7115117891816921, "grad_norm": 0.340602804346554, "learning_rate": 9.945588009453766e-06, "loss": 0.5732, "step": 1026 }, { "epoch": 0.7122052704576977, "grad_norm": 0.38698430691899954, "learning_rate": 9.945231278477374e-06, "loss": 0.52, "step": 1027 }, { "epoch": 0.7128987517337032, "grad_norm": 0.34545318871864483, "learning_rate": 9.94487338837459e-06, "loss": 0.6157, "step": 1028 }, { "epoch": 0.7135922330097088, "grad_norm": 0.32974241410967664, "learning_rate": 9.944514339229292e-06, "loss": 0.5297, "step": 1029 }, { "epoch": 0.7142857142857143, "grad_norm": 0.3237198660503851, "learning_rate": 9.944154131125643e-06, "loss": 0.5427, "step": 1030 }, { "epoch": 0.7149791955617198, "grad_norm": 0.32577249272541753, "learning_rate": 9.943792764148074e-06, "loss": 0.5882, "step": 1031 }, { "epoch": 0.7156726768377254, "grad_norm": 0.35641603035666863, "learning_rate": 9.943430238381286e-06, "loss": 0.5201, "step": 1032 }, { "epoch": 0.7163661581137309, "grad_norm": 0.3388199217233206, "learning_rate": 9.943066553910252e-06, "loss": 0.5842, "step": 1033 }, { "epoch": 0.7170596393897365, "grad_norm": 0.35853035041766645, "learning_rate": 9.942701710820217e-06, "loss": 0.5873, "step": 1034 }, { "epoch": 0.717753120665742, "grad_norm": 0.3812354567225615, "learning_rate": 9.942335709196697e-06, "loss": 0.6516, "step": 1035 }, { "epoch": 0.7184466019417476, "grad_norm": 0.3330409448434271, "learning_rate": 9.941968549125481e-06, "loss": 0.5635, "step": 1036 }, { "epoch": 0.7191400832177531, "grad_norm": 0.3448194454185852, "learning_rate": 9.94160023069263e-06, "loss": 0.5404, "step": 1037 }, { "epoch": 0.7198335644937587, "grad_norm": 0.39385124207811567, "learning_rate": 9.941230753984472e-06, "loss": 0.6243, "step": 1038 }, { "epoch": 0.7205270457697642, "grad_norm": 0.3487501152480796, "learning_rate": 9.940860119087612e-06, "loss": 0.5353, "step": 1039 }, { "epoch": 0.7212205270457698, "grad_norm": 0.3643088946026779, "learning_rate": 9.940488326088924e-06, "loss": 0.6251, "step": 1040 }, { "epoch": 0.7219140083217753, "grad_norm": 0.3432021499024788, "learning_rate": 9.940115375075551e-06, "loss": 0.6495, "step": 1041 }, { "epoch": 0.7226074895977809, "grad_norm": 0.3367973243594567, "learning_rate": 9.939741266134914e-06, "loss": 0.6211, "step": 1042 }, { "epoch": 0.7233009708737864, "grad_norm": 0.34437975080665395, "learning_rate": 9.9393659993547e-06, "loss": 0.6302, "step": 1043 }, { "epoch": 0.723994452149792, "grad_norm": 0.39039845823464975, "learning_rate": 9.938989574822866e-06, "loss": 0.579, "step": 1044 }, { "epoch": 0.7246879334257975, "grad_norm": 0.32845623476097796, "learning_rate": 9.938611992627647e-06, "loss": 0.527, "step": 
1045 }, { "epoch": 0.7253814147018031, "grad_norm": 0.3181699129320863, "learning_rate": 9.938233252857544e-06, "loss": 0.5715, "step": 1046 }, { "epoch": 0.7260748959778086, "grad_norm": 0.41892463315658773, "learning_rate": 9.937853355601331e-06, "loss": 0.5289, "step": 1047 }, { "epoch": 0.7267683772538142, "grad_norm": 0.33898184108626256, "learning_rate": 9.937472300948053e-06, "loss": 0.6499, "step": 1048 }, { "epoch": 0.7274618585298197, "grad_norm": 0.36282529694296206, "learning_rate": 9.937090088987028e-06, "loss": 0.6138, "step": 1049 }, { "epoch": 0.7281553398058253, "grad_norm": 0.3418792229555562, "learning_rate": 9.936706719807839e-06, "loss": 0.5443, "step": 1050 }, { "epoch": 0.7288488210818308, "grad_norm": 0.3505297479017804, "learning_rate": 9.936322193500349e-06, "loss": 0.5182, "step": 1051 }, { "epoch": 0.7295423023578363, "grad_norm": 0.3519624416218966, "learning_rate": 9.935936510154689e-06, "loss": 0.5676, "step": 1052 }, { "epoch": 0.7302357836338419, "grad_norm": 0.3113359479294091, "learning_rate": 9.935549669861257e-06, "loss": 0.5858, "step": 1053 }, { "epoch": 0.7309292649098474, "grad_norm": 0.33252266872190694, "learning_rate": 9.935161672710728e-06, "loss": 0.5842, "step": 1054 }, { "epoch": 0.731622746185853, "grad_norm": 0.34930045036354523, "learning_rate": 9.934772518794047e-06, "loss": 0.5553, "step": 1055 }, { "epoch": 0.7323162274618585, "grad_norm": 0.3606440647866921, "learning_rate": 9.934382208202425e-06, "loss": 0.5665, "step": 1056 }, { "epoch": 0.7330097087378641, "grad_norm": 0.35467096078452276, "learning_rate": 9.93399074102735e-06, "loss": 0.565, "step": 1057 }, { "epoch": 0.7337031900138696, "grad_norm": 0.3322547061127572, "learning_rate": 9.93359811736058e-06, "loss": 0.5752, "step": 1058 }, { "epoch": 0.7343966712898752, "grad_norm": 0.2992989740354364, "learning_rate": 9.93320433729414e-06, "loss": 0.5149, "step": 1059 }, { "epoch": 0.7350901525658807, "grad_norm": 0.33417326760115984, "learning_rate": 9.932809400920332e-06, "loss": 0.5534, "step": 1060 }, { "epoch": 0.7357836338418863, "grad_norm": 0.37007578925035495, "learning_rate": 9.932413308331725e-06, "loss": 0.5881, "step": 1061 }, { "epoch": 0.7364771151178918, "grad_norm": 0.34269131669156866, "learning_rate": 9.932016059621161e-06, "loss": 0.5604, "step": 1062 }, { "epoch": 0.7371705963938974, "grad_norm": 0.33546161759477605, "learning_rate": 9.931617654881753e-06, "loss": 0.5589, "step": 1063 }, { "epoch": 0.7378640776699029, "grad_norm": 0.3847215713142921, "learning_rate": 9.931218094206882e-06, "loss": 0.6045, "step": 1064 }, { "epoch": 0.7385575589459085, "grad_norm": 0.3367235220263564, "learning_rate": 9.930817377690205e-06, "loss": 0.5897, "step": 1065 }, { "epoch": 0.739251040221914, "grad_norm": 0.3406355566353876, "learning_rate": 9.930415505425644e-06, "loss": 0.5224, "step": 1066 }, { "epoch": 0.7399445214979196, "grad_norm": 0.32686734158684394, "learning_rate": 9.930012477507397e-06, "loss": 0.6715, "step": 1067 }, { "epoch": 0.7406380027739251, "grad_norm": 0.41130446696006584, "learning_rate": 9.92960829402993e-06, "loss": 0.6865, "step": 1068 }, { "epoch": 0.7413314840499307, "grad_norm": 0.34924981598093613, "learning_rate": 9.92920295508798e-06, "loss": 0.5805, "step": 1069 }, { "epoch": 0.7420249653259362, "grad_norm": 0.3312931346094807, "learning_rate": 9.928796460776558e-06, "loss": 0.567, "step": 1070 }, { "epoch": 0.7427184466019418, "grad_norm": 0.37359412047424895, "learning_rate": 9.928388811190938e-06, "loss": 0.5479, "step": 1071 }, { 
"epoch": 0.7434119278779473, "grad_norm": 0.361243689595125, "learning_rate": 9.927980006426677e-06, "loss": 0.6182, "step": 1072 }, { "epoch": 0.7441054091539528, "grad_norm": 0.3694364466931621, "learning_rate": 9.927570046579591e-06, "loss": 0.6626, "step": 1073 }, { "epoch": 0.7447988904299584, "grad_norm": 0.3485816922611258, "learning_rate": 9.927158931745775e-06, "loss": 0.6727, "step": 1074 }, { "epoch": 0.7454923717059639, "grad_norm": 0.34157275189351793, "learning_rate": 9.926746662021589e-06, "loss": 0.589, "step": 1075 }, { "epoch": 0.7461858529819695, "grad_norm": 0.38194584535428683, "learning_rate": 9.926333237503665e-06, "loss": 0.6034, "step": 1076 }, { "epoch": 0.746879334257975, "grad_norm": 0.35593483853898966, "learning_rate": 9.92591865828891e-06, "loss": 0.644, "step": 1077 }, { "epoch": 0.7475728155339806, "grad_norm": 0.3626096624134651, "learning_rate": 9.925502924474495e-06, "loss": 0.5897, "step": 1078 }, { "epoch": 0.7482662968099861, "grad_norm": 0.3720710472905889, "learning_rate": 9.925086036157869e-06, "loss": 0.6038, "step": 1079 }, { "epoch": 0.7489597780859917, "grad_norm": 0.33045752040041915, "learning_rate": 9.924667993436742e-06, "loss": 0.6144, "step": 1080 }, { "epoch": 0.7496532593619972, "grad_norm": 0.3330811925678463, "learning_rate": 9.924248796409107e-06, "loss": 0.5171, "step": 1081 }, { "epoch": 0.7503467406380028, "grad_norm": 0.356313244135962, "learning_rate": 9.923828445173215e-06, "loss": 0.6731, "step": 1082 }, { "epoch": 0.7510402219140083, "grad_norm": 0.34526196714865764, "learning_rate": 9.923406939827596e-06, "loss": 0.6021, "step": 1083 }, { "epoch": 0.7517337031900139, "grad_norm": 0.3186655600344347, "learning_rate": 9.922984280471048e-06, "loss": 0.5589, "step": 1084 }, { "epoch": 0.7524271844660194, "grad_norm": 0.3365046214183426, "learning_rate": 9.922560467202638e-06, "loss": 0.5753, "step": 1085 }, { "epoch": 0.753120665742025, "grad_norm": 0.3807776922687547, "learning_rate": 9.922135500121705e-06, "loss": 0.5785, "step": 1086 }, { "epoch": 0.7538141470180305, "grad_norm": 0.3310493131741956, "learning_rate": 9.921709379327859e-06, "loss": 0.5577, "step": 1087 }, { "epoch": 0.7545076282940361, "grad_norm": 0.4136156302344913, "learning_rate": 9.92128210492098e-06, "loss": 0.6314, "step": 1088 }, { "epoch": 0.7552011095700416, "grad_norm": 0.37631472966269475, "learning_rate": 9.920853677001215e-06, "loss": 0.6565, "step": 1089 }, { "epoch": 0.7558945908460472, "grad_norm": 0.6445027382242253, "learning_rate": 9.920424095668988e-06, "loss": 0.6184, "step": 1090 }, { "epoch": 0.7565880721220527, "grad_norm": 0.350976316901039, "learning_rate": 9.919993361024989e-06, "loss": 0.5619, "step": 1091 }, { "epoch": 0.7572815533980582, "grad_norm": 0.31328935691242954, "learning_rate": 9.919561473170178e-06, "loss": 0.5855, "step": 1092 }, { "epoch": 0.7579750346740638, "grad_norm": 0.3221338932383943, "learning_rate": 9.919128432205786e-06, "loss": 0.5937, "step": 1093 }, { "epoch": 0.7586685159500693, "grad_norm": 0.33196246372235766, "learning_rate": 9.918694238233314e-06, "loss": 0.6027, "step": 1094 }, { "epoch": 0.7593619972260749, "grad_norm": 0.3942666403658931, "learning_rate": 9.91825889135454e-06, "loss": 0.6452, "step": 1095 }, { "epoch": 0.7600554785020804, "grad_norm": 0.3675180175724699, "learning_rate": 9.9178223916715e-06, "loss": 0.5824, "step": 1096 }, { "epoch": 0.760748959778086, "grad_norm": 0.3032022133924184, "learning_rate": 9.917384739286505e-06, "loss": 0.5887, "step": 1097 }, { "epoch": 
0.7614424410540915, "grad_norm": 0.33094285704358667, "learning_rate": 9.916945934302142e-06, "loss": 0.5337, "step": 1098 }, { "epoch": 0.7621359223300971, "grad_norm": 0.3250037259073846, "learning_rate": 9.916505976821262e-06, "loss": 0.5563, "step": 1099 }, { "epoch": 0.7628294036061026, "grad_norm": 0.3698602118076207, "learning_rate": 9.91606486694699e-06, "loss": 0.5598, "step": 1100 }, { "epoch": 0.7635228848821082, "grad_norm": 0.35241780322083244, "learning_rate": 9.915622604782716e-06, "loss": 0.5633, "step": 1101 }, { "epoch": 0.7642163661581137, "grad_norm": 0.3404720849529966, "learning_rate": 9.915179190432102e-06, "loss": 0.5658, "step": 1102 }, { "epoch": 0.7649098474341193, "grad_norm": 0.34218980207078303, "learning_rate": 9.914734623999086e-06, "loss": 0.5564, "step": 1103 }, { "epoch": 0.7656033287101248, "grad_norm": 0.34919546385510847, "learning_rate": 9.914288905587867e-06, "loss": 0.4655, "step": 1104 }, { "epoch": 0.7662968099861304, "grad_norm": 0.36238942723055884, "learning_rate": 9.91384203530292e-06, "loss": 0.6234, "step": 1105 }, { "epoch": 0.7669902912621359, "grad_norm": 0.3483550241642675, "learning_rate": 9.913394013248987e-06, "loss": 0.6375, "step": 1106 }, { "epoch": 0.7676837725381415, "grad_norm": 0.31474225103999814, "learning_rate": 9.912944839531083e-06, "loss": 0.5282, "step": 1107 }, { "epoch": 0.768377253814147, "grad_norm": 0.3200443820947016, "learning_rate": 9.912494514254487e-06, "loss": 0.6036, "step": 1108 }, { "epoch": 0.7690707350901526, "grad_norm": 0.3229008835087235, "learning_rate": 9.912043037524758e-06, "loss": 0.5547, "step": 1109 }, { "epoch": 0.7697642163661581, "grad_norm": 0.3165451146540614, "learning_rate": 9.911590409447713e-06, "loss": 0.5463, "step": 1110 }, { "epoch": 0.7704576976421637, "grad_norm": 0.33748946916856115, "learning_rate": 9.91113663012945e-06, "loss": 0.5923, "step": 1111 }, { "epoch": 0.7711511789181692, "grad_norm": 0.32709168605333655, "learning_rate": 9.910681699676327e-06, "loss": 0.5438, "step": 1112 }, { "epoch": 0.7718446601941747, "grad_norm": 0.33964568054254574, "learning_rate": 9.91022561819498e-06, "loss": 0.5095, "step": 1113 }, { "epoch": 0.7725381414701803, "grad_norm": 0.3448286501208917, "learning_rate": 9.909768385792308e-06, "loss": 0.5605, "step": 1114 }, { "epoch": 0.7732316227461858, "grad_norm": 0.3286078533295107, "learning_rate": 9.909310002575486e-06, "loss": 0.5318, "step": 1115 }, { "epoch": 0.7739251040221914, "grad_norm": 0.3366454563267662, "learning_rate": 9.908850468651953e-06, "loss": 0.58, "step": 1116 }, { "epoch": 0.7746185852981969, "grad_norm": 0.46236589261179867, "learning_rate": 9.908389784129424e-06, "loss": 0.5807, "step": 1117 }, { "epoch": 0.7753120665742025, "grad_norm": 0.3524473088468238, "learning_rate": 9.907927949115877e-06, "loss": 0.5745, "step": 1118 }, { "epoch": 0.776005547850208, "grad_norm": 0.3157956125887099, "learning_rate": 9.907464963719562e-06, "loss": 0.5167, "step": 1119 }, { "epoch": 0.7766990291262136, "grad_norm": 0.33920527726146843, "learning_rate": 9.907000828049001e-06, "loss": 0.5183, "step": 1120 }, { "epoch": 0.7773925104022191, "grad_norm": 0.3571934309252906, "learning_rate": 9.906535542212984e-06, "loss": 0.5849, "step": 1121 }, { "epoch": 0.7780859916782247, "grad_norm": 0.34341155887390523, "learning_rate": 9.906069106320573e-06, "loss": 0.6189, "step": 1122 }, { "epoch": 0.7787794729542302, "grad_norm": 0.36803861258608084, "learning_rate": 9.905601520481094e-06, "loss": 0.5567, "step": 1123 }, { "epoch": 
0.7794729542302358, "grad_norm": 0.33411631641875295, "learning_rate": 9.905132784804146e-06, "loss": 0.5717, "step": 1124 }, { "epoch": 0.7801664355062413, "grad_norm": 0.3749463095695367, "learning_rate": 9.904662899399598e-06, "loss": 0.5764, "step": 1125 }, { "epoch": 0.7808599167822469, "grad_norm": 0.3411261494102941, "learning_rate": 9.904191864377588e-06, "loss": 0.5028, "step": 1126 }, { "epoch": 0.7815533980582524, "grad_norm": 0.313940560560632, "learning_rate": 9.903719679848522e-06, "loss": 0.5555, "step": 1127 }, { "epoch": 0.782246879334258, "grad_norm": 0.6566302794869116, "learning_rate": 9.903246345923078e-06, "loss": 0.5844, "step": 1128 }, { "epoch": 0.7829403606102635, "grad_norm": 0.3347037281216503, "learning_rate": 9.902771862712201e-06, "loss": 0.5831, "step": 1129 }, { "epoch": 0.7836338418862691, "grad_norm": 0.41623786574982496, "learning_rate": 9.902296230327109e-06, "loss": 0.6246, "step": 1130 }, { "epoch": 0.7843273231622746, "grad_norm": 0.3297323734484108, "learning_rate": 9.901819448879284e-06, "loss": 0.5644, "step": 1131 }, { "epoch": 0.7850208044382802, "grad_norm": 0.30955004662696883, "learning_rate": 9.901341518480478e-06, "loss": 0.5616, "step": 1132 }, { "epoch": 0.7857142857142857, "grad_norm": 0.3235277563056244, "learning_rate": 9.900862439242719e-06, "loss": 0.6166, "step": 1133 }, { "epoch": 0.7864077669902912, "grad_norm": 0.31748454405852916, "learning_rate": 9.9003822112783e-06, "loss": 0.5235, "step": 1134 }, { "epoch": 0.7871012482662968, "grad_norm": 0.33023689686714086, "learning_rate": 9.899900834699778e-06, "loss": 0.5849, "step": 1135 }, { "epoch": 0.7877947295423023, "grad_norm": 0.3495578780899382, "learning_rate": 9.899418309619988e-06, "loss": 0.6115, "step": 1136 }, { "epoch": 0.7884882108183079, "grad_norm": 0.32497385026555636, "learning_rate": 9.89893463615203e-06, "loss": 0.5123, "step": 1137 }, { "epoch": 0.7891816920943134, "grad_norm": 0.31823531157818197, "learning_rate": 9.898449814409272e-06, "loss": 0.5472, "step": 1138 }, { "epoch": 0.789875173370319, "grad_norm": 0.3871723185910273, "learning_rate": 9.897963844505355e-06, "loss": 0.6342, "step": 1139 }, { "epoch": 0.7905686546463245, "grad_norm": 0.32748688538451093, "learning_rate": 9.897476726554185e-06, "loss": 0.6139, "step": 1140 }, { "epoch": 0.7912621359223301, "grad_norm": 0.35033793604159597, "learning_rate": 9.89698846066994e-06, "loss": 0.6325, "step": 1141 }, { "epoch": 0.7919556171983356, "grad_norm": 0.3487389698279109, "learning_rate": 9.896499046967065e-06, "loss": 0.6317, "step": 1142 }, { "epoch": 0.7926490984743412, "grad_norm": 0.3531649178211908, "learning_rate": 9.896008485560275e-06, "loss": 0.5557, "step": 1143 }, { "epoch": 0.7933425797503467, "grad_norm": 1.0305191170454497, "learning_rate": 9.895516776564555e-06, "loss": 0.5577, "step": 1144 }, { "epoch": 0.7940360610263523, "grad_norm": 0.32998484447400867, "learning_rate": 9.895023920095157e-06, "loss": 0.5486, "step": 1145 }, { "epoch": 0.7947295423023578, "grad_norm": 0.3619027933913946, "learning_rate": 9.894529916267605e-06, "loss": 0.5674, "step": 1146 }, { "epoch": 0.7954230235783634, "grad_norm": 0.3511914223909972, "learning_rate": 9.894034765197688e-06, "loss": 0.6188, "step": 1147 }, { "epoch": 0.7961165048543689, "grad_norm": 0.3526373782570898, "learning_rate": 9.893538467001466e-06, "loss": 0.6216, "step": 1148 }, { "epoch": 0.7968099861303745, "grad_norm": 0.3573519780274059, "learning_rate": 9.893041021795266e-06, "loss": 0.5697, "step": 1149 }, { "epoch": 
0.79750346740638, "grad_norm": 0.3369604374861474, "learning_rate": 9.892542429695691e-06, "loss": 0.5902, "step": 1150 }, { "epoch": 0.7981969486823856, "grad_norm": 0.3706997741580729, "learning_rate": 9.892042690819602e-06, "loss": 0.5412, "step": 1151 }, { "epoch": 0.7988904299583911, "grad_norm": 0.305468979726182, "learning_rate": 9.891541805284137e-06, "loss": 0.4956, "step": 1152 }, { "epoch": 0.7995839112343966, "grad_norm": 0.32809836169628454, "learning_rate": 9.891039773206698e-06, "loss": 0.5511, "step": 1153 }, { "epoch": 0.8002773925104022, "grad_norm": 0.3405183403533073, "learning_rate": 9.890536594704961e-06, "loss": 0.536, "step": 1154 }, { "epoch": 0.8009708737864077, "grad_norm": 0.32633246107874186, "learning_rate": 9.890032269896862e-06, "loss": 0.5373, "step": 1155 }, { "epoch": 0.8016643550624133, "grad_norm": 0.34040614751990844, "learning_rate": 9.889526798900615e-06, "loss": 0.5423, "step": 1156 }, { "epoch": 0.8023578363384188, "grad_norm": 0.33871526192231677, "learning_rate": 9.889020181834698e-06, "loss": 0.6379, "step": 1157 }, { "epoch": 0.8030513176144244, "grad_norm": 0.34929270754374225, "learning_rate": 9.888512418817861e-06, "loss": 0.5281, "step": 1158 }, { "epoch": 0.8037447988904299, "grad_norm": 0.3740202942568168, "learning_rate": 9.888003509969116e-06, "loss": 0.5771, "step": 1159 }, { "epoch": 0.8044382801664355, "grad_norm": 0.4620528126667381, "learning_rate": 9.887493455407746e-06, "loss": 0.587, "step": 1160 }, { "epoch": 0.805131761442441, "grad_norm": 0.2986631872001357, "learning_rate": 9.88698225525331e-06, "loss": 0.5042, "step": 1161 }, { "epoch": 0.8058252427184466, "grad_norm": 0.32733982878285756, "learning_rate": 9.886469909625624e-06, "loss": 0.5278, "step": 1162 }, { "epoch": 0.8065187239944521, "grad_norm": 0.3208486870317828, "learning_rate": 9.885956418644783e-06, "loss": 0.5399, "step": 1163 }, { "epoch": 0.8072122052704577, "grad_norm": 0.35713225551264444, "learning_rate": 9.885441782431143e-06, "loss": 0.6282, "step": 1164 }, { "epoch": 0.8079056865464632, "grad_norm": 0.3534688004612578, "learning_rate": 9.884926001105331e-06, "loss": 0.61, "step": 1165 }, { "epoch": 0.8085991678224688, "grad_norm": 0.3573246841498351, "learning_rate": 9.884409074788242e-06, "loss": 0.5765, "step": 1166 }, { "epoch": 0.8092926490984743, "grad_norm": 0.3542712347958411, "learning_rate": 9.883891003601041e-06, "loss": 0.5989, "step": 1167 }, { "epoch": 0.8099861303744799, "grad_norm": 0.3472953991002415, "learning_rate": 9.883371787665158e-06, "loss": 0.6198, "step": 1168 }, { "epoch": 0.8106796116504854, "grad_norm": 0.32420034756459143, "learning_rate": 9.882851427102299e-06, "loss": 0.5682, "step": 1169 }, { "epoch": 0.811373092926491, "grad_norm": 0.3307306065177549, "learning_rate": 9.882329922034424e-06, "loss": 0.5373, "step": 1170 }, { "epoch": 0.8120665742024965, "grad_norm": 0.3118736702940442, "learning_rate": 9.881807272583776e-06, "loss": 0.5125, "step": 1171 }, { "epoch": 0.812760055478502, "grad_norm": 0.3459557102540757, "learning_rate": 9.88128347887286e-06, "loss": 0.644, "step": 1172 }, { "epoch": 0.8134535367545076, "grad_norm": 0.34330909907101614, "learning_rate": 9.880758541024449e-06, "loss": 0.5741, "step": 1173 }, { "epoch": 0.8141470180305131, "grad_norm": 0.3261296168661891, "learning_rate": 9.880232459161583e-06, "loss": 0.5514, "step": 1174 }, { "epoch": 0.8148404993065187, "grad_norm": 0.32032462633009595, "learning_rate": 9.879705233407576e-06, "loss": 0.5293, "step": 1175 }, { "epoch": 0.8155339805825242, 
"grad_norm": 0.3779710108286491, "learning_rate": 9.879176863885997e-06, "loss": 0.5532, "step": 1176 }, { "epoch": 0.8162274618585298, "grad_norm": 0.3467233546272369, "learning_rate": 9.878647350720703e-06, "loss": 0.5986, "step": 1177 }, { "epoch": 0.8169209431345353, "grad_norm": 0.3561514472980493, "learning_rate": 9.8781166940358e-06, "loss": 0.6511, "step": 1178 }, { "epoch": 0.8176144244105409, "grad_norm": 0.36134302910341937, "learning_rate": 9.877584893955674e-06, "loss": 0.5211, "step": 1179 }, { "epoch": 0.8183079056865464, "grad_norm": 0.3158580471200914, "learning_rate": 9.877051950604972e-06, "loss": 0.5711, "step": 1180 }, { "epoch": 0.819001386962552, "grad_norm": 0.3613864439699286, "learning_rate": 9.876517864108617e-06, "loss": 0.5703, "step": 1181 }, { "epoch": 0.8196948682385575, "grad_norm": 0.3075357148573775, "learning_rate": 9.87598263459179e-06, "loss": 0.51, "step": 1182 }, { "epoch": 0.8203883495145631, "grad_norm": 0.3188142632127063, "learning_rate": 9.875446262179948e-06, "loss": 0.5379, "step": 1183 }, { "epoch": 0.8210818307905686, "grad_norm": 0.3767995481374369, "learning_rate": 9.874908746998811e-06, "loss": 0.5949, "step": 1184 }, { "epoch": 0.8217753120665742, "grad_norm": 0.32095775879686966, "learning_rate": 9.87437008917437e-06, "loss": 0.5404, "step": 1185 }, { "epoch": 0.8224687933425797, "grad_norm": 0.3576115548174324, "learning_rate": 9.873830288832882e-06, "loss": 0.5634, "step": 1186 }, { "epoch": 0.8231622746185853, "grad_norm": 0.3422084150460348, "learning_rate": 9.873289346100872e-06, "loss": 0.5852, "step": 1187 }, { "epoch": 0.8238557558945908, "grad_norm": 0.3222649805066783, "learning_rate": 9.872747261105133e-06, "loss": 0.5463, "step": 1188 }, { "epoch": 0.8245492371705964, "grad_norm": 0.3496908260537203, "learning_rate": 9.872204033972727e-06, "loss": 0.5205, "step": 1189 }, { "epoch": 0.8252427184466019, "grad_norm": 0.3388360737763813, "learning_rate": 9.87165966483098e-06, "loss": 0.5909, "step": 1190 }, { "epoch": 0.8259361997226075, "grad_norm": 0.3236104661748136, "learning_rate": 9.871114153807491e-06, "loss": 0.5578, "step": 1191 }, { "epoch": 0.826629680998613, "grad_norm": 0.39015592746291056, "learning_rate": 9.870567501030122e-06, "loss": 0.6206, "step": 1192 }, { "epoch": 0.8273231622746186, "grad_norm": 0.33132849595283714, "learning_rate": 9.870019706627006e-06, "loss": 0.6117, "step": 1193 }, { "epoch": 0.8280166435506241, "grad_norm": 0.35448745194396175, "learning_rate": 9.869470770726541e-06, "loss": 0.5997, "step": 1194 }, { "epoch": 0.8287101248266296, "grad_norm": 0.3208479735558261, "learning_rate": 9.868920693457393e-06, "loss": 0.5917, "step": 1195 }, { "epoch": 0.8294036061026352, "grad_norm": 0.3324491514388112, "learning_rate": 9.868369474948498e-06, "loss": 0.5659, "step": 1196 }, { "epoch": 0.8300970873786407, "grad_norm": 0.3667633479287624, "learning_rate": 9.867817115329055e-06, "loss": 0.605, "step": 1197 }, { "epoch": 0.8307905686546463, "grad_norm": 0.319951425929113, "learning_rate": 9.867263614728535e-06, "loss": 0.5668, "step": 1198 }, { "epoch": 0.8314840499306518, "grad_norm": 0.3219623411274206, "learning_rate": 9.866708973276674e-06, "loss": 0.5381, "step": 1199 }, { "epoch": 0.8321775312066574, "grad_norm": 0.33814181704870083, "learning_rate": 9.866153191103476e-06, "loss": 0.5031, "step": 1200 }, { "epoch": 0.8328710124826629, "grad_norm": 0.3574079179689098, "learning_rate": 9.865596268339213e-06, "loss": 0.5947, "step": 1201 }, { "epoch": 0.8335644937586685, "grad_norm": 
0.344780139859224, "learning_rate": 9.865038205114422e-06, "loss": 0.6166, "step": 1202 }, { "epoch": 0.834257975034674, "grad_norm": 0.32138095499949426, "learning_rate": 9.86447900155991e-06, "loss": 0.5854, "step": 1203 }, { "epoch": 0.8349514563106796, "grad_norm": 0.32251090179520675, "learning_rate": 9.863918657806752e-06, "loss": 0.5606, "step": 1204 }, { "epoch": 0.8356449375866851, "grad_norm": 0.31744573456007175, "learning_rate": 9.863357173986285e-06, "loss": 0.5706, "step": 1205 }, { "epoch": 0.8363384188626907, "grad_norm": 0.3258706406679843, "learning_rate": 9.862794550230119e-06, "loss": 0.5624, "step": 1206 }, { "epoch": 0.8370319001386962, "grad_norm": 0.37917664859412087, "learning_rate": 9.862230786670129e-06, "loss": 0.5854, "step": 1207 }, { "epoch": 0.8377253814147018, "grad_norm": 0.3712638165551654, "learning_rate": 9.861665883438456e-06, "loss": 0.5972, "step": 1208 }, { "epoch": 0.8384188626907073, "grad_norm": 0.45810505441807553, "learning_rate": 9.86109984066751e-06, "loss": 0.6128, "step": 1209 }, { "epoch": 0.8391123439667129, "grad_norm": 0.6036613407606876, "learning_rate": 9.860532658489967e-06, "loss": 0.5489, "step": 1210 }, { "epoch": 0.8398058252427184, "grad_norm": 0.33020040403158707, "learning_rate": 9.85996433703877e-06, "loss": 0.5825, "step": 1211 }, { "epoch": 0.840499306518724, "grad_norm": 0.3621560256794954, "learning_rate": 9.85939487644713e-06, "loss": 0.5318, "step": 1212 }, { "epoch": 0.8411927877947295, "grad_norm": 0.34977264380135165, "learning_rate": 9.858824276848524e-06, "loss": 0.6136, "step": 1213 }, { "epoch": 0.841886269070735, "grad_norm": 0.32135164908334707, "learning_rate": 9.858252538376698e-06, "loss": 0.5025, "step": 1214 }, { "epoch": 0.8425797503467406, "grad_norm": 0.3898124430815325, "learning_rate": 9.857679661165663e-06, "loss": 0.5533, "step": 1215 }, { "epoch": 0.8432732316227461, "grad_norm": 0.3600086857034731, "learning_rate": 9.857105645349694e-06, "loss": 0.579, "step": 1216 }, { "epoch": 0.8439667128987517, "grad_norm": 0.3901900266989973, "learning_rate": 9.856530491063338e-06, "loss": 0.5646, "step": 1217 }, { "epoch": 0.8446601941747572, "grad_norm": 0.3665341133398684, "learning_rate": 9.855954198441411e-06, "loss": 0.6373, "step": 1218 }, { "epoch": 0.8453536754507628, "grad_norm": 0.33695444682139386, "learning_rate": 9.855376767618985e-06, "loss": 0.5542, "step": 1219 }, { "epoch": 0.8460471567267683, "grad_norm": 0.33689360622798203, "learning_rate": 9.854798198731411e-06, "loss": 0.6371, "step": 1220 }, { "epoch": 0.8467406380027739, "grad_norm": 0.3261138900491812, "learning_rate": 9.854218491914298e-06, "loss": 0.477, "step": 1221 }, { "epoch": 0.8474341192787794, "grad_norm": 0.30686493763733774, "learning_rate": 9.853637647303528e-06, "loss": 0.4961, "step": 1222 }, { "epoch": 0.848127600554785, "grad_norm": 0.326983557469853, "learning_rate": 9.853055665035244e-06, "loss": 0.5263, "step": 1223 }, { "epoch": 0.8488210818307905, "grad_norm": 0.3839857778727524, "learning_rate": 9.85247254524586e-06, "loss": 0.5942, "step": 1224 }, { "epoch": 0.8495145631067961, "grad_norm": 0.36238512304436643, "learning_rate": 9.851888288072053e-06, "loss": 0.5472, "step": 1225 }, { "epoch": 0.8502080443828016, "grad_norm": 0.3328601800683192, "learning_rate": 9.851302893650773e-06, "loss": 0.5815, "step": 1226 }, { "epoch": 0.8509015256588072, "grad_norm": 0.31869695935851317, "learning_rate": 9.850716362119229e-06, "loss": 0.5511, "step": 1227 }, { "epoch": 0.8515950069348127, "grad_norm": 
0.3218992384439522, "learning_rate": 9.850128693614898e-06, "loss": 0.5431, "step": 1228 }, { "epoch": 0.8522884882108183, "grad_norm": 0.3075557984826137, "learning_rate": 9.84953988827553e-06, "loss": 0.5532, "step": 1229 }, { "epoch": 0.8529819694868238, "grad_norm": 0.32272884303113597, "learning_rate": 9.848949946239132e-06, "loss": 0.6061, "step": 1230 }, { "epoch": 0.8536754507628294, "grad_norm": 0.3288623926118559, "learning_rate": 9.848358867643985e-06, "loss": 0.4792, "step": 1231 }, { "epoch": 0.8543689320388349, "grad_norm": 0.3610242978031061, "learning_rate": 9.847766652628635e-06, "loss": 0.6134, "step": 1232 }, { "epoch": 0.8550624133148405, "grad_norm": 0.3141108964836117, "learning_rate": 9.847173301331889e-06, "loss": 0.543, "step": 1233 }, { "epoch": 0.855755894590846, "grad_norm": 0.3712592727169921, "learning_rate": 9.846578813892827e-06, "loss": 0.5927, "step": 1234 }, { "epoch": 0.8564493758668515, "grad_norm": 0.3422545784436403, "learning_rate": 9.84598319045079e-06, "loss": 0.5545, "step": 1235 }, { "epoch": 0.8571428571428571, "grad_norm": 0.34192867170626523, "learning_rate": 9.84538643114539e-06, "loss": 0.614, "step": 1236 }, { "epoch": 0.8578363384188626, "grad_norm": 0.34078198263330395, "learning_rate": 9.844788536116504e-06, "loss": 0.5408, "step": 1237 }, { "epoch": 0.8585298196948682, "grad_norm": 0.3189641682226928, "learning_rate": 9.844189505504272e-06, "loss": 0.5322, "step": 1238 }, { "epoch": 0.8592233009708737, "grad_norm": 0.3186152647314188, "learning_rate": 9.843589339449102e-06, "loss": 0.5346, "step": 1239 }, { "epoch": 0.8599167822468793, "grad_norm": 0.3267157115057452, "learning_rate": 9.84298803809167e-06, "loss": 0.5719, "step": 1240 }, { "epoch": 0.8606102635228848, "grad_norm": 0.3245630397273561, "learning_rate": 9.842385601572918e-06, "loss": 0.5165, "step": 1241 }, { "epoch": 0.8613037447988904, "grad_norm": 0.3910276769829623, "learning_rate": 9.841782030034049e-06, "loss": 0.6059, "step": 1242 }, { "epoch": 0.8619972260748959, "grad_norm": 0.4074256777033721, "learning_rate": 9.841177323616539e-06, "loss": 0.5965, "step": 1243 }, { "epoch": 0.8626907073509015, "grad_norm": 0.34814973925935633, "learning_rate": 9.840571482462126e-06, "loss": 0.5365, "step": 1244 }, { "epoch": 0.863384188626907, "grad_norm": 0.34036803058568826, "learning_rate": 9.839964506712814e-06, "loss": 0.561, "step": 1245 }, { "epoch": 0.8640776699029126, "grad_norm": 0.341389026646717, "learning_rate": 9.839356396510875e-06, "loss": 0.5451, "step": 1246 }, { "epoch": 0.8647711511789181, "grad_norm": 0.2907921324032125, "learning_rate": 9.838747151998844e-06, "loss": 0.4932, "step": 1247 }, { "epoch": 0.8654646324549237, "grad_norm": 0.3127236294467869, "learning_rate": 9.838136773319527e-06, "loss": 0.5163, "step": 1248 }, { "epoch": 0.8661581137309292, "grad_norm": 0.34454414991532545, "learning_rate": 9.837525260615987e-06, "loss": 0.5189, "step": 1249 }, { "epoch": 0.8668515950069348, "grad_norm": 0.36058454065948325, "learning_rate": 9.836912614031561e-06, "loss": 0.6312, "step": 1250 }, { "epoch": 0.8675450762829403, "grad_norm": 0.34673590336860904, "learning_rate": 9.83629883370985e-06, "loss": 0.5893, "step": 1251 }, { "epoch": 0.8682385575589459, "grad_norm": 0.33511794703223685, "learning_rate": 9.835683919794719e-06, "loss": 0.5658, "step": 1252 }, { "epoch": 0.8689320388349514, "grad_norm": 0.3210331871920328, "learning_rate": 9.835067872430297e-06, "loss": 0.6333, "step": 1253 }, { "epoch": 0.869625520110957, "grad_norm": 0.33470113519608913, 
"learning_rate": 9.834450691760983e-06, "loss": 0.5474, "step": 1254 }, { "epoch": 0.8703190013869625, "grad_norm": 0.32831830495224923, "learning_rate": 9.833832377931442e-06, "loss": 0.5015, "step": 1255 }, { "epoch": 0.871012482662968, "grad_norm": 0.3633007768286215, "learning_rate": 9.833212931086597e-06, "loss": 0.5702, "step": 1256 }, { "epoch": 0.8717059639389736, "grad_norm": 0.3357210640028075, "learning_rate": 9.832592351371646e-06, "loss": 0.5413, "step": 1257 }, { "epoch": 0.8723994452149791, "grad_norm": 0.35920255546669794, "learning_rate": 9.831970638932048e-06, "loss": 0.6591, "step": 1258 }, { "epoch": 0.8730929264909847, "grad_norm": 0.35116338869790564, "learning_rate": 9.831347793913526e-06, "loss": 0.603, "step": 1259 }, { "epoch": 0.8737864077669902, "grad_norm": 0.332515881910525, "learning_rate": 9.830723816462071e-06, "loss": 0.5354, "step": 1260 }, { "epoch": 0.8744798890429958, "grad_norm": 0.354597740102601, "learning_rate": 9.83009870672394e-06, "loss": 0.5938, "step": 1261 }, { "epoch": 0.8751733703190014, "grad_norm": 0.3339340280415721, "learning_rate": 9.829472464845654e-06, "loss": 0.5307, "step": 1262 }, { "epoch": 0.875866851595007, "grad_norm": 0.3531606867195477, "learning_rate": 9.828845090973998e-06, "loss": 0.5673, "step": 1263 }, { "epoch": 0.8765603328710125, "grad_norm": 0.31848797118696937, "learning_rate": 9.828216585256025e-06, "loss": 0.5333, "step": 1264 }, { "epoch": 0.8772538141470181, "grad_norm": 0.329104246002548, "learning_rate": 9.827586947839052e-06, "loss": 0.5743, "step": 1265 }, { "epoch": 0.8779472954230236, "grad_norm": 0.3287601918052048, "learning_rate": 9.826956178870662e-06, "loss": 0.5709, "step": 1266 }, { "epoch": 0.8786407766990292, "grad_norm": 0.3411401138107286, "learning_rate": 9.8263242784987e-06, "loss": 0.5621, "step": 1267 }, { "epoch": 0.8793342579750347, "grad_norm": 0.3500515446990955, "learning_rate": 9.825691246871283e-06, "loss": 0.5652, "step": 1268 }, { "epoch": 0.8800277392510403, "grad_norm": 0.3501616261287641, "learning_rate": 9.825057084136786e-06, "loss": 0.5716, "step": 1269 }, { "epoch": 0.8807212205270458, "grad_norm": 0.3249135004019094, "learning_rate": 9.824421790443855e-06, "loss": 0.6303, "step": 1270 }, { "epoch": 0.8814147018030514, "grad_norm": 0.3402407337272337, "learning_rate": 9.823785365941394e-06, "loss": 0.5482, "step": 1271 }, { "epoch": 0.8821081830790569, "grad_norm": 0.34989503534717264, "learning_rate": 9.82314781077858e-06, "loss": 0.5265, "step": 1272 }, { "epoch": 0.8828016643550625, "grad_norm": 0.33234210738171654, "learning_rate": 9.82250912510485e-06, "loss": 0.5606, "step": 1273 }, { "epoch": 0.883495145631068, "grad_norm": 0.3127730100828181, "learning_rate": 9.821869309069907e-06, "loss": 0.5963, "step": 1274 }, { "epoch": 0.8841886269070736, "grad_norm": 0.3484755831995452, "learning_rate": 9.821228362823719e-06, "loss": 0.5432, "step": 1275 }, { "epoch": 0.8848821081830791, "grad_norm": 0.36161292333184586, "learning_rate": 9.82058628651652e-06, "loss": 0.6208, "step": 1276 }, { "epoch": 0.8855755894590847, "grad_norm": 0.3181656179118097, "learning_rate": 9.819943080298808e-06, "loss": 0.5685, "step": 1277 }, { "epoch": 0.8862690707350902, "grad_norm": 0.34746536626784236, "learning_rate": 9.819298744321346e-06, "loss": 0.5881, "step": 1278 }, { "epoch": 0.8869625520110958, "grad_norm": 0.3541273059375999, "learning_rate": 9.818653278735163e-06, "loss": 0.5886, "step": 1279 }, { "epoch": 0.8876560332871013, "grad_norm": 0.34807479241564754, "learning_rate": 
9.818006683691547e-06, "loss": 0.5312, "step": 1280 }, { "epoch": 0.8883495145631068, "grad_norm": 0.36630083180958223, "learning_rate": 9.817358959342057e-06, "loss": 0.5635, "step": 1281 }, { "epoch": 0.8890429958391124, "grad_norm": 0.34322476466741053, "learning_rate": 9.81671010583852e-06, "loss": 0.5713, "step": 1282 }, { "epoch": 0.8897364771151179, "grad_norm": 0.31767451134940866, "learning_rate": 9.816060123333016e-06, "loss": 0.4881, "step": 1283 }, { "epoch": 0.8904299583911235, "grad_norm": 0.36504358027982725, "learning_rate": 9.815409011977899e-06, "loss": 0.5475, "step": 1284 }, { "epoch": 0.891123439667129, "grad_norm": 0.3294562400040061, "learning_rate": 9.814756771925785e-06, "loss": 0.5629, "step": 1285 }, { "epoch": 0.8918169209431346, "grad_norm": 0.3162759299057603, "learning_rate": 9.814103403329552e-06, "loss": 0.5342, "step": 1286 }, { "epoch": 0.8925104022191401, "grad_norm": 0.3323054397959687, "learning_rate": 9.813448906342348e-06, "loss": 0.5783, "step": 1287 }, { "epoch": 0.8932038834951457, "grad_norm": 0.31126861920575605, "learning_rate": 9.81279328111758e-06, "loss": 0.5362, "step": 1288 }, { "epoch": 0.8938973647711512, "grad_norm": 0.36914508610428837, "learning_rate": 9.812136527808924e-06, "loss": 0.564, "step": 1289 }, { "epoch": 0.8945908460471568, "grad_norm": 0.3538761761079088, "learning_rate": 9.811478646570316e-06, "loss": 0.5656, "step": 1290 }, { "epoch": 0.8952843273231623, "grad_norm": 0.3424567488034912, "learning_rate": 9.810819637555961e-06, "loss": 0.5709, "step": 1291 }, { "epoch": 0.8959778085991679, "grad_norm": 0.3191712852224483, "learning_rate": 9.810159500920324e-06, "loss": 0.5277, "step": 1292 }, { "epoch": 0.8966712898751734, "grad_norm": 0.36731342267432715, "learning_rate": 9.809498236818136e-06, "loss": 0.5178, "step": 1293 }, { "epoch": 0.897364771151179, "grad_norm": 0.3377594492144452, "learning_rate": 9.808835845404393e-06, "loss": 0.5976, "step": 1294 }, { "epoch": 0.8980582524271845, "grad_norm": 0.3170764097891206, "learning_rate": 9.808172326834356e-06, "loss": 0.5529, "step": 1295 }, { "epoch": 0.8987517337031901, "grad_norm": 0.34066135968844374, "learning_rate": 9.807507681263549e-06, "loss": 0.5612, "step": 1296 }, { "epoch": 0.8994452149791956, "grad_norm": 0.3325589108742296, "learning_rate": 9.806841908847758e-06, "loss": 0.5772, "step": 1297 }, { "epoch": 0.9001386962552012, "grad_norm": 0.31731030396206666, "learning_rate": 9.806175009743035e-06, "loss": 0.5435, "step": 1298 }, { "epoch": 0.9008321775312067, "grad_norm": 0.31327367398706046, "learning_rate": 9.8055069841057e-06, "loss": 0.5438, "step": 1299 }, { "epoch": 0.9015256588072122, "grad_norm": 0.31364140750488745, "learning_rate": 9.80483783209233e-06, "loss": 0.5796, "step": 1300 }, { "epoch": 0.9022191400832178, "grad_norm": 0.31838352680737114, "learning_rate": 9.80416755385977e-06, "loss": 0.5677, "step": 1301 }, { "epoch": 0.9029126213592233, "grad_norm": 0.33174094161455114, "learning_rate": 9.80349614956513e-06, "loss": 0.5774, "step": 1302 }, { "epoch": 0.9036061026352289, "grad_norm": 0.3406181144519224, "learning_rate": 9.80282361936578e-06, "loss": 0.5012, "step": 1303 }, { "epoch": 0.9042995839112344, "grad_norm": 0.3457856925916943, "learning_rate": 9.802149963419356e-06, "loss": 0.5474, "step": 1304 }, { "epoch": 0.90499306518724, "grad_norm": 0.3712531282790912, "learning_rate": 9.801475181883763e-06, "loss": 0.6067, "step": 1305 }, { "epoch": 0.9056865464632455, "grad_norm": 0.33754065670955147, "learning_rate": 
9.800799274917159e-06, "loss": 0.5766, "step": 1306 }, { "epoch": 0.9063800277392511, "grad_norm": 0.35865679017804203, "learning_rate": 9.800122242677975e-06, "loss": 0.5808, "step": 1307 }, { "epoch": 0.9070735090152566, "grad_norm": 0.30684712161365424, "learning_rate": 9.7994440853249e-06, "loss": 0.5465, "step": 1308 }, { "epoch": 0.9077669902912622, "grad_norm": 0.30032159591038865, "learning_rate": 9.798764803016892e-06, "loss": 0.495, "step": 1309 }, { "epoch": 0.9084604715672677, "grad_norm": 0.3720484777884542, "learning_rate": 9.798084395913167e-06, "loss": 0.5922, "step": 1310 }, { "epoch": 0.9091539528432733, "grad_norm": 0.3867791943651646, "learning_rate": 9.79740286417321e-06, "loss": 0.5849, "step": 1311 }, { "epoch": 0.9098474341192788, "grad_norm": 0.32768493521246145, "learning_rate": 9.796720207956765e-06, "loss": 0.5702, "step": 1312 }, { "epoch": 0.9105409153952844, "grad_norm": 0.34942558541562957, "learning_rate": 9.796036427423844e-06, "loss": 0.5856, "step": 1313 }, { "epoch": 0.9112343966712899, "grad_norm": 0.3744950031666534, "learning_rate": 9.795351522734718e-06, "loss": 0.5531, "step": 1314 }, { "epoch": 0.9119278779472955, "grad_norm": 0.3282846607161552, "learning_rate": 9.794665494049926e-06, "loss": 0.5593, "step": 1315 }, { "epoch": 0.912621359223301, "grad_norm": 0.5530462075254108, "learning_rate": 9.793978341530265e-06, "loss": 0.5778, "step": 1316 }, { "epoch": 0.9133148404993066, "grad_norm": 0.3510633295830234, "learning_rate": 9.793290065336802e-06, "loss": 0.6148, "step": 1317 }, { "epoch": 0.9140083217753121, "grad_norm": 0.3416426392043918, "learning_rate": 9.792600665630862e-06, "loss": 0.5365, "step": 1318 }, { "epoch": 0.9147018030513177, "grad_norm": 0.3214166710578825, "learning_rate": 9.791910142574035e-06, "loss": 0.6077, "step": 1319 }, { "epoch": 0.9153952843273232, "grad_norm": 0.3058861338392189, "learning_rate": 9.791218496328176e-06, "loss": 0.5727, "step": 1320 }, { "epoch": 0.9160887656033287, "grad_norm": 0.36429414031068824, "learning_rate": 9.7905257270554e-06, "loss": 0.5632, "step": 1321 }, { "epoch": 0.9167822468793343, "grad_norm": 0.32208427101954895, "learning_rate": 9.789831834918088e-06, "loss": 0.5111, "step": 1322 }, { "epoch": 0.9174757281553398, "grad_norm": 0.3398682101476737, "learning_rate": 9.789136820078884e-06, "loss": 0.5645, "step": 1323 }, { "epoch": 0.9181692094313454, "grad_norm": 0.3609865377687872, "learning_rate": 9.788440682700695e-06, "loss": 0.5868, "step": 1324 }, { "epoch": 0.9188626907073509, "grad_norm": 0.357367726817716, "learning_rate": 9.787743422946689e-06, "loss": 0.5962, "step": 1325 }, { "epoch": 0.9195561719833565, "grad_norm": 0.3166361463961456, "learning_rate": 9.787045040980299e-06, "loss": 0.5694, "step": 1326 }, { "epoch": 0.920249653259362, "grad_norm": 0.3301450043009037, "learning_rate": 9.78634553696522e-06, "loss": 0.5504, "step": 1327 }, { "epoch": 0.9209431345353676, "grad_norm": 0.33184365807605193, "learning_rate": 9.785644911065411e-06, "loss": 0.5586, "step": 1328 }, { "epoch": 0.9216366158113731, "grad_norm": 0.3688433994965824, "learning_rate": 9.784943163445095e-06, "loss": 0.4798, "step": 1329 }, { "epoch": 0.9223300970873787, "grad_norm": 0.3618513962661808, "learning_rate": 9.784240294268756e-06, "loss": 0.5455, "step": 1330 }, { "epoch": 0.9230235783633842, "grad_norm": 0.3457450644886999, "learning_rate": 9.783536303701141e-06, "loss": 0.5088, "step": 1331 }, { "epoch": 0.9237170596393898, "grad_norm": 0.3486225872824421, "learning_rate": 
9.782831191907261e-06, "loss": 0.5742, "step": 1332 }, { "epoch": 0.9244105409153953, "grad_norm": 0.3315065718942818, "learning_rate": 9.782124959052388e-06, "loss": 0.5872, "step": 1333 }, { "epoch": 0.9251040221914009, "grad_norm": 0.3227113188069714, "learning_rate": 9.781417605302059e-06, "loss": 0.5331, "step": 1334 }, { "epoch": 0.9257975034674064, "grad_norm": 0.3263752872926798, "learning_rate": 9.780709130822071e-06, "loss": 0.5716, "step": 1335 }, { "epoch": 0.926490984743412, "grad_norm": 0.3520999513258659, "learning_rate": 9.779999535778487e-06, "loss": 0.596, "step": 1336 }, { "epoch": 0.9271844660194175, "grad_norm": 0.3276157528264304, "learning_rate": 9.779288820337628e-06, "loss": 0.5524, "step": 1337 }, { "epoch": 0.9278779472954231, "grad_norm": 0.33534342304864945, "learning_rate": 9.778576984666087e-06, "loss": 0.4673, "step": 1338 }, { "epoch": 0.9285714285714286, "grad_norm": 0.33542174245312506, "learning_rate": 9.777864028930705e-06, "loss": 0.5751, "step": 1339 }, { "epoch": 0.9292649098474342, "grad_norm": 0.3392654105733671, "learning_rate": 9.7771499532986e-06, "loss": 0.5306, "step": 1340 }, { "epoch": 0.9299583911234397, "grad_norm": 0.31461597060503727, "learning_rate": 9.776434757937141e-06, "loss": 0.5665, "step": 1341 }, { "epoch": 0.9306518723994452, "grad_norm": 0.592434449703214, "learning_rate": 9.775718443013969e-06, "loss": 0.6112, "step": 1342 }, { "epoch": 0.9313453536754508, "grad_norm": 0.3427716227598802, "learning_rate": 9.77500100869698e-06, "loss": 0.5809, "step": 1343 }, { "epoch": 0.9320388349514563, "grad_norm": 0.34024552649717665, "learning_rate": 9.774282455154338e-06, "loss": 0.6318, "step": 1344 }, { "epoch": 0.9327323162274619, "grad_norm": 0.3701056669384116, "learning_rate": 9.773562782554467e-06, "loss": 0.6098, "step": 1345 }, { "epoch": 0.9334257975034674, "grad_norm": 0.35332055119718075, "learning_rate": 9.77284199106605e-06, "loss": 0.5642, "step": 1346 }, { "epoch": 0.934119278779473, "grad_norm": 0.3400850066223855, "learning_rate": 9.772120080858037e-06, "loss": 0.6003, "step": 1347 }, { "epoch": 0.9348127600554785, "grad_norm": 0.3553058316745625, "learning_rate": 9.771397052099637e-06, "loss": 0.5221, "step": 1348 }, { "epoch": 0.9355062413314841, "grad_norm": 0.31577122634395005, "learning_rate": 9.770672904960326e-06, "loss": 0.5484, "step": 1349 }, { "epoch": 0.9361997226074896, "grad_norm": 0.3379216804339027, "learning_rate": 9.769947639609837e-06, "loss": 0.5583, "step": 1350 }, { "epoch": 0.9368932038834952, "grad_norm": 0.32929958892366473, "learning_rate": 9.769221256218165e-06, "loss": 0.5567, "step": 1351 }, { "epoch": 0.9375866851595007, "grad_norm": 0.31964291586065235, "learning_rate": 9.76849375495557e-06, "loss": 0.5885, "step": 1352 }, { "epoch": 0.9382801664355063, "grad_norm": 0.33509612920909093, "learning_rate": 9.767765135992577e-06, "loss": 0.5264, "step": 1353 }, { "epoch": 0.9389736477115118, "grad_norm": 0.3077192276393366, "learning_rate": 9.767035399499965e-06, "loss": 0.5198, "step": 1354 }, { "epoch": 0.9396671289875174, "grad_norm": 0.36276586392069327, "learning_rate": 9.76630454564878e-06, "loss": 0.4841, "step": 1355 }, { "epoch": 0.9403606102635229, "grad_norm": 0.3348890170241416, "learning_rate": 9.765572574610326e-06, "loss": 0.5096, "step": 1356 }, { "epoch": 0.9410540915395285, "grad_norm": 0.3368382436652647, "learning_rate": 9.764839486556177e-06, "loss": 0.6037, "step": 1357 }, { "epoch": 0.941747572815534, "grad_norm": 0.3385194917774941, "learning_rate": 
9.764105281658161e-06, "loss": 0.6269, "step": 1358 }, { "epoch": 0.9424410540915396, "grad_norm": 0.320619109857471, "learning_rate": 9.76336996008837e-06, "loss": 0.5177, "step": 1359 }, { "epoch": 0.9431345353675451, "grad_norm": 0.29871327239631223, "learning_rate": 9.762633522019159e-06, "loss": 0.5513, "step": 1360 }, { "epoch": 0.9438280166435506, "grad_norm": 0.35889043316034214, "learning_rate": 9.761895967623141e-06, "loss": 0.5673, "step": 1361 }, { "epoch": 0.9445214979195562, "grad_norm": 0.3203059268859659, "learning_rate": 9.761157297073196e-06, "loss": 0.5435, "step": 1362 }, { "epoch": 0.9452149791955617, "grad_norm": 0.3511537235065825, "learning_rate": 9.760417510542464e-06, "loss": 0.6103, "step": 1363 }, { "epoch": 0.9459084604715673, "grad_norm": 0.3604565708923628, "learning_rate": 9.759676608204342e-06, "loss": 0.6054, "step": 1364 }, { "epoch": 0.9466019417475728, "grad_norm": 0.3342447123223769, "learning_rate": 9.758934590232495e-06, "loss": 0.5307, "step": 1365 }, { "epoch": 0.9472954230235784, "grad_norm": 0.37965077086134635, "learning_rate": 9.758191456800848e-06, "loss": 0.5935, "step": 1366 }, { "epoch": 0.9479889042995839, "grad_norm": 0.3478112253973906, "learning_rate": 9.757447208083582e-06, "loss": 0.5861, "step": 1367 }, { "epoch": 0.9486823855755895, "grad_norm": 0.323960790991273, "learning_rate": 9.756701844255145e-06, "loss": 0.5148, "step": 1368 }, { "epoch": 0.949375866851595, "grad_norm": 0.34803009997759937, "learning_rate": 9.755955365490246e-06, "loss": 0.5328, "step": 1369 }, { "epoch": 0.9500693481276006, "grad_norm": 0.37453541262630863, "learning_rate": 9.755207771963855e-06, "loss": 0.5353, "step": 1370 }, { "epoch": 0.9507628294036061, "grad_norm": 0.3773801044381051, "learning_rate": 9.754459063851198e-06, "loss": 0.5594, "step": 1371 }, { "epoch": 0.9514563106796117, "grad_norm": 0.33016084135655566, "learning_rate": 9.753709241327773e-06, "loss": 0.5512, "step": 1372 }, { "epoch": 0.9521497919556172, "grad_norm": 0.33497911250209167, "learning_rate": 9.752958304569327e-06, "loss": 0.5447, "step": 1373 }, { "epoch": 0.9528432732316228, "grad_norm": 0.34373072248120534, "learning_rate": 9.75220625375188e-06, "loss": 0.5838, "step": 1374 }, { "epoch": 0.9535367545076283, "grad_norm": 0.30820762039099153, "learning_rate": 9.7514530890517e-06, "loss": 0.5234, "step": 1375 }, { "epoch": 0.9542302357836339, "grad_norm": 0.4176006756154152, "learning_rate": 9.750698810645331e-06, "loss": 0.4976, "step": 1376 }, { "epoch": 0.9549237170596394, "grad_norm": 0.33539094543261083, "learning_rate": 9.749943418709567e-06, "loss": 0.5293, "step": 1377 }, { "epoch": 0.955617198335645, "grad_norm": 0.3324000901041926, "learning_rate": 9.749186913421465e-06, "loss": 0.5036, "step": 1378 }, { "epoch": 0.9563106796116505, "grad_norm": 0.3812265495386306, "learning_rate": 9.748429294958345e-06, "loss": 0.57, "step": 1379 }, { "epoch": 0.957004160887656, "grad_norm": 0.34076982875814876, "learning_rate": 9.74767056349779e-06, "loss": 0.5548, "step": 1380 }, { "epoch": 0.9576976421636616, "grad_norm": 0.37913491187013104, "learning_rate": 9.74691071921764e-06, "loss": 0.5821, "step": 1381 }, { "epoch": 0.9583911234396671, "grad_norm": 0.3382320688469176, "learning_rate": 9.746149762295994e-06, "loss": 0.5591, "step": 1382 }, { "epoch": 0.9590846047156727, "grad_norm": 0.3326244476434884, "learning_rate": 9.745387692911217e-06, "loss": 0.5585, "step": 1383 }, { "epoch": 0.9597780859916782, "grad_norm": 0.3078525234697808, "learning_rate": 
9.744624511241933e-06, "loss": 0.5572, "step": 1384 }, { "epoch": 0.9604715672676838, "grad_norm": 0.30277126124901926, "learning_rate": 9.743860217467024e-06, "loss": 0.4968, "step": 1385 }, { "epoch": 0.9611650485436893, "grad_norm": 0.33782360856625965, "learning_rate": 9.74309481176564e-06, "loss": 0.5236, "step": 1386 }, { "epoch": 0.9618585298196949, "grad_norm": 0.38359156051992216, "learning_rate": 9.742328294317181e-06, "loss": 0.5844, "step": 1387 }, { "epoch": 0.9625520110957004, "grad_norm": 0.35228815113528755, "learning_rate": 9.741560665301316e-06, "loss": 0.5921, "step": 1388 }, { "epoch": 0.963245492371706, "grad_norm": 0.31593010433623414, "learning_rate": 9.740791924897973e-06, "loss": 0.5203, "step": 1389 }, { "epoch": 0.9639389736477115, "grad_norm": 0.33872246664457734, "learning_rate": 9.740022073287334e-06, "loss": 0.5621, "step": 1390 }, { "epoch": 0.9646324549237171, "grad_norm": 0.3481728645175325, "learning_rate": 9.73925111064985e-06, "loss": 0.5459, "step": 1391 }, { "epoch": 0.9653259361997226, "grad_norm": 0.3541419905260788, "learning_rate": 9.738479037166231e-06, "loss": 0.6089, "step": 1392 }, { "epoch": 0.9660194174757282, "grad_norm": 0.3504710653288276, "learning_rate": 9.737705853017442e-06, "loss": 0.5465, "step": 1393 }, { "epoch": 0.9667128987517337, "grad_norm": 0.3063424863612362, "learning_rate": 9.736931558384713e-06, "loss": 0.498, "step": 1394 }, { "epoch": 0.9674063800277393, "grad_norm": 0.37557717405228963, "learning_rate": 9.736156153449534e-06, "loss": 0.5892, "step": 1395 }, { "epoch": 0.9680998613037448, "grad_norm": 0.34004988273217296, "learning_rate": 9.735379638393654e-06, "loss": 0.5521, "step": 1396 }, { "epoch": 0.9687933425797504, "grad_norm": 0.3387287763392039, "learning_rate": 9.73460201339908e-06, "loss": 0.5778, "step": 1397 }, { "epoch": 0.9694868238557559, "grad_norm": 0.3364092001009357, "learning_rate": 9.733823278648084e-06, "loss": 0.5313, "step": 1398 }, { "epoch": 0.9701803051317615, "grad_norm": 0.33563973505245465, "learning_rate": 9.733043434323197e-06, "loss": 0.5812, "step": 1399 }, { "epoch": 0.970873786407767, "grad_norm": 0.40173479450985844, "learning_rate": 9.732262480607207e-06, "loss": 0.5476, "step": 1400 }, { "epoch": 0.9715672676837726, "grad_norm": 0.34481293985057604, "learning_rate": 9.731480417683163e-06, "loss": 0.569, "step": 1401 }, { "epoch": 0.9722607489597781, "grad_norm": 0.3169175051150628, "learning_rate": 9.730697245734377e-06, "loss": 0.5368, "step": 1402 }, { "epoch": 0.9729542302357836, "grad_norm": 0.3519871300399793, "learning_rate": 9.729912964944419e-06, "loss": 0.5382, "step": 1403 }, { "epoch": 0.9736477115117892, "grad_norm": 0.3799305843804598, "learning_rate": 9.729127575497116e-06, "loss": 0.5176, "step": 1404 }, { "epoch": 0.9743411927877947, "grad_norm": 0.33770682525361484, "learning_rate": 9.72834107757656e-06, "loss": 0.5603, "step": 1405 }, { "epoch": 0.9750346740638003, "grad_norm": 0.34256064354796206, "learning_rate": 9.727553471367099e-06, "loss": 0.5568, "step": 1406 }, { "epoch": 0.9757281553398058, "grad_norm": 0.34803047833693523, "learning_rate": 9.726764757053343e-06, "loss": 0.6481, "step": 1407 }, { "epoch": 0.9764216366158114, "grad_norm": 0.3814251518224469, "learning_rate": 9.725974934820162e-06, "loss": 0.6152, "step": 1408 }, { "epoch": 0.9771151178918169, "grad_norm": 0.35670775377754504, "learning_rate": 9.725184004852681e-06, "loss": 0.5648, "step": 1409 }, { "epoch": 0.9778085991678225, "grad_norm": 0.32713035388587347, "learning_rate": 
9.724391967336293e-06, "loss": 0.531, "step": 1410 }, { "epoch": 0.978502080443828, "grad_norm": 0.3309089807646535, "learning_rate": 9.723598822456643e-06, "loss": 0.5004, "step": 1411 }, { "epoch": 0.9791955617198336, "grad_norm": 0.3189261185875698, "learning_rate": 9.722804570399638e-06, "loss": 0.5492, "step": 1412 }, { "epoch": 0.9798890429958391, "grad_norm": 0.3212823172868208, "learning_rate": 9.722009211351447e-06, "loss": 0.53, "step": 1413 }, { "epoch": 0.9805825242718447, "grad_norm": 0.32937476591309023, "learning_rate": 9.721212745498493e-06, "loss": 0.5836, "step": 1414 }, { "epoch": 0.9812760055478502, "grad_norm": 0.32920213673407056, "learning_rate": 9.720415173027466e-06, "loss": 0.5727, "step": 1415 }, { "epoch": 0.9819694868238558, "grad_norm": 0.3186506108728332, "learning_rate": 9.719616494125311e-06, "loss": 0.5419, "step": 1416 }, { "epoch": 0.9826629680998613, "grad_norm": 0.3454233802084189, "learning_rate": 9.718816708979228e-06, "loss": 0.6737, "step": 1417 }, { "epoch": 0.9833564493758669, "grad_norm": 0.3456448789102721, "learning_rate": 9.718015817776684e-06, "loss": 0.6151, "step": 1418 }, { "epoch": 0.9840499306518724, "grad_norm": 0.3596776939403398, "learning_rate": 9.717213820705403e-06, "loss": 0.5878, "step": 1419 }, { "epoch": 0.984743411927878, "grad_norm": 0.3492056949754736, "learning_rate": 9.716410717953364e-06, "loss": 0.5463, "step": 1420 }, { "epoch": 0.9854368932038835, "grad_norm": 0.32360037406045555, "learning_rate": 9.715606509708812e-06, "loss": 0.5638, "step": 1421 }, { "epoch": 0.986130374479889, "grad_norm": 0.4256266939199784, "learning_rate": 9.714801196160247e-06, "loss": 0.5373, "step": 1422 }, { "epoch": 0.9868238557558946, "grad_norm": 0.34243199989289114, "learning_rate": 9.713994777496427e-06, "loss": 0.5427, "step": 1423 }, { "epoch": 0.9875173370319001, "grad_norm": 0.35630692729016344, "learning_rate": 9.71318725390637e-06, "loss": 0.5726, "step": 1424 }, { "epoch": 0.9882108183079057, "grad_norm": 0.32541903837787706, "learning_rate": 9.712378625579358e-06, "loss": 0.5682, "step": 1425 }, { "epoch": 0.9889042995839112, "grad_norm": 0.33493185831625, "learning_rate": 9.711568892704924e-06, "loss": 0.5177, "step": 1426 }, { "epoch": 0.9895977808599168, "grad_norm": 0.38164286844146134, "learning_rate": 9.710758055472862e-06, "loss": 0.5466, "step": 1427 }, { "epoch": 0.9902912621359223, "grad_norm": 0.3363719194150675, "learning_rate": 9.709946114073231e-06, "loss": 0.6056, "step": 1428 }, { "epoch": 0.9909847434119279, "grad_norm": 0.34442741007792727, "learning_rate": 9.70913306869634e-06, "loss": 0.5251, "step": 1429 }, { "epoch": 0.9916782246879334, "grad_norm": 0.3359985260331031, "learning_rate": 9.708318919532766e-06, "loss": 0.5069, "step": 1430 }, { "epoch": 0.992371705963939, "grad_norm": 0.33141103763419355, "learning_rate": 9.707503666773334e-06, "loss": 0.507, "step": 1431 }, { "epoch": 0.9930651872399445, "grad_norm": 0.31911653176339655, "learning_rate": 9.706687310609137e-06, "loss": 0.5459, "step": 1432 }, { "epoch": 0.9937586685159501, "grad_norm": 0.33360539375342685, "learning_rate": 9.705869851231522e-06, "loss": 0.5217, "step": 1433 }, { "epoch": 0.9944521497919556, "grad_norm": 0.3261088579418762, "learning_rate": 9.705051288832095e-06, "loss": 0.5828, "step": 1434 }, { "epoch": 0.9951456310679612, "grad_norm": 0.33826855892003294, "learning_rate": 9.704231623602721e-06, "loss": 0.5251, "step": 1435 }, { "epoch": 0.9958391123439667, "grad_norm": 0.32907816015207925, "learning_rate": 
9.703410855735525e-06, "loss": 0.5328, "step": 1436 }, { "epoch": 0.9965325936199723, "grad_norm": 0.288222460017734, "learning_rate": 9.702588985422887e-06, "loss": 0.4961, "step": 1437 }, { "epoch": 0.9972260748959778, "grad_norm": 0.3641372537449462, "learning_rate": 9.701766012857448e-06, "loss": 0.5562, "step": 1438 }, { "epoch": 0.9979195561719834, "grad_norm": 0.3298489645493611, "learning_rate": 9.700941938232108e-06, "loss": 0.5677, "step": 1439 }, { "epoch": 0.9986130374479889, "grad_norm": 0.4119776348892953, "learning_rate": 9.700116761740024e-06, "loss": 0.5339, "step": 1440 }, { "epoch": 0.9993065187239945, "grad_norm": 0.3159886207892137, "learning_rate": 9.699290483574611e-06, "loss": 0.5503, "step": 1441 }, { "epoch": 1.0, "grad_norm": 0.41798594104440057, "learning_rate": 9.698463103929542e-06, "loss": 0.5842, "step": 1442 }, { "epoch": 1.0006934812760055, "grad_norm": 0.30210117536250447, "learning_rate": 9.69763462299875e-06, "loss": 0.5206, "step": 1443 }, { "epoch": 1.001386962552011, "grad_norm": 0.32902579991011144, "learning_rate": 9.696805040976425e-06, "loss": 0.5224, "step": 1444 }, { "epoch": 1.0020804438280166, "grad_norm": 0.33674165962137353, "learning_rate": 9.695974358057012e-06, "loss": 0.5794, "step": 1445 }, { "epoch": 1.0027739251040222, "grad_norm": 0.631204063917515, "learning_rate": 9.695142574435222e-06, "loss": 0.5339, "step": 1446 }, { "epoch": 1.0034674063800277, "grad_norm": 0.3558127426324594, "learning_rate": 9.694309690306013e-06, "loss": 0.5176, "step": 1447 }, { "epoch": 1.0041608876560333, "grad_norm": 0.3237328478105614, "learning_rate": 9.693475705864613e-06, "loss": 0.4968, "step": 1448 }, { "epoch": 1.0048543689320388, "grad_norm": 0.330996494521083, "learning_rate": 9.692640621306497e-06, "loss": 0.4693, "step": 1449 }, { "epoch": 1.0055478502080444, "grad_norm": 0.3607636151790628, "learning_rate": 9.691804436827409e-06, "loss": 0.5404, "step": 1450 }, { "epoch": 1.00624133148405, "grad_norm": 0.3430460470026121, "learning_rate": 9.690967152623337e-06, "loss": 0.5319, "step": 1451 }, { "epoch": 1.0069348127600555, "grad_norm": 0.3569378890517885, "learning_rate": 9.690128768890538e-06, "loss": 0.5697, "step": 1452 }, { "epoch": 1.007628294036061, "grad_norm": 0.3018546951841948, "learning_rate": 9.689289285825526e-06, "loss": 0.4494, "step": 1453 }, { "epoch": 1.0083217753120666, "grad_norm": 0.32523767944287174, "learning_rate": 9.688448703625063e-06, "loss": 0.5394, "step": 1454 }, { "epoch": 1.0090152565880721, "grad_norm": 0.3445515603593232, "learning_rate": 9.687607022486183e-06, "loss": 0.5127, "step": 1455 }, { "epoch": 1.0097087378640777, "grad_norm": 0.33812903590257065, "learning_rate": 9.686764242606164e-06, "loss": 0.5398, "step": 1456 }, { "epoch": 1.0104022191400832, "grad_norm": 0.32268794575907395, "learning_rate": 9.68592036418255e-06, "loss": 0.5247, "step": 1457 }, { "epoch": 1.0110957004160888, "grad_norm": 0.34210414184527377, "learning_rate": 9.685075387413139e-06, "loss": 0.5244, "step": 1458 }, { "epoch": 1.0117891816920943, "grad_norm": 0.3368594866088289, "learning_rate": 9.68422931249599e-06, "loss": 0.5604, "step": 1459 }, { "epoch": 1.0124826629680999, "grad_norm": 0.34154410786238604, "learning_rate": 9.683382139629414e-06, "loss": 0.5333, "step": 1460 }, { "epoch": 1.0131761442441054, "grad_norm": 0.34571869034008895, "learning_rate": 9.682533869011983e-06, "loss": 0.4838, "step": 1461 }, { "epoch": 1.013869625520111, "grad_norm": 0.3146540126331417, "learning_rate": 9.681684500842525e-06, "loss": 
0.5121, "step": 1462 }, { "epoch": 1.0145631067961165, "grad_norm": 0.3125075765645134, "learning_rate": 9.680834035320127e-06, "loss": 0.5131, "step": 1463 }, { "epoch": 1.015256588072122, "grad_norm": 0.39937888878270666, "learning_rate": 9.679982472644132e-06, "loss": 0.5486, "step": 1464 }, { "epoch": 1.0159500693481276, "grad_norm": 0.3984132940813232, "learning_rate": 9.679129813014137e-06, "loss": 0.5956, "step": 1465 }, { "epoch": 1.0166435506241331, "grad_norm": 0.33557152839212523, "learning_rate": 9.678276056630005e-06, "loss": 0.4936, "step": 1466 }, { "epoch": 1.0173370319001387, "grad_norm": 0.3358805794503355, "learning_rate": 9.677421203691844e-06, "loss": 0.4719, "step": 1467 }, { "epoch": 1.0180305131761442, "grad_norm": 0.3422084296775962, "learning_rate": 9.67656525440003e-06, "loss": 0.4905, "step": 1468 }, { "epoch": 1.0187239944521498, "grad_norm": 0.3244902181064797, "learning_rate": 9.67570820895519e-06, "loss": 0.5092, "step": 1469 }, { "epoch": 1.0194174757281553, "grad_norm": 0.34140129804374164, "learning_rate": 9.674850067558209e-06, "loss": 0.529, "step": 1470 }, { "epoch": 1.0201109570041609, "grad_norm": 0.36152772655245036, "learning_rate": 9.673990830410227e-06, "loss": 0.5079, "step": 1471 }, { "epoch": 1.0208044382801664, "grad_norm": 0.37207997894308903, "learning_rate": 9.673130497712646e-06, "loss": 0.5246, "step": 1472 }, { "epoch": 1.021497919556172, "grad_norm": 0.34537185865971043, "learning_rate": 9.672269069667122e-06, "loss": 0.4273, "step": 1473 }, { "epoch": 1.0221914008321775, "grad_norm": 0.41690655380322045, "learning_rate": 9.671406546475564e-06, "loss": 0.4621, "step": 1474 }, { "epoch": 1.022884882108183, "grad_norm": 0.3458134469043695, "learning_rate": 9.670542928340145e-06, "loss": 0.5281, "step": 1475 }, { "epoch": 1.0235783633841886, "grad_norm": 0.37397991921336665, "learning_rate": 9.669678215463289e-06, "loss": 0.5234, "step": 1476 }, { "epoch": 1.0242718446601942, "grad_norm": 0.3439034685625693, "learning_rate": 9.66881240804768e-06, "loss": 0.4929, "step": 1477 }, { "epoch": 1.0249653259361997, "grad_norm": 0.34605004852510335, "learning_rate": 9.667945506296252e-06, "loss": 0.5, "step": 1478 }, { "epoch": 1.0256588072122053, "grad_norm": 0.35541572537780564, "learning_rate": 9.667077510412206e-06, "loss": 0.4906, "step": 1479 }, { "epoch": 1.0263522884882108, "grad_norm": 0.3345591255482935, "learning_rate": 9.666208420598993e-06, "loss": 0.5132, "step": 1480 }, { "epoch": 1.0270457697642164, "grad_norm": 0.2982284334195003, "learning_rate": 9.66533823706032e-06, "loss": 0.4397, "step": 1481 }, { "epoch": 1.027739251040222, "grad_norm": 0.4170703038184141, "learning_rate": 9.664466960000152e-06, "loss": 0.5478, "step": 1482 }, { "epoch": 1.0284327323162274, "grad_norm": 0.3775213410406168, "learning_rate": 9.663594589622711e-06, "loss": 0.573, "step": 1483 }, { "epoch": 1.029126213592233, "grad_norm": 0.36964517040338624, "learning_rate": 9.662721126132473e-06, "loss": 0.5079, "step": 1484 }, { "epoch": 1.0298196948682385, "grad_norm": 0.3232631642537663, "learning_rate": 9.661846569734173e-06, "loss": 0.4855, "step": 1485 }, { "epoch": 1.030513176144244, "grad_norm": 0.3273358873941601, "learning_rate": 9.660970920632798e-06, "loss": 0.4769, "step": 1486 }, { "epoch": 1.0312066574202496, "grad_norm": 0.36800348272304073, "learning_rate": 9.660094179033596e-06, "loss": 0.5083, "step": 1487 }, { "epoch": 1.0319001386962552, "grad_norm": 0.36080054248376503, "learning_rate": 9.659216345142068e-06, "loss": 0.6337, "step": 1488 
}, { "epoch": 1.0325936199722607, "grad_norm": 0.36832389123711534, "learning_rate": 9.658337419163973e-06, "loss": 0.5422, "step": 1489 }, { "epoch": 1.0332871012482663, "grad_norm": 0.3267218741240164, "learning_rate": 9.657457401305324e-06, "loss": 0.4771, "step": 1490 }, { "epoch": 1.0339805825242718, "grad_norm": 0.35264188713803, "learning_rate": 9.656576291772392e-06, "loss": 0.5254, "step": 1491 }, { "epoch": 1.0346740638002774, "grad_norm": 0.3560121286153758, "learning_rate": 9.655694090771701e-06, "loss": 0.5275, "step": 1492 }, { "epoch": 1.035367545076283, "grad_norm": 0.3517252354642808, "learning_rate": 9.654810798510033e-06, "loss": 0.47, "step": 1493 }, { "epoch": 1.0360610263522885, "grad_norm": 0.36263265506354103, "learning_rate": 9.653926415194426e-06, "loss": 0.4917, "step": 1494 }, { "epoch": 1.036754507628294, "grad_norm": 0.31992687288964183, "learning_rate": 9.653040941032173e-06, "loss": 0.4932, "step": 1495 }, { "epoch": 1.0374479889042996, "grad_norm": 0.45065245541497273, "learning_rate": 9.652154376230822e-06, "loss": 0.4644, "step": 1496 }, { "epoch": 1.0381414701803051, "grad_norm": 0.3501687836457089, "learning_rate": 9.651266720998176e-06, "loss": 0.5284, "step": 1497 }, { "epoch": 1.0388349514563107, "grad_norm": 0.29717922058653945, "learning_rate": 9.650377975542298e-06, "loss": 0.4688, "step": 1498 }, { "epoch": 1.0395284327323162, "grad_norm": 0.46145110093051234, "learning_rate": 9.649488140071503e-06, "loss": 0.5344, "step": 1499 }, { "epoch": 1.0402219140083218, "grad_norm": 0.3253067300049634, "learning_rate": 9.64859721479436e-06, "loss": 0.5179, "step": 1500 }, { "epoch": 1.0409153952843273, "grad_norm": 0.4273424033637859, "learning_rate": 9.647705199919697e-06, "loss": 0.5554, "step": 1501 }, { "epoch": 1.0416088765603329, "grad_norm": 0.3377962612132626, "learning_rate": 9.646812095656595e-06, "loss": 0.547, "step": 1502 }, { "epoch": 1.0423023578363384, "grad_norm": 0.3181516767383569, "learning_rate": 9.645917902214393e-06, "loss": 0.494, "step": 1503 }, { "epoch": 1.042995839112344, "grad_norm": 0.3529567066677283, "learning_rate": 9.64502261980268e-06, "loss": 0.4958, "step": 1504 }, { "epoch": 1.0436893203883495, "grad_norm": 0.3974998943327576, "learning_rate": 9.644126248631306e-06, "loss": 0.5329, "step": 1505 }, { "epoch": 1.044382801664355, "grad_norm": 0.3627345336594509, "learning_rate": 9.643228788910374e-06, "loss": 0.5721, "step": 1506 }, { "epoch": 1.0450762829403606, "grad_norm": 0.3447151808243214, "learning_rate": 9.642330240850244e-06, "loss": 0.4987, "step": 1507 }, { "epoch": 1.0457697642163661, "grad_norm": 0.31171379839843344, "learning_rate": 9.641430604661523e-06, "loss": 0.4798, "step": 1508 }, { "epoch": 1.0464632454923717, "grad_norm": 0.3491924560910556, "learning_rate": 9.640529880555086e-06, "loss": 0.5327, "step": 1509 }, { "epoch": 1.0471567267683772, "grad_norm": 0.3470118733588684, "learning_rate": 9.639628068742053e-06, "loss": 0.4927, "step": 1510 }, { "epoch": 1.0478502080443828, "grad_norm": 0.3165401535268853, "learning_rate": 9.638725169433801e-06, "loss": 0.5064, "step": 1511 }, { "epoch": 1.0485436893203883, "grad_norm": 0.3450271285443684, "learning_rate": 9.637821182841965e-06, "loss": 0.5146, "step": 1512 }, { "epoch": 1.0492371705963939, "grad_norm": 0.35980398821611514, "learning_rate": 9.636916109178433e-06, "loss": 0.5088, "step": 1513 }, { "epoch": 1.0499306518723994, "grad_norm": 0.32696446968109916, "learning_rate": 9.636009948655348e-06, "loss": 0.5535, "step": 1514 }, { "epoch": 
1.050624133148405, "grad_norm": 0.35722384331251, "learning_rate": 9.635102701485103e-06, "loss": 0.5224, "step": 1515 }, { "epoch": 1.0513176144244105, "grad_norm": 0.34472637255773353, "learning_rate": 9.634194367880357e-06, "loss": 0.5181, "step": 1516 }, { "epoch": 1.052011095700416, "grad_norm": 0.331116228361004, "learning_rate": 9.633284948054014e-06, "loss": 0.5302, "step": 1517 }, { "epoch": 1.0527045769764216, "grad_norm": 0.3221314677000127, "learning_rate": 9.632374442219232e-06, "loss": 0.5154, "step": 1518 }, { "epoch": 1.0533980582524272, "grad_norm": 0.386738543078458, "learning_rate": 9.631462850589432e-06, "loss": 0.4517, "step": 1519 }, { "epoch": 1.0540915395284327, "grad_norm": 0.3421683778508663, "learning_rate": 9.630550173378283e-06, "loss": 0.477, "step": 1520 }, { "epoch": 1.0547850208044383, "grad_norm": 0.36648948977771956, "learning_rate": 9.629636410799709e-06, "loss": 0.5416, "step": 1521 }, { "epoch": 1.0554785020804438, "grad_norm": 0.3480063837990978, "learning_rate": 9.628721563067888e-06, "loss": 0.5573, "step": 1522 }, { "epoch": 1.0561719833564494, "grad_norm": 0.3412320378121699, "learning_rate": 9.627805630397257e-06, "loss": 0.5525, "step": 1523 }, { "epoch": 1.056865464632455, "grad_norm": 0.34131100799573993, "learning_rate": 9.626888613002502e-06, "loss": 0.4414, "step": 1524 }, { "epoch": 1.0575589459084604, "grad_norm": 0.3416796835258787, "learning_rate": 9.625970511098566e-06, "loss": 0.4887, "step": 1525 }, { "epoch": 1.058252427184466, "grad_norm": 0.35632388953029986, "learning_rate": 9.625051324900645e-06, "loss": 0.5043, "step": 1526 }, { "epoch": 1.0589459084604715, "grad_norm": 0.5440807322096927, "learning_rate": 9.624131054624189e-06, "loss": 0.4934, "step": 1527 }, { "epoch": 1.059639389736477, "grad_norm": 0.3308052835280193, "learning_rate": 9.623209700484903e-06, "loss": 0.4871, "step": 1528 }, { "epoch": 1.0603328710124826, "grad_norm": 0.3267267804163821, "learning_rate": 9.622287262698748e-06, "loss": 0.5444, "step": 1529 }, { "epoch": 1.0610263522884882, "grad_norm": 0.33432560623564445, "learning_rate": 9.621363741481933e-06, "loss": 0.5253, "step": 1530 }, { "epoch": 1.0617198335644937, "grad_norm": 0.3712652577662211, "learning_rate": 9.620439137050927e-06, "loss": 0.5067, "step": 1531 }, { "epoch": 1.0624133148404993, "grad_norm": 0.3864313408235188, "learning_rate": 9.619513449622451e-06, "loss": 0.6148, "step": 1532 }, { "epoch": 1.0631067961165048, "grad_norm": 0.3513007549930898, "learning_rate": 9.618586679413477e-06, "loss": 0.5435, "step": 1533 }, { "epoch": 1.0638002773925104, "grad_norm": 0.30495454581270853, "learning_rate": 9.617658826641235e-06, "loss": 0.4324, "step": 1534 }, { "epoch": 1.064493758668516, "grad_norm": 0.35225997807973775, "learning_rate": 9.616729891523207e-06, "loss": 0.5452, "step": 1535 }, { "epoch": 1.0651872399445215, "grad_norm": 0.3384302270424511, "learning_rate": 9.61579987427713e-06, "loss": 0.4571, "step": 1536 }, { "epoch": 1.065880721220527, "grad_norm": 0.3644638546315168, "learning_rate": 9.61486877512099e-06, "loss": 0.5117, "step": 1537 }, { "epoch": 1.0665742024965326, "grad_norm": 0.37408064789556505, "learning_rate": 9.61393659427303e-06, "loss": 0.5233, "step": 1538 }, { "epoch": 1.0672676837725381, "grad_norm": 0.46162782166701477, "learning_rate": 9.613003331951749e-06, "loss": 0.5393, "step": 1539 }, { "epoch": 1.0679611650485437, "grad_norm": 0.35267804778946427, "learning_rate": 9.612068988375898e-06, "loss": 0.5304, "step": 1540 }, { "epoch": 1.0686546463245492, 
"grad_norm": 0.351513786905638, "learning_rate": 9.611133563764476e-06, "loss": 0.5513, "step": 1541 }, { "epoch": 1.0693481276005548, "grad_norm": 0.3718476035880555, "learning_rate": 9.610197058336743e-06, "loss": 0.4899, "step": 1542 }, { "epoch": 1.0700416088765603, "grad_norm": 0.3112657247404451, "learning_rate": 9.609259472312208e-06, "loss": 0.458, "step": 1543 }, { "epoch": 1.0707350901525658, "grad_norm": 0.3309923751917419, "learning_rate": 9.608320805910633e-06, "loss": 0.5381, "step": 1544 }, { "epoch": 1.0714285714285714, "grad_norm": 0.3245404525893565, "learning_rate": 9.60738105935204e-06, "loss": 0.5078, "step": 1545 }, { "epoch": 1.072122052704577, "grad_norm": 0.31387432841180757, "learning_rate": 9.60644023285669e-06, "loss": 0.4501, "step": 1546 }, { "epoch": 1.0728155339805825, "grad_norm": 0.3540152884221349, "learning_rate": 9.605498326645115e-06, "loss": 0.5162, "step": 1547 }, { "epoch": 1.073509015256588, "grad_norm": 0.331812147139016, "learning_rate": 9.604555340938084e-06, "loss": 0.4702, "step": 1548 }, { "epoch": 1.0742024965325936, "grad_norm": 0.31369777450737346, "learning_rate": 9.603611275956632e-06, "loss": 0.4658, "step": 1549 }, { "epoch": 1.0748959778085991, "grad_norm": 0.3772426365384512, "learning_rate": 9.602666131922036e-06, "loss": 0.5577, "step": 1550 }, { "epoch": 1.0755894590846047, "grad_norm": 0.36915143235135567, "learning_rate": 9.60171990905583e-06, "loss": 0.4429, "step": 1551 }, { "epoch": 1.0762829403606102, "grad_norm": 0.3658905222328625, "learning_rate": 9.60077260757981e-06, "loss": 0.5529, "step": 1552 }, { "epoch": 1.0769764216366158, "grad_norm": 0.3597757970231172, "learning_rate": 9.599824227716007e-06, "loss": 0.5574, "step": 1553 }, { "epoch": 1.0776699029126213, "grad_norm": 0.3406320223217798, "learning_rate": 9.598874769686721e-06, "loss": 0.4445, "step": 1554 }, { "epoch": 1.0783633841886269, "grad_norm": 0.3176393628455282, "learning_rate": 9.597924233714494e-06, "loss": 0.4514, "step": 1555 }, { "epoch": 1.0790568654646324, "grad_norm": 0.35286977008213233, "learning_rate": 9.596972620022127e-06, "loss": 0.5389, "step": 1556 }, { "epoch": 1.079750346740638, "grad_norm": 0.36820121109917364, "learning_rate": 9.59601992883267e-06, "loss": 0.5297, "step": 1557 }, { "epoch": 1.0804438280166435, "grad_norm": 0.43598247777508037, "learning_rate": 9.595066160369428e-06, "loss": 0.5182, "step": 1558 }, { "epoch": 1.081137309292649, "grad_norm": 0.3169545052914446, "learning_rate": 9.594111314855957e-06, "loss": 0.4946, "step": 1559 }, { "epoch": 1.0818307905686546, "grad_norm": 0.35954800326925107, "learning_rate": 9.593155392516066e-06, "loss": 0.5161, "step": 1560 }, { "epoch": 1.0825242718446602, "grad_norm": 0.35787675211563713, "learning_rate": 9.592198393573816e-06, "loss": 0.5184, "step": 1561 }, { "epoch": 1.0832177531206657, "grad_norm": 0.31632027854129957, "learning_rate": 9.591240318253521e-06, "loss": 0.4736, "step": 1562 }, { "epoch": 1.0839112343966713, "grad_norm": 2.9569303138652336, "learning_rate": 9.590281166779747e-06, "loss": 0.4809, "step": 1563 }, { "epoch": 1.0846047156726768, "grad_norm": 0.3245619107218756, "learning_rate": 9.58932093937731e-06, "loss": 0.4868, "step": 1564 }, { "epoch": 1.0852981969486823, "grad_norm": 0.3558834445926099, "learning_rate": 9.588359636271284e-06, "loss": 0.5747, "step": 1565 }, { "epoch": 1.085991678224688, "grad_norm": 0.3732340685361256, "learning_rate": 9.587397257686992e-06, "loss": 0.563, "step": 1566 }, { "epoch": 1.0866851595006934, "grad_norm": 
0.3444506803272316, "learning_rate": 9.586433803850002e-06, "loss": 0.527, "step": 1567 }, { "epoch": 1.087378640776699, "grad_norm": 0.34876944378169805, "learning_rate": 9.585469274986148e-06, "loss": 0.4909, "step": 1568 }, { "epoch": 1.0880721220527045, "grad_norm": 0.34385821310745096, "learning_rate": 9.584503671321503e-06, "loss": 0.5332, "step": 1569 }, { "epoch": 1.08876560332871, "grad_norm": 0.3237590691468841, "learning_rate": 9.583536993082402e-06, "loss": 0.5115, "step": 1570 }, { "epoch": 1.0894590846047156, "grad_norm": 0.3633712964933024, "learning_rate": 9.582569240495426e-06, "loss": 0.4967, "step": 1571 }, { "epoch": 1.0901525658807212, "grad_norm": 0.3604223774031506, "learning_rate": 9.581600413787406e-06, "loss": 0.6134, "step": 1572 }, { "epoch": 1.0908460471567267, "grad_norm": 0.3988966746488306, "learning_rate": 9.580630513185431e-06, "loss": 0.5194, "step": 1573 }, { "epoch": 1.0915395284327323, "grad_norm": 0.3885768601621719, "learning_rate": 9.579659538916839e-06, "loss": 0.5342, "step": 1574 }, { "epoch": 1.0922330097087378, "grad_norm": 0.33852293558354857, "learning_rate": 9.578687491209219e-06, "loss": 0.525, "step": 1575 }, { "epoch": 1.0929264909847434, "grad_norm": 0.33923252441294693, "learning_rate": 9.57771437029041e-06, "loss": 0.4983, "step": 1576 }, { "epoch": 1.093619972260749, "grad_norm": 0.3122927651682097, "learning_rate": 9.576740176388508e-06, "loss": 0.462, "step": 1577 }, { "epoch": 1.0943134535367545, "grad_norm": 0.3529504391622095, "learning_rate": 9.575764909731853e-06, "loss": 0.4737, "step": 1578 }, { "epoch": 1.09500693481276, "grad_norm": 0.3375825793054644, "learning_rate": 9.574788570549043e-06, "loss": 0.5378, "step": 1579 }, { "epoch": 1.0957004160887656, "grad_norm": 0.3600900993969908, "learning_rate": 9.573811159068925e-06, "loss": 0.5084, "step": 1580 }, { "epoch": 1.096393897364771, "grad_norm": 0.3412614032725487, "learning_rate": 9.572832675520595e-06, "loss": 0.5036, "step": 1581 }, { "epoch": 1.0970873786407767, "grad_norm": 0.3302963055916388, "learning_rate": 9.571853120133406e-06, "loss": 0.5398, "step": 1582 }, { "epoch": 1.0977808599167822, "grad_norm": 0.4466510484420232, "learning_rate": 9.570872493136954e-06, "loss": 0.4834, "step": 1583 }, { "epoch": 1.0984743411927878, "grad_norm": 0.47091943476306125, "learning_rate": 9.569890794761095e-06, "loss": 0.5083, "step": 1584 }, { "epoch": 1.0991678224687933, "grad_norm": 0.33670908274267125, "learning_rate": 9.56890802523593e-06, "loss": 0.5115, "step": 1585 }, { "epoch": 1.0998613037447988, "grad_norm": 0.3706401346849016, "learning_rate": 9.567924184791814e-06, "loss": 0.5599, "step": 1586 }, { "epoch": 1.1005547850208044, "grad_norm": 0.35786892708418927, "learning_rate": 9.56693927365935e-06, "loss": 0.5445, "step": 1587 }, { "epoch": 1.10124826629681, "grad_norm": 0.32991228579665166, "learning_rate": 9.565953292069397e-06, "loss": 0.5125, "step": 1588 }, { "epoch": 1.1019417475728155, "grad_norm": 0.38052894985225355, "learning_rate": 9.564966240253062e-06, "loss": 0.5141, "step": 1589 }, { "epoch": 1.102635228848821, "grad_norm": 0.32925034948767845, "learning_rate": 9.5639781184417e-06, "loss": 0.4939, "step": 1590 }, { "epoch": 1.1033287101248266, "grad_norm": 0.3578660324964388, "learning_rate": 9.56298892686692e-06, "loss": 0.5301, "step": 1591 }, { "epoch": 1.1040221914008321, "grad_norm": 0.3361576902296305, "learning_rate": 9.561998665760582e-06, "loss": 0.507, "step": 1592 }, { "epoch": 1.1047156726768377, "grad_norm": 0.338374185320022, 
"learning_rate": 9.561007335354797e-06, "loss": 0.5011, "step": 1593 }, { "epoch": 1.1054091539528432, "grad_norm": 0.4512941687336036, "learning_rate": 9.560014935881924e-06, "loss": 0.5435, "step": 1594 }, { "epoch": 1.1061026352288488, "grad_norm": 0.36437506730679203, "learning_rate": 9.559021467574576e-06, "loss": 0.6453, "step": 1595 }, { "epoch": 1.1067961165048543, "grad_norm": 0.3880107069803568, "learning_rate": 9.558026930665614e-06, "loss": 0.5415, "step": 1596 }, { "epoch": 1.1074895977808599, "grad_norm": 0.33285292398220034, "learning_rate": 9.55703132538815e-06, "loss": 0.5075, "step": 1597 }, { "epoch": 1.1081830790568654, "grad_norm": 0.36076949458117785, "learning_rate": 9.556034651975548e-06, "loss": 0.4632, "step": 1598 }, { "epoch": 1.108876560332871, "grad_norm": 0.32784402025878107, "learning_rate": 9.55503691066142e-06, "loss": 0.4756, "step": 1599 }, { "epoch": 1.1095700416088765, "grad_norm": 0.5641648416298276, "learning_rate": 9.554038101679628e-06, "loss": 0.5176, "step": 1600 }, { "epoch": 1.110263522884882, "grad_norm": 0.3626695197552836, "learning_rate": 9.553038225264288e-06, "loss": 0.5065, "step": 1601 }, { "epoch": 1.1109570041608876, "grad_norm": 0.34254840267303466, "learning_rate": 9.552037281649762e-06, "loss": 0.4998, "step": 1602 }, { "epoch": 1.1116504854368932, "grad_norm": 0.3717304989283044, "learning_rate": 9.551035271070665e-06, "loss": 0.5393, "step": 1603 }, { "epoch": 1.1123439667128987, "grad_norm": 0.34927161083359987, "learning_rate": 9.55003219376186e-06, "loss": 0.5168, "step": 1604 }, { "epoch": 1.1130374479889042, "grad_norm": 0.34127118205935114, "learning_rate": 9.549028049958462e-06, "loss": 0.5584, "step": 1605 }, { "epoch": 1.1137309292649098, "grad_norm": 0.4156028666764858, "learning_rate": 9.548022839895833e-06, "loss": 0.5404, "step": 1606 }, { "epoch": 1.1144244105409153, "grad_norm": 0.340186951918076, "learning_rate": 9.547016563809591e-06, "loss": 0.4658, "step": 1607 }, { "epoch": 1.115117891816921, "grad_norm": 0.40724070020490183, "learning_rate": 9.546009221935598e-06, "loss": 0.5181, "step": 1608 }, { "epoch": 1.1158113730929264, "grad_norm": 0.35791742169658786, "learning_rate": 9.545000814509965e-06, "loss": 0.5236, "step": 1609 }, { "epoch": 1.116504854368932, "grad_norm": 0.3301410764367193, "learning_rate": 9.543991341769057e-06, "loss": 0.5279, "step": 1610 }, { "epoch": 1.1171983356449375, "grad_norm": 0.3421366405858752, "learning_rate": 9.542980803949489e-06, "loss": 0.4561, "step": 1611 }, { "epoch": 1.117891816920943, "grad_norm": 0.33058199290264134, "learning_rate": 9.541969201288123e-06, "loss": 0.4606, "step": 1612 }, { "epoch": 1.1185852981969486, "grad_norm": 0.3514146351328263, "learning_rate": 9.54095653402207e-06, "loss": 0.5187, "step": 1613 }, { "epoch": 1.1192787794729542, "grad_norm": 0.35195486911332874, "learning_rate": 9.539942802388693e-06, "loss": 0.4513, "step": 1614 }, { "epoch": 1.1199722607489597, "grad_norm": 0.32031468396585744, "learning_rate": 9.538928006625603e-06, "loss": 0.4878, "step": 1615 }, { "epoch": 1.1206657420249653, "grad_norm": 0.4437305444298725, "learning_rate": 9.53791214697066e-06, "loss": 0.5536, "step": 1616 }, { "epoch": 1.1213592233009708, "grad_norm": 0.36914826941196477, "learning_rate": 9.536895223661975e-06, "loss": 0.5301, "step": 1617 }, { "epoch": 1.1220527045769764, "grad_norm": 0.3650776851106986, "learning_rate": 9.535877236937907e-06, "loss": 0.4827, "step": 1618 }, { "epoch": 1.122746185852982, "grad_norm": 0.31879212019918035, "learning_rate": 
9.534858187037066e-06, "loss": 0.4679, "step": 1619 }, { "epoch": 1.1234396671289875, "grad_norm": 0.3752832099133058, "learning_rate": 9.533838074198306e-06, "loss": 0.5697, "step": 1620 }, { "epoch": 1.124133148404993, "grad_norm": 0.35436660672541487, "learning_rate": 9.532816898660742e-06, "loss": 0.455, "step": 1621 }, { "epoch": 1.1248266296809986, "grad_norm": 0.3281861166023363, "learning_rate": 9.531794660663723e-06, "loss": 0.4671, "step": 1622 }, { "epoch": 1.125520110957004, "grad_norm": 0.3450769304183243, "learning_rate": 9.530771360446855e-06, "loss": 0.4539, "step": 1623 }, { "epoch": 1.1262135922330097, "grad_norm": 0.3696520913185721, "learning_rate": 9.529746998249994e-06, "loss": 0.5584, "step": 1624 }, { "epoch": 1.1269070735090152, "grad_norm": 0.3545638529670006, "learning_rate": 9.528721574313243e-06, "loss": 0.4959, "step": 1625 }, { "epoch": 1.1276005547850207, "grad_norm": 0.5869636382503858, "learning_rate": 9.527695088876953e-06, "loss": 0.5272, "step": 1626 }, { "epoch": 1.1282940360610263, "grad_norm": 0.3103827400875764, "learning_rate": 9.526667542181727e-06, "loss": 0.4534, "step": 1627 }, { "epoch": 1.1289875173370318, "grad_norm": 0.3311933927914896, "learning_rate": 9.52563893446841e-06, "loss": 0.435, "step": 1628 }, { "epoch": 1.1296809986130374, "grad_norm": 0.3685916125398502, "learning_rate": 9.524609265978105e-06, "loss": 0.5774, "step": 1629 }, { "epoch": 1.130374479889043, "grad_norm": 0.34351945573431236, "learning_rate": 9.523578536952155e-06, "loss": 0.5311, "step": 1630 }, { "epoch": 1.1310679611650485, "grad_norm": 0.3230604901871847, "learning_rate": 9.52254674763216e-06, "loss": 0.5177, "step": 1631 }, { "epoch": 1.131761442441054, "grad_norm": 0.3581666442948397, "learning_rate": 9.521513898259959e-06, "loss": 0.5009, "step": 1632 }, { "epoch": 1.1324549237170596, "grad_norm": 0.4025996584654056, "learning_rate": 9.520479989077647e-06, "loss": 0.547, "step": 1633 }, { "epoch": 1.1331484049930651, "grad_norm": 0.34798550205148976, "learning_rate": 9.519445020327566e-06, "loss": 0.5082, "step": 1634 }, { "epoch": 1.1338418862690707, "grad_norm": 0.35477982279694437, "learning_rate": 9.518408992252301e-06, "loss": 0.4967, "step": 1635 }, { "epoch": 1.1345353675450762, "grad_norm": 0.3357066407497684, "learning_rate": 9.517371905094695e-06, "loss": 0.53, "step": 1636 }, { "epoch": 1.1352288488210818, "grad_norm": 0.5012240083436882, "learning_rate": 9.516333759097828e-06, "loss": 0.5036, "step": 1637 }, { "epoch": 1.1359223300970873, "grad_norm": 0.3411087744256892, "learning_rate": 9.515294554505039e-06, "loss": 0.5052, "step": 1638 }, { "epoch": 1.1366158113730929, "grad_norm": 0.38406127824500824, "learning_rate": 9.514254291559905e-06, "loss": 0.4952, "step": 1639 }, { "epoch": 1.1373092926490984, "grad_norm": 0.3529097042180847, "learning_rate": 9.513212970506261e-06, "loss": 0.5331, "step": 1640 }, { "epoch": 1.138002773925104, "grad_norm": 0.3873077740797989, "learning_rate": 9.512170591588183e-06, "loss": 0.5676, "step": 1641 }, { "epoch": 1.1386962552011095, "grad_norm": 0.3554460301360124, "learning_rate": 9.511127155049996e-06, "loss": 0.4469, "step": 1642 }, { "epoch": 1.139389736477115, "grad_norm": 0.3465874039356611, "learning_rate": 9.510082661136274e-06, "loss": 0.4969, "step": 1643 }, { "epoch": 1.1400832177531206, "grad_norm": 0.3495298863506024, "learning_rate": 9.509037110091843e-06, "loss": 0.5312, "step": 1644 }, { "epoch": 1.1407766990291262, "grad_norm": 0.39680003106953243, "learning_rate": 9.507990502161769e-06, 
"loss": 0.4881, "step": 1645 }, { "epoch": 1.1414701803051317, "grad_norm": 0.3135739349410391, "learning_rate": 9.506942837591366e-06, "loss": 0.5057, "step": 1646 }, { "epoch": 1.1421636615811372, "grad_norm": 0.33564410298737213, "learning_rate": 9.505894116626205e-06, "loss": 0.4728, "step": 1647 }, { "epoch": 1.1428571428571428, "grad_norm": 0.3249097679538702, "learning_rate": 9.504844339512096e-06, "loss": 0.4617, "step": 1648 }, { "epoch": 1.1435506241331483, "grad_norm": 0.3559096317268659, "learning_rate": 9.5037935064951e-06, "loss": 0.5854, "step": 1649 }, { "epoch": 1.1442441054091539, "grad_norm": 0.3414419544457502, "learning_rate": 9.502741617821524e-06, "loss": 0.4893, "step": 1650 }, { "epoch": 1.1449375866851594, "grad_norm": 0.3430276905742752, "learning_rate": 9.501688673737924e-06, "loss": 0.5249, "step": 1651 }, { "epoch": 1.145631067961165, "grad_norm": 0.39074907334250514, "learning_rate": 9.500634674491099e-06, "loss": 0.483, "step": 1652 }, { "epoch": 1.1463245492371705, "grad_norm": 0.3263341157308737, "learning_rate": 9.499579620328103e-06, "loss": 0.4886, "step": 1653 }, { "epoch": 1.147018030513176, "grad_norm": 0.40195093908502905, "learning_rate": 9.498523511496231e-06, "loss": 0.5827, "step": 1654 }, { "epoch": 1.1477115117891816, "grad_norm": 0.32395271958038363, "learning_rate": 9.497466348243028e-06, "loss": 0.4689, "step": 1655 }, { "epoch": 1.1484049930651872, "grad_norm": 0.3605680761959218, "learning_rate": 9.496408130816286e-06, "loss": 0.5025, "step": 1656 }, { "epoch": 1.1490984743411927, "grad_norm": 0.4199044813112826, "learning_rate": 9.495348859464042e-06, "loss": 0.4717, "step": 1657 }, { "epoch": 1.1497919556171983, "grad_norm": 0.344715746484264, "learning_rate": 9.494288534434581e-06, "loss": 0.4954, "step": 1658 }, { "epoch": 1.1504854368932038, "grad_norm": 0.3692774006241896, "learning_rate": 9.49322715597644e-06, "loss": 0.5178, "step": 1659 }, { "epoch": 1.1511789181692094, "grad_norm": 0.5416229107335282, "learning_rate": 9.49216472433839e-06, "loss": 0.486, "step": 1660 }, { "epoch": 1.151872399445215, "grad_norm": 0.3312354320124688, "learning_rate": 9.491101239769466e-06, "loss": 0.5321, "step": 1661 }, { "epoch": 1.1525658807212205, "grad_norm": 0.3311978668364831, "learning_rate": 9.490036702518937e-06, "loss": 0.5422, "step": 1662 }, { "epoch": 1.153259361997226, "grad_norm": 0.37499695498774954, "learning_rate": 9.488971112836324e-06, "loss": 0.5423, "step": 1663 }, { "epoch": 1.1539528432732316, "grad_norm": 0.31228912202218406, "learning_rate": 9.487904470971392e-06, "loss": 0.504, "step": 1664 }, { "epoch": 1.154646324549237, "grad_norm": 0.3276693072440868, "learning_rate": 9.486836777174154e-06, "loss": 0.5037, "step": 1665 }, { "epoch": 1.1553398058252426, "grad_norm": 0.5790942315327522, "learning_rate": 9.485768031694872e-06, "loss": 0.4983, "step": 1666 }, { "epoch": 1.1560332871012482, "grad_norm": 0.434311744774958, "learning_rate": 9.48469823478405e-06, "loss": 0.5426, "step": 1667 }, { "epoch": 1.1567267683772537, "grad_norm": 0.3331090918950169, "learning_rate": 9.483627386692442e-06, "loss": 0.4754, "step": 1668 }, { "epoch": 1.1574202496532593, "grad_norm": 0.3624095610356531, "learning_rate": 9.482555487671045e-06, "loss": 0.4899, "step": 1669 }, { "epoch": 1.1581137309292648, "grad_norm": 0.34964383902752577, "learning_rate": 9.481482537971109e-06, "loss": 0.5147, "step": 1670 }, { "epoch": 1.1588072122052704, "grad_norm": 0.3574242454702927, "learning_rate": 9.48040853784412e-06, "loss": 0.5559, "step": 
1671 }, { "epoch": 1.159500693481276, "grad_norm": 0.34075858644858553, "learning_rate": 9.47933348754182e-06, "loss": 0.5179, "step": 1672 }, { "epoch": 1.1601941747572815, "grad_norm": 0.36892774082728613, "learning_rate": 9.478257387316189e-06, "loss": 0.5387, "step": 1673 }, { "epoch": 1.160887656033287, "grad_norm": 0.36299866215146465, "learning_rate": 9.47718023741946e-06, "loss": 0.5024, "step": 1674 }, { "epoch": 1.1615811373092926, "grad_norm": 0.3401760609141647, "learning_rate": 9.476102038104112e-06, "loss": 0.4958, "step": 1675 }, { "epoch": 1.1622746185852981, "grad_norm": 0.3821106522236179, "learning_rate": 9.47502278962286e-06, "loss": 0.4428, "step": 1676 }, { "epoch": 1.1629680998613037, "grad_norm": 0.32656565172819785, "learning_rate": 9.473942492228676e-06, "loss": 0.4571, "step": 1677 }, { "epoch": 1.1636615811373092, "grad_norm": 0.38063745220229667, "learning_rate": 9.472861146174777e-06, "loss": 0.533, "step": 1678 }, { "epoch": 1.1643550624133148, "grad_norm": 0.42193005599944505, "learning_rate": 9.471778751714615e-06, "loss": 0.5249, "step": 1679 }, { "epoch": 1.1650485436893203, "grad_norm": 0.3804509011733188, "learning_rate": 9.470695309101903e-06, "loss": 0.5171, "step": 1680 }, { "epoch": 1.1657420249653259, "grad_norm": 0.3303163476886524, "learning_rate": 9.469610818590586e-06, "loss": 0.4974, "step": 1681 }, { "epoch": 1.1664355062413314, "grad_norm": 0.427519321515256, "learning_rate": 9.468525280434866e-06, "loss": 0.5826, "step": 1682 }, { "epoch": 1.167128987517337, "grad_norm": 0.3345103364148085, "learning_rate": 9.467438694889181e-06, "loss": 0.5235, "step": 1683 }, { "epoch": 1.1678224687933425, "grad_norm": 0.360892832272255, "learning_rate": 9.466351062208223e-06, "loss": 0.5344, "step": 1684 }, { "epoch": 1.168515950069348, "grad_norm": 0.3281921420110916, "learning_rate": 9.465262382646922e-06, "loss": 0.5117, "step": 1685 }, { "epoch": 1.1692094313453536, "grad_norm": 0.3353355636612057, "learning_rate": 9.464172656460456e-06, "loss": 0.5201, "step": 1686 }, { "epoch": 1.1699029126213591, "grad_norm": 0.3384514348258698, "learning_rate": 9.463081883904251e-06, "loss": 0.5106, "step": 1687 }, { "epoch": 1.1705963938973647, "grad_norm": 0.31357891074588756, "learning_rate": 9.461990065233978e-06, "loss": 0.5352, "step": 1688 }, { "epoch": 1.1712898751733702, "grad_norm": 0.3350076853198936, "learning_rate": 9.460897200705546e-06, "loss": 0.4853, "step": 1689 }, { "epoch": 1.1719833564493758, "grad_norm": 0.4654249770831004, "learning_rate": 9.459803290575119e-06, "loss": 0.5408, "step": 1690 }, { "epoch": 1.1726768377253813, "grad_norm": 0.33374760497131273, "learning_rate": 9.458708335099099e-06, "loss": 0.4823, "step": 1691 }, { "epoch": 1.1733703190013869, "grad_norm": 0.33964898430279455, "learning_rate": 9.457612334534136e-06, "loss": 0.5307, "step": 1692 }, { "epoch": 1.1740638002773924, "grad_norm": 0.34566246311150367, "learning_rate": 9.456515289137127e-06, "loss": 0.4628, "step": 1693 }, { "epoch": 1.174757281553398, "grad_norm": 0.3586618633106987, "learning_rate": 9.455417199165209e-06, "loss": 0.479, "step": 1694 }, { "epoch": 1.1754507628294035, "grad_norm": 0.3593559685406414, "learning_rate": 9.454318064875767e-06, "loss": 0.474, "step": 1695 }, { "epoch": 1.176144244105409, "grad_norm": 0.37699022032545726, "learning_rate": 9.45321788652643e-06, "loss": 0.5317, "step": 1696 }, { "epoch": 1.1768377253814146, "grad_norm": 0.3583201560438081, "learning_rate": 9.452116664375072e-06, "loss": 0.4868, "step": 1697 }, { "epoch": 
1.1775312066574202, "grad_norm": 0.3683286584518682, "learning_rate": 9.451014398679814e-06, "loss": 0.4933, "step": 1698 }, { "epoch": 1.1782246879334257, "grad_norm": 0.37985472360003564, "learning_rate": 9.449911089699015e-06, "loss": 0.4535, "step": 1699 }, { "epoch": 1.1789181692094313, "grad_norm": 0.3534260584391775, "learning_rate": 9.448806737691285e-06, "loss": 0.5715, "step": 1700 }, { "epoch": 1.1796116504854368, "grad_norm": 0.34369316549359924, "learning_rate": 9.447701342915473e-06, "loss": 0.4808, "step": 1701 }, { "epoch": 1.1803051317614424, "grad_norm": 0.3341010220537988, "learning_rate": 9.446594905630682e-06, "loss": 0.4959, "step": 1702 }, { "epoch": 1.180998613037448, "grad_norm": 0.3574491631980831, "learning_rate": 9.445487426096247e-06, "loss": 0.4908, "step": 1703 }, { "epoch": 1.1816920943134535, "grad_norm": 0.336352118318098, "learning_rate": 9.444378904571753e-06, "loss": 0.5053, "step": 1704 }, { "epoch": 1.182385575589459, "grad_norm": 0.32712792361743737, "learning_rate": 9.443269341317034e-06, "loss": 0.474, "step": 1705 }, { "epoch": 1.1830790568654646, "grad_norm": 0.38552992785239587, "learning_rate": 9.442158736592163e-06, "loss": 0.4843, "step": 1706 }, { "epoch": 1.18377253814147, "grad_norm": 0.37955703265610796, "learning_rate": 9.441047090657452e-06, "loss": 0.545, "step": 1707 }, { "epoch": 1.1844660194174756, "grad_norm": 0.37868135130282043, "learning_rate": 9.439934403773468e-06, "loss": 0.4657, "step": 1708 }, { "epoch": 1.1851595006934812, "grad_norm": 0.359046967807064, "learning_rate": 9.438820676201013e-06, "loss": 0.4929, "step": 1709 }, { "epoch": 1.1858529819694867, "grad_norm": 0.3619491084136992, "learning_rate": 9.437705908201142e-06, "loss": 0.4691, "step": 1710 }, { "epoch": 1.1865464632454923, "grad_norm": 0.34760598844116564, "learning_rate": 9.436590100035145e-06, "loss": 0.5365, "step": 1711 }, { "epoch": 1.1872399445214978, "grad_norm": 0.34970273680841074, "learning_rate": 9.435473251964559e-06, "loss": 0.5012, "step": 1712 }, { "epoch": 1.1879334257975034, "grad_norm": 0.34684043383821284, "learning_rate": 9.434355364251167e-06, "loss": 0.5256, "step": 1713 }, { "epoch": 1.188626907073509, "grad_norm": 0.3524132031035981, "learning_rate": 9.43323643715699e-06, "loss": 0.553, "step": 1714 }, { "epoch": 1.1893203883495145, "grad_norm": 0.3307702534918243, "learning_rate": 9.4321164709443e-06, "loss": 0.516, "step": 1715 }, { "epoch": 1.19001386962552, "grad_norm": 0.36002029927529566, "learning_rate": 9.43099546587561e-06, "loss": 0.5012, "step": 1716 }, { "epoch": 1.1907073509015256, "grad_norm": 0.331128273417454, "learning_rate": 9.429873422213673e-06, "loss": 0.4807, "step": 1717 }, { "epoch": 1.1914008321775311, "grad_norm": 0.3899947371381505, "learning_rate": 9.428750340221488e-06, "loss": 0.5244, "step": 1718 }, { "epoch": 1.1920943134535367, "grad_norm": 0.36816407199272744, "learning_rate": 9.427626220162298e-06, "loss": 0.5394, "step": 1719 }, { "epoch": 1.1927877947295422, "grad_norm": 0.32023218435108053, "learning_rate": 9.42650106229959e-06, "loss": 0.4513, "step": 1720 }, { "epoch": 1.1934812760055478, "grad_norm": 0.3773033890248266, "learning_rate": 9.425374866897088e-06, "loss": 0.4799, "step": 1721 }, { "epoch": 1.1941747572815533, "grad_norm": 0.3288651593328371, "learning_rate": 9.42424763421877e-06, "loss": 0.5217, "step": 1722 }, { "epoch": 1.1948682385575589, "grad_norm": 0.36004861260648363, "learning_rate": 9.423119364528848e-06, "loss": 0.5179, "step": 1723 }, { "epoch": 1.1955617198335644, 
"grad_norm": 0.3488622875642143, "learning_rate": 9.42199005809178e-06, "loss": 0.5417, "step": 1724 }, { "epoch": 1.19625520110957, "grad_norm": 0.38678204590663706, "learning_rate": 9.420859715172267e-06, "loss": 0.5219, "step": 1725 }, { "epoch": 1.1969486823855755, "grad_norm": 0.4030026622612167, "learning_rate": 9.419728336035254e-06, "loss": 0.5526, "step": 1726 }, { "epoch": 1.197642163661581, "grad_norm": 0.3890856940765283, "learning_rate": 9.41859592094593e-06, "loss": 0.4808, "step": 1727 }, { "epoch": 1.1983356449375866, "grad_norm": 0.3407430238459701, "learning_rate": 9.417462470169722e-06, "loss": 0.5091, "step": 1728 }, { "epoch": 1.1990291262135921, "grad_norm": 0.33439585047935827, "learning_rate": 9.416327983972304e-06, "loss": 0.5866, "step": 1729 }, { "epoch": 1.1997226074895977, "grad_norm": 0.3594607665557123, "learning_rate": 9.415192462619591e-06, "loss": 0.5438, "step": 1730 }, { "epoch": 1.2004160887656032, "grad_norm": 0.34124882578538185, "learning_rate": 9.414055906377743e-06, "loss": 0.5278, "step": 1731 }, { "epoch": 1.2011095700416088, "grad_norm": 0.31154514796541144, "learning_rate": 9.412918315513156e-06, "loss": 0.4986, "step": 1732 }, { "epoch": 1.2018030513176143, "grad_norm": 0.5691552778263413, "learning_rate": 9.411779690292478e-06, "loss": 0.5235, "step": 1733 }, { "epoch": 1.2024965325936199, "grad_norm": 0.33272400114356926, "learning_rate": 9.41064003098259e-06, "loss": 0.4806, "step": 1734 }, { "epoch": 1.2031900138696254, "grad_norm": 0.31230477933126916, "learning_rate": 9.409499337850623e-06, "loss": 0.5402, "step": 1735 }, { "epoch": 1.203883495145631, "grad_norm": 0.3839760199541642, "learning_rate": 9.408357611163945e-06, "loss": 0.4798, "step": 1736 }, { "epoch": 1.2045769764216365, "grad_norm": 0.3644018935729482, "learning_rate": 9.407214851190172e-06, "loss": 0.5468, "step": 1737 }, { "epoch": 1.205270457697642, "grad_norm": 0.34112571531473196, "learning_rate": 9.406071058197154e-06, "loss": 0.506, "step": 1738 }, { "epoch": 1.2059639389736476, "grad_norm": 0.32972775762348827, "learning_rate": 9.404926232452993e-06, "loss": 0.5189, "step": 1739 }, { "epoch": 1.2066574202496532, "grad_norm": 0.3736726803102763, "learning_rate": 9.403780374226024e-06, "loss": 0.4819, "step": 1740 }, { "epoch": 1.2073509015256587, "grad_norm": 0.34460422716649386, "learning_rate": 9.402633483784829e-06, "loss": 0.4662, "step": 1741 }, { "epoch": 1.2080443828016643, "grad_norm": 0.35343428807366783, "learning_rate": 9.40148556139823e-06, "loss": 0.5337, "step": 1742 }, { "epoch": 1.2087378640776698, "grad_norm": 0.3310744447621281, "learning_rate": 9.400336607335294e-06, "loss": 0.5169, "step": 1743 }, { "epoch": 1.2094313453536754, "grad_norm": 1.0993749520175058, "learning_rate": 9.399186621865323e-06, "loss": 0.4987, "step": 1744 }, { "epoch": 1.210124826629681, "grad_norm": 0.3787079557675599, "learning_rate": 9.398035605257871e-06, "loss": 0.5271, "step": 1745 }, { "epoch": 1.2108183079056865, "grad_norm": 0.3283299973065705, "learning_rate": 9.396883557782726e-06, "loss": 0.5133, "step": 1746 }, { "epoch": 1.211511789181692, "grad_norm": 0.30892901634222936, "learning_rate": 9.395730479709916e-06, "loss": 0.5178, "step": 1747 }, { "epoch": 1.2122052704576975, "grad_norm": 0.33384867713731503, "learning_rate": 9.394576371309719e-06, "loss": 0.4944, "step": 1748 }, { "epoch": 1.212898751733703, "grad_norm": 0.3184968769229458, "learning_rate": 9.393421232852647e-06, "loss": 0.5243, "step": 1749 }, { "epoch": 1.2135922330097086, "grad_norm": 
0.32512841113260527, "learning_rate": 9.392265064609455e-06, "loss": 0.5185, "step": 1750 }, { "epoch": 1.2142857142857142, "grad_norm": 0.3780573473062979, "learning_rate": 9.391107866851143e-06, "loss": 0.518, "step": 1751 }, { "epoch": 1.2149791955617197, "grad_norm": 0.3764154859820463, "learning_rate": 9.38994963984895e-06, "loss": 0.5036, "step": 1752 }, { "epoch": 1.2156726768377253, "grad_norm": 0.3459017489085076, "learning_rate": 9.388790383874354e-06, "loss": 0.4779, "step": 1753 }, { "epoch": 1.2163661581137308, "grad_norm": 0.3733669780271188, "learning_rate": 9.387630099199078e-06, "loss": 0.5058, "step": 1754 }, { "epoch": 1.2170596393897364, "grad_norm": 0.344433307636627, "learning_rate": 9.386468786095083e-06, "loss": 0.521, "step": 1755 }, { "epoch": 1.217753120665742, "grad_norm": 0.3474449635046229, "learning_rate": 9.385306444834573e-06, "loss": 0.4371, "step": 1756 }, { "epoch": 1.2184466019417475, "grad_norm": 0.35089326845114144, "learning_rate": 9.384143075689992e-06, "loss": 0.4947, "step": 1757 }, { "epoch": 1.219140083217753, "grad_norm": 0.3732377905127223, "learning_rate": 9.382978678934025e-06, "loss": 0.5801, "step": 1758 }, { "epoch": 1.2198335644937588, "grad_norm": 0.3444938530511972, "learning_rate": 9.381813254839599e-06, "loss": 0.543, "step": 1759 }, { "epoch": 1.2205270457697641, "grad_norm": 0.3545084942767667, "learning_rate": 9.38064680367988e-06, "loss": 0.4803, "step": 1760 }, { "epoch": 1.2212205270457699, "grad_norm": 0.3496106779691193, "learning_rate": 9.379479325728278e-06, "loss": 0.5073, "step": 1761 }, { "epoch": 1.2219140083217752, "grad_norm": 0.3565004982279895, "learning_rate": 9.378310821258438e-06, "loss": 0.4367, "step": 1762 }, { "epoch": 1.222607489597781, "grad_norm": 0.380005631044401, "learning_rate": 9.377141290544252e-06, "loss": 0.5482, "step": 1763 }, { "epoch": 1.2233009708737863, "grad_norm": 0.32483113388649515, "learning_rate": 9.375970733859848e-06, "loss": 0.4772, "step": 1764 }, { "epoch": 1.223994452149792, "grad_norm": 0.34642682526857854, "learning_rate": 9.374799151479597e-06, "loss": 0.5607, "step": 1765 }, { "epoch": 1.2246879334257974, "grad_norm": 0.3761000577403412, "learning_rate": 9.373626543678106e-06, "loss": 0.4968, "step": 1766 }, { "epoch": 1.2253814147018032, "grad_norm": 0.38479697912876704, "learning_rate": 9.37245291073023e-06, "loss": 0.4979, "step": 1767 }, { "epoch": 1.2260748959778085, "grad_norm": 0.384078635865825, "learning_rate": 9.371278252911061e-06, "loss": 0.5143, "step": 1768 }, { "epoch": 1.2267683772538143, "grad_norm": 0.33940895010505023, "learning_rate": 9.370102570495925e-06, "loss": 0.4829, "step": 1769 }, { "epoch": 1.2274618585298196, "grad_norm": 0.3754837694351547, "learning_rate": 9.368925863760396e-06, "loss": 0.4952, "step": 1770 }, { "epoch": 1.2281553398058254, "grad_norm": 0.30986664311154366, "learning_rate": 9.367748132980286e-06, "loss": 0.4546, "step": 1771 }, { "epoch": 1.2288488210818307, "grad_norm": 0.36817457336350934, "learning_rate": 9.366569378431647e-06, "loss": 0.4881, "step": 1772 }, { "epoch": 1.2295423023578365, "grad_norm": 0.33131085104720265, "learning_rate": 9.36538960039077e-06, "loss": 0.4924, "step": 1773 }, { "epoch": 1.2302357836338418, "grad_norm": 0.4382446417776454, "learning_rate": 9.364208799134187e-06, "loss": 0.5041, "step": 1774 }, { "epoch": 1.2309292649098476, "grad_norm": 0.35821233800222924, "learning_rate": 9.363026974938667e-06, "loss": 0.5396, "step": 1775 }, { "epoch": 1.2316227461858529, "grad_norm": 0.5342569170911093, 
"learning_rate": 9.361844128081224e-06, "loss": 0.5203, "step": 1776 }, { "epoch": 1.2323162274618586, "grad_norm": 0.38002321598260624, "learning_rate": 9.360660258839105e-06, "loss": 0.525, "step": 1777 }, { "epoch": 1.233009708737864, "grad_norm": 0.7115304619708138, "learning_rate": 9.359475367489805e-06, "loss": 0.4508, "step": 1778 }, { "epoch": 1.2337031900138697, "grad_norm": 0.39373627900885494, "learning_rate": 9.35828945431105e-06, "loss": 0.5039, "step": 1779 }, { "epoch": 1.234396671289875, "grad_norm": 0.3542774756046947, "learning_rate": 9.357102519580814e-06, "loss": 0.4401, "step": 1780 }, { "epoch": 1.2350901525658808, "grad_norm": 0.32112702665284176, "learning_rate": 9.3559145635773e-06, "loss": 0.4374, "step": 1781 }, { "epoch": 1.2357836338418862, "grad_norm": 0.3585003989544278, "learning_rate": 9.354725586578961e-06, "loss": 0.5308, "step": 1782 }, { "epoch": 1.236477115117892, "grad_norm": 0.34996791544106826, "learning_rate": 9.353535588864481e-06, "loss": 0.534, "step": 1783 }, { "epoch": 1.2371705963938973, "grad_norm": 0.35276513540142673, "learning_rate": 9.35234457071279e-06, "loss": 0.4946, "step": 1784 }, { "epoch": 1.237864077669903, "grad_norm": 0.34501235341728115, "learning_rate": 9.351152532403054e-06, "loss": 0.4946, "step": 1785 }, { "epoch": 1.2385575589459084, "grad_norm": 0.6636162679288155, "learning_rate": 9.349959474214677e-06, "loss": 0.4955, "step": 1786 }, { "epoch": 1.2392510402219141, "grad_norm": 0.36482412876196674, "learning_rate": 9.348765396427301e-06, "loss": 0.4954, "step": 1787 }, { "epoch": 1.2399445214979194, "grad_norm": 0.380783285560688, "learning_rate": 9.347570299320811e-06, "loss": 0.5423, "step": 1788 }, { "epoch": 1.2406380027739252, "grad_norm": 0.4111420045033692, "learning_rate": 9.346374183175332e-06, "loss": 0.5494, "step": 1789 }, { "epoch": 1.2413314840499305, "grad_norm": 0.390779385886406, "learning_rate": 9.34517704827122e-06, "loss": 0.4949, "step": 1790 }, { "epoch": 1.2420249653259363, "grad_norm": 0.4119419521078131, "learning_rate": 9.34397889488908e-06, "loss": 0.4751, "step": 1791 }, { "epoch": 1.2427184466019416, "grad_norm": 0.36895537173839116, "learning_rate": 9.342779723309746e-06, "loss": 0.5126, "step": 1792 }, { "epoch": 1.2434119278779474, "grad_norm": 0.3800295326968954, "learning_rate": 9.341579533814295e-06, "loss": 0.4806, "step": 1793 }, { "epoch": 1.2441054091539527, "grad_norm": 0.38533573980298713, "learning_rate": 9.340378326684046e-06, "loss": 0.5453, "step": 1794 }, { "epoch": 1.2447988904299585, "grad_norm": 0.32525208351074025, "learning_rate": 9.339176102200552e-06, "loss": 0.5033, "step": 1795 }, { "epoch": 1.2454923717059638, "grad_norm": 0.35893685303904643, "learning_rate": 9.337972860645605e-06, "loss": 0.5081, "step": 1796 }, { "epoch": 1.2461858529819696, "grad_norm": 0.3756367763280424, "learning_rate": 9.336768602301237e-06, "loss": 0.5728, "step": 1797 }, { "epoch": 1.246879334257975, "grad_norm": 0.39848342030640704, "learning_rate": 9.335563327449717e-06, "loss": 0.5115, "step": 1798 }, { "epoch": 1.2475728155339807, "grad_norm": 0.3332538030512722, "learning_rate": 9.334357036373552e-06, "loss": 0.4812, "step": 1799 }, { "epoch": 1.248266296809986, "grad_norm": 0.4936305563062856, "learning_rate": 9.333149729355488e-06, "loss": 0.5534, "step": 1800 }, { "epoch": 1.2489597780859918, "grad_norm": 0.3464077648272386, "learning_rate": 9.33194140667851e-06, "loss": 0.4755, "step": 1801 }, { "epoch": 1.2496532593619971, "grad_norm": 0.34412226798281487, "learning_rate": 
9.330732068625841e-06, "loss": 0.472, "step": 1802 }, { "epoch": 1.2503467406380029, "grad_norm": 0.40125874252663685, "learning_rate": 9.32952171548094e-06, "loss": 0.5316, "step": 1803 }, { "epoch": 1.2510402219140082, "grad_norm": 0.3747570794371812, "learning_rate": 9.328310347527502e-06, "loss": 0.6068, "step": 1804 }, { "epoch": 1.251733703190014, "grad_norm": 0.3519003462584428, "learning_rate": 9.32709796504947e-06, "loss": 0.5555, "step": 1805 }, { "epoch": 1.2524271844660193, "grad_norm": 0.3616136500041631, "learning_rate": 9.32588456833101e-06, "loss": 0.4587, "step": 1806 }, { "epoch": 1.253120665742025, "grad_norm": 0.36318051003589935, "learning_rate": 9.324670157656537e-06, "loss": 0.4811, "step": 1807 }, { "epoch": 1.2538141470180304, "grad_norm": 0.6895559289517076, "learning_rate": 9.323454733310699e-06, "loss": 0.4445, "step": 1808 }, { "epoch": 1.2545076282940362, "grad_norm": 0.48409487764298625, "learning_rate": 9.322238295578385e-06, "loss": 0.5179, "step": 1809 }, { "epoch": 1.2552011095700415, "grad_norm": 0.33808201346064753, "learning_rate": 9.321020844744717e-06, "loss": 0.4823, "step": 1810 }, { "epoch": 1.2558945908460473, "grad_norm": 0.3501876609955232, "learning_rate": 9.319802381095058e-06, "loss": 0.487, "step": 1811 }, { "epoch": 1.2565880721220526, "grad_norm": 0.39446790540112453, "learning_rate": 9.318582904915006e-06, "loss": 0.5297, "step": 1812 }, { "epoch": 1.2572815533980584, "grad_norm": 0.3612380146284427, "learning_rate": 9.317362416490396e-06, "loss": 0.493, "step": 1813 }, { "epoch": 1.2579750346740637, "grad_norm": 0.33438828335321563, "learning_rate": 9.316140916107305e-06, "loss": 0.4888, "step": 1814 }, { "epoch": 1.2586685159500695, "grad_norm": 0.37737050106701486, "learning_rate": 9.314918404052043e-06, "loss": 0.5449, "step": 1815 }, { "epoch": 1.2593619972260748, "grad_norm": 0.3148973693528381, "learning_rate": 9.313694880611157e-06, "loss": 0.4131, "step": 1816 }, { "epoch": 1.2600554785020806, "grad_norm": 0.3470073014150124, "learning_rate": 9.312470346071432e-06, "loss": 0.5104, "step": 1817 }, { "epoch": 1.2607489597780859, "grad_norm": 0.3371210211191612, "learning_rate": 9.31124480071989e-06, "loss": 0.4775, "step": 1818 }, { "epoch": 1.2614424410540916, "grad_norm": 0.41493715604100356, "learning_rate": 9.310018244843789e-06, "loss": 0.5228, "step": 1819 }, { "epoch": 1.262135922330097, "grad_norm": 0.36177471993141525, "learning_rate": 9.308790678730627e-06, "loss": 0.5413, "step": 1820 }, { "epoch": 1.2628294036061027, "grad_norm": 0.3653922251647071, "learning_rate": 9.307562102668135e-06, "loss": 0.4889, "step": 1821 }, { "epoch": 1.263522884882108, "grad_norm": 0.36061884106135783, "learning_rate": 9.306332516944286e-06, "loss": 0.5054, "step": 1822 }, { "epoch": 1.2642163661581138, "grad_norm": 0.33099520054862996, "learning_rate": 9.30510192184728e-06, "loss": 0.514, "step": 1823 }, { "epoch": 1.2649098474341192, "grad_norm": 0.35230487588580767, "learning_rate": 9.303870317665562e-06, "loss": 0.4735, "step": 1824 }, { "epoch": 1.265603328710125, "grad_norm": 0.34182709412480217, "learning_rate": 9.302637704687813e-06, "loss": 0.5315, "step": 1825 }, { "epoch": 1.2662968099861303, "grad_norm": 0.34561666292270243, "learning_rate": 9.301404083202947e-06, "loss": 0.5152, "step": 1826 }, { "epoch": 1.266990291262136, "grad_norm": 0.34694018822517697, "learning_rate": 9.300169453500117e-06, "loss": 0.5084, "step": 1827 }, { "epoch": 1.2676837725381414, "grad_norm": 0.3383285721000504, "learning_rate": 
9.29893381586871e-06, "loss": 0.5365, "step": 1828 }, { "epoch": 1.2683772538141471, "grad_norm": 0.32798283121490285, "learning_rate": 9.29769717059835e-06, "loss": 0.507, "step": 1829 }, { "epoch": 1.2690707350901524, "grad_norm": 0.4242650703398752, "learning_rate": 9.296459517978898e-06, "loss": 0.5339, "step": 1830 }, { "epoch": 1.2697642163661582, "grad_norm": 0.36550918769480284, "learning_rate": 9.295220858300454e-06, "loss": 0.4783, "step": 1831 }, { "epoch": 1.2704576976421635, "grad_norm": 0.3393992967884969, "learning_rate": 9.293981191853345e-06, "loss": 0.5473, "step": 1832 }, { "epoch": 1.2711511789181693, "grad_norm": 0.36297350829327885, "learning_rate": 9.292740518928145e-06, "loss": 0.5456, "step": 1833 }, { "epoch": 1.2718446601941746, "grad_norm": 0.6438831175708963, "learning_rate": 9.291498839815658e-06, "loss": 0.5028, "step": 1834 }, { "epoch": 1.2725381414701804, "grad_norm": 0.34934010035959623, "learning_rate": 9.29025615480692e-06, "loss": 0.536, "step": 1835 }, { "epoch": 1.2732316227461857, "grad_norm": 0.34350905906439205, "learning_rate": 9.289012464193215e-06, "loss": 0.5463, "step": 1836 }, { "epoch": 1.2739251040221915, "grad_norm": 0.3185676661328429, "learning_rate": 9.287767768266046e-06, "loss": 0.4963, "step": 1837 }, { "epoch": 1.2746185852981968, "grad_norm": 0.33608499489175475, "learning_rate": 9.28652206731717e-06, "loss": 0.5328, "step": 1838 }, { "epoch": 1.2753120665742026, "grad_norm": 0.31767013577623654, "learning_rate": 9.285275361638564e-06, "loss": 0.4891, "step": 1839 }, { "epoch": 1.276005547850208, "grad_norm": 0.3733185568785257, "learning_rate": 9.284027651522449e-06, "loss": 0.5227, "step": 1840 }, { "epoch": 1.2766990291262137, "grad_norm": 0.363373361322098, "learning_rate": 9.282778937261279e-06, "loss": 0.4966, "step": 1841 }, { "epoch": 1.277392510402219, "grad_norm": 0.3418047154421495, "learning_rate": 9.281529219147742e-06, "loss": 0.5532, "step": 1842 }, { "epoch": 1.2780859916782248, "grad_norm": 0.31665406680819047, "learning_rate": 9.280278497474765e-06, "loss": 0.4579, "step": 1843 }, { "epoch": 1.2787794729542301, "grad_norm": 0.3583116096524031, "learning_rate": 9.279026772535508e-06, "loss": 0.4589, "step": 1844 }, { "epoch": 1.2794729542302359, "grad_norm": 0.37982949955058526, "learning_rate": 9.277774044623366e-06, "loss": 0.5608, "step": 1845 }, { "epoch": 1.2801664355062412, "grad_norm": 0.33138853176950483, "learning_rate": 9.27652031403197e-06, "loss": 0.5216, "step": 1846 }, { "epoch": 1.280859916782247, "grad_norm": 0.33450791406369784, "learning_rate": 9.275265581055183e-06, "loss": 0.5004, "step": 1847 }, { "epoch": 1.2815533980582523, "grad_norm": 0.3795679929765968, "learning_rate": 9.274009845987106e-06, "loss": 0.5585, "step": 1848 }, { "epoch": 1.282246879334258, "grad_norm": 0.35512081526603556, "learning_rate": 9.272753109122077e-06, "loss": 0.5057, "step": 1849 }, { "epoch": 1.2829403606102634, "grad_norm": 0.34003401418927753, "learning_rate": 9.271495370754661e-06, "loss": 0.5055, "step": 1850 }, { "epoch": 1.2836338418862692, "grad_norm": 0.3528803434944884, "learning_rate": 9.270236631179667e-06, "loss": 0.4718, "step": 1851 }, { "epoch": 1.2843273231622745, "grad_norm": 0.29720816670752237, "learning_rate": 9.268976890692136e-06, "loss": 0.4671, "step": 1852 }, { "epoch": 1.2850208044382803, "grad_norm": 0.3499516875454265, "learning_rate": 9.267716149587336e-06, "loss": 0.4894, "step": 1853 }, { "epoch": 1.2857142857142856, "grad_norm": 0.35706546425277286, "learning_rate": 
9.266454408160779e-06, "loss": 0.4647, "step": 1854 }, { "epoch": 1.2864077669902914, "grad_norm": 0.3317602798535959, "learning_rate": 9.26519166670821e-06, "loss": 0.4981, "step": 1855 }, { "epoch": 1.2871012482662967, "grad_norm": 0.4062212899320207, "learning_rate": 9.263927925525602e-06, "loss": 0.4919, "step": 1856 }, { "epoch": 1.2877947295423025, "grad_norm": 0.34891518938031596, "learning_rate": 9.262663184909168e-06, "loss": 0.5429, "step": 1857 }, { "epoch": 1.2884882108183078, "grad_norm": 0.6255808059722502, "learning_rate": 9.261397445155358e-06, "loss": 0.4118, "step": 1858 }, { "epoch": 1.2891816920943135, "grad_norm": 0.37102991097352156, "learning_rate": 9.260130706560847e-06, "loss": 0.5428, "step": 1859 }, { "epoch": 1.2898751733703189, "grad_norm": 0.3218548607114772, "learning_rate": 9.258862969422554e-06, "loss": 0.4679, "step": 1860 }, { "epoch": 1.2905686546463246, "grad_norm": 0.3513152116288358, "learning_rate": 9.257594234037624e-06, "loss": 0.488, "step": 1861 }, { "epoch": 1.29126213592233, "grad_norm": 0.3787649439476071, "learning_rate": 9.256324500703439e-06, "loss": 0.5203, "step": 1862 }, { "epoch": 1.2919556171983357, "grad_norm": 0.3579882013479024, "learning_rate": 9.255053769717618e-06, "loss": 0.535, "step": 1863 }, { "epoch": 1.292649098474341, "grad_norm": 0.386163256949996, "learning_rate": 9.253782041378012e-06, "loss": 0.5253, "step": 1864 }, { "epoch": 1.2933425797503468, "grad_norm": 0.34260691499067647, "learning_rate": 9.252509315982701e-06, "loss": 0.4582, "step": 1865 }, { "epoch": 1.2940360610263522, "grad_norm": 0.3326924122711991, "learning_rate": 9.251235593830003e-06, "loss": 0.4941, "step": 1866 }, { "epoch": 1.294729542302358, "grad_norm": 0.34313143935430807, "learning_rate": 9.249960875218474e-06, "loss": 0.5028, "step": 1867 }, { "epoch": 1.2954230235783633, "grad_norm": 0.5000172994508971, "learning_rate": 9.248685160446892e-06, "loss": 0.4919, "step": 1868 }, { "epoch": 1.296116504854369, "grad_norm": 0.3481836528118851, "learning_rate": 9.247408449814281e-06, "loss": 0.6002, "step": 1869 }, { "epoch": 1.2968099861303743, "grad_norm": 1.2486713064261739, "learning_rate": 9.24613074361989e-06, "loss": 0.4673, "step": 1870 }, { "epoch": 1.2975034674063801, "grad_norm": 0.35244066709293037, "learning_rate": 9.244852042163207e-06, "loss": 0.5167, "step": 1871 }, { "epoch": 1.2981969486823854, "grad_norm": 0.3571215715967322, "learning_rate": 9.243572345743944e-06, "loss": 0.5173, "step": 1872 }, { "epoch": 1.2988904299583912, "grad_norm": 0.3542359960492466, "learning_rate": 9.24229165466206e-06, "loss": 0.5332, "step": 1873 }, { "epoch": 1.2995839112343965, "grad_norm": 0.34014165933613844, "learning_rate": 9.241009969217734e-06, "loss": 0.5264, "step": 1874 }, { "epoch": 1.3002773925104023, "grad_norm": 0.33693680377191965, "learning_rate": 9.239727289711385e-06, "loss": 0.4559, "step": 1875 }, { "epoch": 1.3009708737864076, "grad_norm": 0.33057940031515953, "learning_rate": 9.238443616443666e-06, "loss": 0.4736, "step": 1876 }, { "epoch": 1.3016643550624134, "grad_norm": 0.4539264723083045, "learning_rate": 9.237158949715462e-06, "loss": 0.565, "step": 1877 }, { "epoch": 1.3023578363384187, "grad_norm": 0.35282147506975486, "learning_rate": 9.235873289827883e-06, "loss": 0.4716, "step": 1878 }, { "epoch": 1.3030513176144245, "grad_norm": 0.42154634305474753, "learning_rate": 9.234586637082285e-06, "loss": 0.5168, "step": 1879 }, { "epoch": 1.3037447988904298, "grad_norm": 0.3817510127645713, "learning_rate": 
9.233298991780247e-06, "loss": 0.5179, "step": 1880 }, { "epoch": 1.3044382801664356, "grad_norm": 0.32170508439422457, "learning_rate": 9.232010354223584e-06, "loss": 0.4551, "step": 1881 }, { "epoch": 1.305131761442441, "grad_norm": 0.5401977930323866, "learning_rate": 9.230720724714345e-06, "loss": 0.4646, "step": 1882 }, { "epoch": 1.3058252427184467, "grad_norm": 0.36683850299211856, "learning_rate": 9.229430103554808e-06, "loss": 0.4684, "step": 1883 }, { "epoch": 1.306518723994452, "grad_norm": 0.3290350115726689, "learning_rate": 9.228138491047484e-06, "loss": 0.493, "step": 1884 }, { "epoch": 1.3072122052704578, "grad_norm": 0.32829750076246034, "learning_rate": 9.226845887495121e-06, "loss": 0.5041, "step": 1885 }, { "epoch": 1.307905686546463, "grad_norm": 0.3395242367031229, "learning_rate": 9.225552293200694e-06, "loss": 0.431, "step": 1886 }, { "epoch": 1.3085991678224689, "grad_norm": 0.34592279759627054, "learning_rate": 9.224257708467412e-06, "loss": 0.4945, "step": 1887 }, { "epoch": 1.3092926490984742, "grad_norm": 0.35782151715558513, "learning_rate": 9.222962133598716e-06, "loss": 0.4849, "step": 1888 }, { "epoch": 1.30998613037448, "grad_norm": 0.3582950740376383, "learning_rate": 9.22166556889828e-06, "loss": 0.5128, "step": 1889 }, { "epoch": 1.3106796116504853, "grad_norm": 0.32662955303731456, "learning_rate": 9.22036801467001e-06, "loss": 0.5063, "step": 1890 }, { "epoch": 1.311373092926491, "grad_norm": 0.33167950055132195, "learning_rate": 9.219069471218045e-06, "loss": 0.5088, "step": 1891 }, { "epoch": 1.3120665742024964, "grad_norm": 0.3428562599490065, "learning_rate": 9.21776993884675e-06, "loss": 0.523, "step": 1892 }, { "epoch": 1.3127600554785022, "grad_norm": 0.31385836415922064, "learning_rate": 9.216469417860727e-06, "loss": 0.5093, "step": 1893 }, { "epoch": 1.3134535367545077, "grad_norm": 0.3476910529162486, "learning_rate": 9.215167908564811e-06, "loss": 0.528, "step": 1894 }, { "epoch": 1.3141470180305133, "grad_norm": 0.3509319421764095, "learning_rate": 9.213865411264063e-06, "loss": 0.4749, "step": 1895 }, { "epoch": 1.3148404993065188, "grad_norm": 0.4085357152406572, "learning_rate": 9.212561926263783e-06, "loss": 0.4822, "step": 1896 }, { "epoch": 1.3155339805825244, "grad_norm": 0.31470837074324837, "learning_rate": 9.211257453869495e-06, "loss": 0.44, "step": 1897 }, { "epoch": 1.31622746185853, "grad_norm": 0.346880710192492, "learning_rate": 9.209951994386959e-06, "loss": 0.4946, "step": 1898 }, { "epoch": 1.3169209431345354, "grad_norm": 0.33761442780969503, "learning_rate": 9.208645548122166e-06, "loss": 0.5012, "step": 1899 }, { "epoch": 1.317614424410541, "grad_norm": 0.9123401009768137, "learning_rate": 9.207338115381337e-06, "loss": 0.5107, "step": 1900 }, { "epoch": 1.3183079056865465, "grad_norm": 0.32941756567469493, "learning_rate": 9.206029696470924e-06, "loss": 0.4634, "step": 1901 }, { "epoch": 1.319001386962552, "grad_norm": 0.4467262343503186, "learning_rate": 9.204720291697613e-06, "loss": 0.5188, "step": 1902 }, { "epoch": 1.3196948682385576, "grad_norm": 0.3606064256688591, "learning_rate": 9.203409901368317e-06, "loss": 0.5293, "step": 1903 }, { "epoch": 1.3203883495145632, "grad_norm": 0.34796190640242625, "learning_rate": 9.202098525790182e-06, "loss": 0.4966, "step": 1904 }, { "epoch": 1.3210818307905687, "grad_norm": 0.3851546692100887, "learning_rate": 9.200786165270585e-06, "loss": 0.5016, "step": 1905 }, { "epoch": 1.3217753120665743, "grad_norm": 0.34453151168862584, "learning_rate": 9.199472820117136e-06, 
"loss": 0.5305, "step": 1906 }, { "epoch": 1.3224687933425798, "grad_norm": 0.34667882515232046, "learning_rate": 9.198158490637671e-06, "loss": 0.5328, "step": 1907 }, { "epoch": 1.3231622746185854, "grad_norm": 0.3236231532972804, "learning_rate": 9.196843177140262e-06, "loss": 0.5044, "step": 1908 }, { "epoch": 1.323855755894591, "grad_norm": 0.345112813517985, "learning_rate": 9.195526879933206e-06, "loss": 0.5277, "step": 1909 }, { "epoch": 1.3245492371705965, "grad_norm": 0.36270086969994875, "learning_rate": 9.194209599325035e-06, "loss": 0.4663, "step": 1910 }, { "epoch": 1.325242718446602, "grad_norm": 0.3699520050573922, "learning_rate": 9.192891335624508e-06, "loss": 0.4745, "step": 1911 }, { "epoch": 1.3259361997226076, "grad_norm": 0.3546368208576948, "learning_rate": 9.19157208914062e-06, "loss": 0.4745, "step": 1912 }, { "epoch": 1.3266296809986131, "grad_norm": 0.33467924594224446, "learning_rate": 9.19025186018259e-06, "loss": 0.5437, "step": 1913 }, { "epoch": 1.3273231622746187, "grad_norm": 0.33368651009289546, "learning_rate": 9.188930649059873e-06, "loss": 0.4745, "step": 1914 }, { "epoch": 1.3280166435506242, "grad_norm": 0.3772498370675096, "learning_rate": 9.18760845608215e-06, "loss": 0.5473, "step": 1915 }, { "epoch": 1.3287101248266298, "grad_norm": 0.34393568999619517, "learning_rate": 9.186285281559331e-06, "loss": 0.4749, "step": 1916 }, { "epoch": 1.3294036061026353, "grad_norm": 0.3891592970007088, "learning_rate": 9.18496112580156e-06, "loss": 0.5428, "step": 1917 }, { "epoch": 1.3300970873786409, "grad_norm": 0.3602504345888441, "learning_rate": 9.183635989119211e-06, "loss": 0.5104, "step": 1918 }, { "epoch": 1.3307905686546464, "grad_norm": 0.3435705254934971, "learning_rate": 9.182309871822886e-06, "loss": 0.4898, "step": 1919 }, { "epoch": 1.331484049930652, "grad_norm": 0.34073929125777125, "learning_rate": 9.180982774223416e-06, "loss": 0.4581, "step": 1920 }, { "epoch": 1.3321775312066575, "grad_norm": 0.32966061300631383, "learning_rate": 9.179654696631865e-06, "loss": 0.4959, "step": 1921 }, { "epoch": 1.332871012482663, "grad_norm": 0.34066439491923833, "learning_rate": 9.178325639359522e-06, "loss": 0.546, "step": 1922 }, { "epoch": 1.3335644937586686, "grad_norm": 0.3446644332438958, "learning_rate": 9.176995602717912e-06, "loss": 0.4348, "step": 1923 }, { "epoch": 1.3342579750346741, "grad_norm": 0.38647871059078065, "learning_rate": 9.175664587018782e-06, "loss": 0.5249, "step": 1924 }, { "epoch": 1.3349514563106797, "grad_norm": 0.350583398344365, "learning_rate": 9.174332592574115e-06, "loss": 0.5134, "step": 1925 }, { "epoch": 1.3356449375866852, "grad_norm": 0.3456941501029919, "learning_rate": 9.172999619696118e-06, "loss": 0.5048, "step": 1926 }, { "epoch": 1.3363384188626908, "grad_norm": 0.358925043740068, "learning_rate": 9.171665668697236e-06, "loss": 0.5295, "step": 1927 }, { "epoch": 1.3370319001386963, "grad_norm": 0.3301735183670066, "learning_rate": 9.170330739890133e-06, "loss": 0.5118, "step": 1928 }, { "epoch": 1.3377253814147019, "grad_norm": 0.33064501569266713, "learning_rate": 9.168994833587707e-06, "loss": 0.4833, "step": 1929 }, { "epoch": 1.3384188626907074, "grad_norm": 0.35167935250762944, "learning_rate": 9.167657950103085e-06, "loss": 0.5254, "step": 1930 }, { "epoch": 1.339112343966713, "grad_norm": 0.32943616423598954, "learning_rate": 9.166320089749623e-06, "loss": 0.4531, "step": 1931 }, { "epoch": 1.3398058252427185, "grad_norm": 0.36170518515368594, "learning_rate": 9.164981252840908e-06, "loss": 0.5155, 
"step": 1932 }, { "epoch": 1.340499306518724, "grad_norm": 0.37713628473972527, "learning_rate": 9.16364143969075e-06, "loss": 0.5764, "step": 1933 }, { "epoch": 1.3411927877947296, "grad_norm": 0.3434375663522262, "learning_rate": 9.162300650613192e-06, "loss": 0.5295, "step": 1934 }, { "epoch": 1.3418862690707352, "grad_norm": 0.4135405738431598, "learning_rate": 9.160958885922508e-06, "loss": 0.5176, "step": 1935 }, { "epoch": 1.3425797503467407, "grad_norm": 0.36227045032667216, "learning_rate": 9.159616145933194e-06, "loss": 0.521, "step": 1936 }, { "epoch": 1.3432732316227463, "grad_norm": 0.3130254608989982, "learning_rate": 9.158272430959982e-06, "loss": 0.4726, "step": 1937 }, { "epoch": 1.3439667128987518, "grad_norm": 0.31929662397535824, "learning_rate": 9.156927741317829e-06, "loss": 0.5399, "step": 1938 }, { "epoch": 1.3446601941747574, "grad_norm": 0.3377022421522008, "learning_rate": 9.155582077321918e-06, "loss": 0.4771, "step": 1939 }, { "epoch": 1.345353675450763, "grad_norm": 0.3281963594186775, "learning_rate": 9.154235439287665e-06, "loss": 0.4835, "step": 1940 }, { "epoch": 1.3460471567267684, "grad_norm": 0.3241665844093833, "learning_rate": 9.152887827530711e-06, "loss": 0.4743, "step": 1941 }, { "epoch": 1.346740638002774, "grad_norm": 0.39032873971384746, "learning_rate": 9.151539242366926e-06, "loss": 0.5394, "step": 1942 }, { "epoch": 1.3474341192787795, "grad_norm": 0.3414710217148856, "learning_rate": 9.150189684112412e-06, "loss": 0.4927, "step": 1943 }, { "epoch": 1.348127600554785, "grad_norm": 0.3755657853423203, "learning_rate": 9.148839153083492e-06, "loss": 0.5162, "step": 1944 }, { "epoch": 1.3488210818307906, "grad_norm": 0.3665213335101388, "learning_rate": 9.14748764959672e-06, "loss": 0.4832, "step": 1945 }, { "epoch": 1.3495145631067962, "grad_norm": 0.3353798881557397, "learning_rate": 9.146135173968881e-06, "loss": 0.4796, "step": 1946 }, { "epoch": 1.3502080443828017, "grad_norm": 0.3715049523272947, "learning_rate": 9.144781726516987e-06, "loss": 0.5228, "step": 1947 }, { "epoch": 1.3509015256588073, "grad_norm": 0.3681059833547073, "learning_rate": 9.143427307558273e-06, "loss": 0.5248, "step": 1948 }, { "epoch": 1.3515950069348128, "grad_norm": 0.3943594985391834, "learning_rate": 9.142071917410205e-06, "loss": 0.5, "step": 1949 }, { "epoch": 1.3522884882108184, "grad_norm": 0.3159122587189733, "learning_rate": 9.140715556390478e-06, "loss": 0.4616, "step": 1950 }, { "epoch": 1.352981969486824, "grad_norm": 0.36864890400438305, "learning_rate": 9.139358224817014e-06, "loss": 0.5548, "step": 1951 }, { "epoch": 1.3536754507628295, "grad_norm": 0.4466662649295465, "learning_rate": 9.13799992300796e-06, "loss": 0.4906, "step": 1952 }, { "epoch": 1.354368932038835, "grad_norm": 0.34155954537718386, "learning_rate": 9.136640651281694e-06, "loss": 0.5071, "step": 1953 }, { "epoch": 1.3550624133148406, "grad_norm": 0.3410588498849418, "learning_rate": 9.135280409956819e-06, "loss": 0.4705, "step": 1954 }, { "epoch": 1.3557558945908461, "grad_norm": 0.35503814912701037, "learning_rate": 9.133919199352163e-06, "loss": 0.5441, "step": 1955 }, { "epoch": 1.3564493758668517, "grad_norm": 0.3535272880958023, "learning_rate": 9.132557019786788e-06, "loss": 0.5011, "step": 1956 }, { "epoch": 1.3571428571428572, "grad_norm": 0.4086155883737188, "learning_rate": 9.131193871579975e-06, "loss": 0.5543, "step": 1957 }, { "epoch": 1.3578363384188628, "grad_norm": 0.430477822463212, "learning_rate": 9.129829755051239e-06, "loss": 0.4904, "step": 1958 }, { 
"epoch": 1.3585298196948683, "grad_norm": 0.3314910774056916, "learning_rate": 9.128464670520318e-06, "loss": 0.4956, "step": 1959 }, { "epoch": 1.3592233009708738, "grad_norm": 0.35154731657033544, "learning_rate": 9.127098618307177e-06, "loss": 0.5145, "step": 1960 }, { "epoch": 1.3599167822468794, "grad_norm": 0.36388159699529904, "learning_rate": 9.125731598732011e-06, "loss": 0.5233, "step": 1961 }, { "epoch": 1.360610263522885, "grad_norm": 0.36800106644061464, "learning_rate": 9.124363612115236e-06, "loss": 0.5569, "step": 1962 }, { "epoch": 1.3613037447988905, "grad_norm": 0.34092547978000565, "learning_rate": 9.122994658777504e-06, "loss": 0.4814, "step": 1963 }, { "epoch": 1.361997226074896, "grad_norm": 0.32932084798226996, "learning_rate": 9.121624739039682e-06, "loss": 0.5132, "step": 1964 }, { "epoch": 1.3626907073509016, "grad_norm": 0.33372224323166977, "learning_rate": 9.120253853222872e-06, "loss": 0.5647, "step": 1965 }, { "epoch": 1.3633841886269071, "grad_norm": 0.39150113814683085, "learning_rate": 9.118882001648398e-06, "loss": 0.5084, "step": 1966 }, { "epoch": 1.3640776699029127, "grad_norm": 0.34411873662988157, "learning_rate": 9.117509184637814e-06, "loss": 0.4616, "step": 1967 }, { "epoch": 1.3647711511789182, "grad_norm": 0.3288103030226968, "learning_rate": 9.116135402512897e-06, "loss": 0.5063, "step": 1968 }, { "epoch": 1.3654646324549238, "grad_norm": 0.405824720854079, "learning_rate": 9.114760655595653e-06, "loss": 0.4789, "step": 1969 }, { "epoch": 1.3661581137309293, "grad_norm": 0.36204030828621414, "learning_rate": 9.11338494420831e-06, "loss": 0.5153, "step": 1970 }, { "epoch": 1.3668515950069349, "grad_norm": 0.35661344896539776, "learning_rate": 9.112008268673329e-06, "loss": 0.5357, "step": 1971 }, { "epoch": 1.3675450762829404, "grad_norm": 0.3356112356485565, "learning_rate": 9.110630629313388e-06, "loss": 0.4943, "step": 1972 }, { "epoch": 1.368238557558946, "grad_norm": 0.37632531231512184, "learning_rate": 9.1092520264514e-06, "loss": 0.5494, "step": 1973 }, { "epoch": 1.3689320388349515, "grad_norm": 0.3968694416479254, "learning_rate": 9.107872460410496e-06, "loss": 0.5375, "step": 1974 }, { "epoch": 1.369625520110957, "grad_norm": 0.3210852092298961, "learning_rate": 9.10649193151404e-06, "loss": 0.4835, "step": 1975 }, { "epoch": 1.3703190013869626, "grad_norm": 0.3609337315216403, "learning_rate": 9.105110440085613e-06, "loss": 0.5464, "step": 1976 }, { "epoch": 1.3710124826629682, "grad_norm": 0.36357706506404275, "learning_rate": 9.103727986449034e-06, "loss": 0.5759, "step": 1977 }, { "epoch": 1.3717059639389737, "grad_norm": 0.35258802728545247, "learning_rate": 9.102344570928333e-06, "loss": 0.5178, "step": 1978 }, { "epoch": 1.3723994452149793, "grad_norm": 0.35930011648447896, "learning_rate": 9.100960193847773e-06, "loss": 0.6151, "step": 1979 }, { "epoch": 1.3730929264909848, "grad_norm": 0.3487670030211412, "learning_rate": 9.099574855531846e-06, "loss": 0.5149, "step": 1980 }, { "epoch": 1.3737864077669903, "grad_norm": 0.9869632462397623, "learning_rate": 9.098188556305262e-06, "loss": 0.4822, "step": 1981 }, { "epoch": 1.374479889042996, "grad_norm": 0.31863533357197305, "learning_rate": 9.096801296492963e-06, "loss": 0.4841, "step": 1982 }, { "epoch": 1.3751733703190014, "grad_norm": 0.4822400161903467, "learning_rate": 9.09541307642011e-06, "loss": 0.5083, "step": 1983 }, { "epoch": 1.375866851595007, "grad_norm": 0.37115373959780606, "learning_rate": 9.094023896412092e-06, "loss": 0.4785, "step": 1984 }, { "epoch": 
1.3765603328710125, "grad_norm": 0.32645115093592375, "learning_rate": 9.092633756794523e-06, "loss": 0.522, "step": 1985 }, { "epoch": 1.377253814147018, "grad_norm": 0.3164366936010976, "learning_rate": 9.091242657893241e-06, "loss": 0.4712, "step": 1986 }, { "epoch": 1.3779472954230236, "grad_norm": 0.3753907910898338, "learning_rate": 9.089850600034312e-06, "loss": 0.499, "step": 1987 }, { "epoch": 1.3786407766990292, "grad_norm": 0.3460678887561886, "learning_rate": 9.088457583544022e-06, "loss": 0.5596, "step": 1988 }, { "epoch": 1.3793342579750347, "grad_norm": 0.33646652029208507, "learning_rate": 9.087063608748883e-06, "loss": 0.4688, "step": 1989 }, { "epoch": 1.3800277392510403, "grad_norm": 0.4227406159344678, "learning_rate": 9.085668675975634e-06, "loss": 0.4758, "step": 1990 }, { "epoch": 1.3807212205270458, "grad_norm": 0.3469319880400181, "learning_rate": 9.084272785551237e-06, "loss": 0.4735, "step": 1991 }, { "epoch": 1.3814147018030514, "grad_norm": 0.35593313436819635, "learning_rate": 9.08287593780288e-06, "loss": 0.5332, "step": 1992 }, { "epoch": 1.382108183079057, "grad_norm": 0.3536191263984846, "learning_rate": 9.081478133057972e-06, "loss": 0.4984, "step": 1993 }, { "epoch": 1.3828016643550625, "grad_norm": 0.3413760268111103, "learning_rate": 9.080079371644151e-06, "loss": 0.4519, "step": 1994 }, { "epoch": 1.383495145631068, "grad_norm": 0.3477983386054444, "learning_rate": 9.078679653889273e-06, "loss": 0.505, "step": 1995 }, { "epoch": 1.3841886269070736, "grad_norm": 0.3388010557099065, "learning_rate": 9.077278980121422e-06, "loss": 0.4826, "step": 1996 }, { "epoch": 1.384882108183079, "grad_norm": 0.3615316941958222, "learning_rate": 9.075877350668909e-06, "loss": 0.5495, "step": 1997 }, { "epoch": 1.3855755894590847, "grad_norm": 0.3420360781245435, "learning_rate": 9.074474765860264e-06, "loss": 0.5217, "step": 1998 }, { "epoch": 1.3862690707350902, "grad_norm": 0.32317076294353375, "learning_rate": 9.073071226024242e-06, "loss": 0.5058, "step": 1999 }, { "epoch": 1.3869625520110958, "grad_norm": 0.3851948084166722, "learning_rate": 9.071666731489824e-06, "loss": 0.5253, "step": 2000 }, { "epoch": 1.3876560332871013, "grad_norm": 0.4180566489080604, "learning_rate": 9.07026128258621e-06, "loss": 0.4451, "step": 2001 }, { "epoch": 1.3883495145631068, "grad_norm": 0.3559935952517109, "learning_rate": 9.068854879642833e-06, "loss": 0.5685, "step": 2002 }, { "epoch": 1.3890429958391124, "grad_norm": 0.3286178778917271, "learning_rate": 9.067447522989337e-06, "loss": 0.4819, "step": 2003 }, { "epoch": 1.389736477115118, "grad_norm": 0.3612689827673842, "learning_rate": 9.066039212955602e-06, "loss": 0.5175, "step": 2004 }, { "epoch": 1.3904299583911235, "grad_norm": 0.3552749797383618, "learning_rate": 9.064629949871721e-06, "loss": 0.4727, "step": 2005 }, { "epoch": 1.391123439667129, "grad_norm": 0.36342615888636715, "learning_rate": 9.063219734068019e-06, "loss": 0.4836, "step": 2006 }, { "epoch": 1.3918169209431346, "grad_norm": 0.36055358488933875, "learning_rate": 9.061808565875037e-06, "loss": 0.4703, "step": 2007 }, { "epoch": 1.3925104022191401, "grad_norm": 0.34473756432631464, "learning_rate": 9.060396445623545e-06, "loss": 0.505, "step": 2008 }, { "epoch": 1.3932038834951457, "grad_norm": 0.33222195880954425, "learning_rate": 9.058983373644532e-06, "loss": 0.4928, "step": 2009 }, { "epoch": 1.3938973647711512, "grad_norm": 0.36215219367300794, "learning_rate": 9.057569350269214e-06, "loss": 0.576, "step": 2010 }, { "epoch": 1.3945908460471568, 
"grad_norm": 0.4219491000516108, "learning_rate": 9.056154375829028e-06, "loss": 0.5195, "step": 2011 }, { "epoch": 1.3952843273231623, "grad_norm": 0.32138636461949377, "learning_rate": 9.054738450655628e-06, "loss": 0.5442, "step": 2012 }, { "epoch": 1.3959778085991679, "grad_norm": 0.35071835083037484, "learning_rate": 9.053321575080905e-06, "loss": 0.4999, "step": 2013 }, { "epoch": 1.3966712898751734, "grad_norm": 0.3287371478947435, "learning_rate": 9.05190374943696e-06, "loss": 0.4832, "step": 2014 }, { "epoch": 1.397364771151179, "grad_norm": 0.3470611829870114, "learning_rate": 9.05048497405612e-06, "loss": 0.4977, "step": 2015 }, { "epoch": 1.3980582524271845, "grad_norm": 0.32023025568043867, "learning_rate": 9.049065249270936e-06, "loss": 0.5207, "step": 2016 }, { "epoch": 1.39875173370319, "grad_norm": 0.3395651298847561, "learning_rate": 9.047644575414184e-06, "loss": 0.5021, "step": 2017 }, { "epoch": 1.3994452149791956, "grad_norm": 0.36578043964741436, "learning_rate": 9.046222952818857e-06, "loss": 0.5643, "step": 2018 }, { "epoch": 1.4001386962552012, "grad_norm": 0.30457827608908566, "learning_rate": 9.044800381818175e-06, "loss": 0.4992, "step": 2019 }, { "epoch": 1.4008321775312067, "grad_norm": 0.33976922886508304, "learning_rate": 9.043376862745576e-06, "loss": 0.5138, "step": 2020 }, { "epoch": 1.4015256588072122, "grad_norm": 0.37214816766432446, "learning_rate": 9.041952395934726e-06, "loss": 0.4518, "step": 2021 }, { "epoch": 1.4022191400832178, "grad_norm": 0.3845961163640592, "learning_rate": 9.040526981719506e-06, "loss": 0.4607, "step": 2022 }, { "epoch": 1.4029126213592233, "grad_norm": 0.3676275315409118, "learning_rate": 9.039100620434025e-06, "loss": 0.5147, "step": 2023 }, { "epoch": 1.403606102635229, "grad_norm": 0.3062267445478142, "learning_rate": 9.03767331241261e-06, "loss": 0.4654, "step": 2024 }, { "epoch": 1.4042995839112344, "grad_norm": 0.35555930078364206, "learning_rate": 9.036245057989815e-06, "loss": 0.458, "step": 2025 }, { "epoch": 1.40499306518724, "grad_norm": 0.325210467347809, "learning_rate": 9.034815857500407e-06, "loss": 0.5059, "step": 2026 }, { "epoch": 1.4056865464632455, "grad_norm": 0.35884101622999165, "learning_rate": 9.033385711279385e-06, "loss": 0.5423, "step": 2027 }, { "epoch": 1.406380027739251, "grad_norm": 0.35037320776031283, "learning_rate": 9.031954619661964e-06, "loss": 0.477, "step": 2028 }, { "epoch": 1.4070735090152566, "grad_norm": 0.30423510507148654, "learning_rate": 9.030522582983582e-06, "loss": 0.4304, "step": 2029 }, { "epoch": 1.4077669902912622, "grad_norm": 0.31991677056009316, "learning_rate": 9.029089601579895e-06, "loss": 0.4998, "step": 2030 }, { "epoch": 1.4084604715672677, "grad_norm": 0.3470277841469619, "learning_rate": 9.027655675786785e-06, "loss": 0.5473, "step": 2031 }, { "epoch": 1.4091539528432733, "grad_norm": 0.31084928616723073, "learning_rate": 9.026220805940357e-06, "loss": 0.4927, "step": 2032 }, { "epoch": 1.4098474341192788, "grad_norm": 0.32211321657284153, "learning_rate": 9.02478499237693e-06, "loss": 0.5132, "step": 2033 }, { "epoch": 1.4105409153952844, "grad_norm": 0.38960526198219364, "learning_rate": 9.02334823543305e-06, "loss": 0.5028, "step": 2034 }, { "epoch": 1.41123439667129, "grad_norm": 0.30185956613060205, "learning_rate": 9.021910535445479e-06, "loss": 0.437, "step": 2035 }, { "epoch": 1.4119278779472955, "grad_norm": 0.5097190134626377, "learning_rate": 9.02047189275121e-06, "loss": 0.5201, "step": 2036 }, { "epoch": 1.412621359223301, "grad_norm": 
0.330140909546037, "learning_rate": 9.019032307687446e-06, "loss": 0.4561, "step": 2037 }, { "epoch": 1.4133148404993066, "grad_norm": 0.3640389309441148, "learning_rate": 9.017591780591615e-06, "loss": 0.5578, "step": 2038 }, { "epoch": 1.414008321775312, "grad_norm": 0.3519252082018009, "learning_rate": 9.016150311801371e-06, "loss": 0.4984, "step": 2039 }, { "epoch": 1.4147018030513177, "grad_norm": 0.3069686446379406, "learning_rate": 9.014707901654576e-06, "loss": 0.407, "step": 2040 }, { "epoch": 1.4153952843273232, "grad_norm": 0.34196917299982177, "learning_rate": 9.013264550489327e-06, "loss": 0.579, "step": 2041 }, { "epoch": 1.4160887656033287, "grad_norm": 0.35132469722852405, "learning_rate": 9.011820258643933e-06, "loss": 0.5231, "step": 2042 }, { "epoch": 1.4167822468793343, "grad_norm": 0.33909092335381796, "learning_rate": 9.010375026456923e-06, "loss": 0.4898, "step": 2043 }, { "epoch": 1.4174757281553398, "grad_norm": 0.3568708820304695, "learning_rate": 9.008928854267054e-06, "loss": 0.4565, "step": 2044 }, { "epoch": 1.4181692094313454, "grad_norm": 0.3615298783462584, "learning_rate": 9.007481742413295e-06, "loss": 0.5035, "step": 2045 }, { "epoch": 1.418862690707351, "grad_norm": 0.32615725363710346, "learning_rate": 9.006033691234838e-06, "loss": 0.4762, "step": 2046 }, { "epoch": 1.4195561719833565, "grad_norm": 0.3361013448137094, "learning_rate": 9.004584701071098e-06, "loss": 0.5041, "step": 2047 }, { "epoch": 1.420249653259362, "grad_norm": 0.3318446360127158, "learning_rate": 9.003134772261705e-06, "loss": 0.5036, "step": 2048 }, { "epoch": 1.4209431345353676, "grad_norm": 0.36039462613717194, "learning_rate": 9.001683905146516e-06, "loss": 0.5307, "step": 2049 }, { "epoch": 1.4216366158113731, "grad_norm": 0.3726157396910557, "learning_rate": 9.000232100065599e-06, "loss": 0.4882, "step": 2050 }, { "epoch": 1.4223300970873787, "grad_norm": 0.32280840980165915, "learning_rate": 8.99877935735925e-06, "loss": 0.4782, "step": 2051 }, { "epoch": 1.4230235783633842, "grad_norm": 0.3725354963452832, "learning_rate": 8.99732567736798e-06, "loss": 0.4809, "step": 2052 }, { "epoch": 1.4237170596393898, "grad_norm": 0.3634630137709549, "learning_rate": 8.99587106043252e-06, "loss": 0.4983, "step": 2053 }, { "epoch": 1.4244105409153953, "grad_norm": 0.37109924576279596, "learning_rate": 8.994415506893824e-06, "loss": 0.5249, "step": 2054 }, { "epoch": 1.4251040221914009, "grad_norm": 0.32474529487445025, "learning_rate": 8.992959017093062e-06, "loss": 0.4616, "step": 2055 }, { "epoch": 1.4257975034674064, "grad_norm": 0.3323036851505102, "learning_rate": 8.991501591371625e-06, "loss": 0.5263, "step": 2056 }, { "epoch": 1.426490984743412, "grad_norm": 0.36395942534364123, "learning_rate": 8.990043230071123e-06, "loss": 0.5053, "step": 2057 }, { "epoch": 1.4271844660194175, "grad_norm": 0.33019473310497166, "learning_rate": 8.988583933533384e-06, "loss": 0.5055, "step": 2058 }, { "epoch": 1.427877947295423, "grad_norm": 0.35570573889246276, "learning_rate": 8.987123702100459e-06, "loss": 0.498, "step": 2059 }, { "epoch": 1.4285714285714286, "grad_norm": 0.35527667461959495, "learning_rate": 8.985662536114614e-06, "loss": 0.5337, "step": 2060 }, { "epoch": 1.4292649098474342, "grad_norm": 0.33569270892402076, "learning_rate": 8.984200435918335e-06, "loss": 0.4616, "step": 2061 }, { "epoch": 1.4299583911234397, "grad_norm": 0.38513224867004137, "learning_rate": 8.982737401854328e-06, "loss": 0.5027, "step": 2062 }, { "epoch": 1.4306518723994452, "grad_norm": 
0.3376653705404227, "learning_rate": 8.981273434265521e-06, "loss": 0.532, "step": 2063 }, { "epoch": 1.4313453536754508, "grad_norm": 0.33072082850503065, "learning_rate": 8.979808533495054e-06, "loss": 0.4822, "step": 2064 }, { "epoch": 1.4320388349514563, "grad_norm": 0.3614107118048633, "learning_rate": 8.978342699886289e-06, "loss": 0.4828, "step": 2065 }, { "epoch": 1.4327323162274619, "grad_norm": 0.3237837634048971, "learning_rate": 8.976875933782808e-06, "loss": 0.472, "step": 2066 }, { "epoch": 1.4334257975034674, "grad_norm": 0.3836504827557471, "learning_rate": 8.97540823552841e-06, "loss": 0.5415, "step": 2067 }, { "epoch": 1.434119278779473, "grad_norm": 0.3386868314164909, "learning_rate": 8.973939605467112e-06, "loss": 0.4822, "step": 2068 }, { "epoch": 1.4348127600554785, "grad_norm": 0.3279036246414576, "learning_rate": 8.972470043943153e-06, "loss": 0.4811, "step": 2069 }, { "epoch": 1.435506241331484, "grad_norm": 0.357983401347262, "learning_rate": 8.970999551300985e-06, "loss": 0.5094, "step": 2070 }, { "epoch": 1.4361997226074896, "grad_norm": 0.33008142807332214, "learning_rate": 8.969528127885281e-06, "loss": 0.4911, "step": 2071 }, { "epoch": 1.4368932038834952, "grad_norm": 0.32539185770423046, "learning_rate": 8.968055774040932e-06, "loss": 0.535, "step": 2072 }, { "epoch": 1.4375866851595007, "grad_norm": 0.39822619448159824, "learning_rate": 8.966582490113049e-06, "loss": 0.4831, "step": 2073 }, { "epoch": 1.4382801664355063, "grad_norm": 0.33787571084119516, "learning_rate": 8.965108276446956e-06, "loss": 0.4439, "step": 2074 }, { "epoch": 1.4389736477115118, "grad_norm": 0.332140043882061, "learning_rate": 8.963633133388201e-06, "loss": 0.4741, "step": 2075 }, { "epoch": 1.4396671289875174, "grad_norm": 0.3418478839502107, "learning_rate": 8.962157061282545e-06, "loss": 0.5358, "step": 2076 }, { "epoch": 1.440360610263523, "grad_norm": 0.32405471861893115, "learning_rate": 8.96068006047597e-06, "loss": 0.4444, "step": 2077 }, { "epoch": 1.4410540915395285, "grad_norm": 0.41591462290526815, "learning_rate": 8.959202131314672e-06, "loss": 0.4977, "step": 2078 }, { "epoch": 1.441747572815534, "grad_norm": 0.35991599715141853, "learning_rate": 8.95772327414507e-06, "loss": 0.5205, "step": 2079 }, { "epoch": 1.4424410540915396, "grad_norm": 0.35078008153840906, "learning_rate": 8.956243489313795e-06, "loss": 0.4837, "step": 2080 }, { "epoch": 1.443134535367545, "grad_norm": 0.36087599320853947, "learning_rate": 8.954762777167697e-06, "loss": 0.567, "step": 2081 }, { "epoch": 1.4438280166435506, "grad_norm": 0.3305749845675022, "learning_rate": 8.953281138053847e-06, "loss": 0.4989, "step": 2082 }, { "epoch": 1.4445214979195562, "grad_norm": 0.617152688711202, "learning_rate": 8.95179857231953e-06, "loss": 0.5462, "step": 2083 }, { "epoch": 1.4452149791955617, "grad_norm": 0.45083498726989446, "learning_rate": 8.950315080312246e-06, "loss": 0.5485, "step": 2084 }, { "epoch": 1.4459084604715673, "grad_norm": 0.3172409370500703, "learning_rate": 8.948830662379717e-06, "loss": 0.4968, "step": 2085 }, { "epoch": 1.4466019417475728, "grad_norm": 0.33435860902312314, "learning_rate": 8.947345318869883e-06, "loss": 0.5809, "step": 2086 }, { "epoch": 1.4472954230235784, "grad_norm": 0.34240526523562725, "learning_rate": 8.94585905013089e-06, "loss": 0.4777, "step": 2087 }, { "epoch": 1.447988904299584, "grad_norm": 0.34760879718399595, "learning_rate": 8.944371856511116e-06, "loss": 0.4491, "step": 2088 }, { "epoch": 1.4486823855755895, "grad_norm": 0.3189560363141416, 
"learning_rate": 8.942883738359142e-06, "loss": 0.5456, "step": 2089 }, { "epoch": 1.449375866851595, "grad_norm": 0.3170983540269611, "learning_rate": 8.941394696023779e-06, "loss": 0.4565, "step": 2090 }, { "epoch": 1.4500693481276006, "grad_norm": 0.33882475327031664, "learning_rate": 8.939904729854042e-06, "loss": 0.4521, "step": 2091 }, { "epoch": 1.4507628294036061, "grad_norm": 0.34335086382401686, "learning_rate": 8.93841384019917e-06, "loss": 0.5064, "step": 2092 }, { "epoch": 1.4514563106796117, "grad_norm": 0.33692361629012535, "learning_rate": 8.936922027408618e-06, "loss": 0.4659, "step": 2093 }, { "epoch": 1.4521497919556172, "grad_norm": 0.33470829041958633, "learning_rate": 8.935429291832056e-06, "loss": 0.5368, "step": 2094 }, { "epoch": 1.4528432732316228, "grad_norm": 0.30851002647805853, "learning_rate": 8.933935633819369e-06, "loss": 0.4258, "step": 2095 }, { "epoch": 1.4535367545076283, "grad_norm": 0.3594190765928513, "learning_rate": 8.93244105372066e-06, "loss": 0.4932, "step": 2096 }, { "epoch": 1.4542302357836339, "grad_norm": 0.3693326429186167, "learning_rate": 8.930945551886249e-06, "loss": 0.4843, "step": 2097 }, { "epoch": 1.4549237170596394, "grad_norm": 0.3797760938509028, "learning_rate": 8.92944912866667e-06, "loss": 0.4876, "step": 2098 }, { "epoch": 1.455617198335645, "grad_norm": 0.34168379646376157, "learning_rate": 8.927951784412673e-06, "loss": 0.4963, "step": 2099 }, { "epoch": 1.4563106796116505, "grad_norm": 0.30037460374802394, "learning_rate": 8.926453519475225e-06, "loss": 0.403, "step": 2100 }, { "epoch": 1.457004160887656, "grad_norm": 0.367983397258792, "learning_rate": 8.924954334205509e-06, "loss": 0.5647, "step": 2101 }, { "epoch": 1.4576976421636616, "grad_norm": 0.3220638697647383, "learning_rate": 8.923454228954924e-06, "loss": 0.4745, "step": 2102 }, { "epoch": 1.4583911234396671, "grad_norm": 0.3623342305949139, "learning_rate": 8.92195320407508e-06, "loss": 0.52, "step": 2103 }, { "epoch": 1.4590846047156727, "grad_norm": 0.41548187399238445, "learning_rate": 8.920451259917813e-06, "loss": 0.5143, "step": 2104 }, { "epoch": 1.4597780859916782, "grad_norm": 0.3386943994206969, "learning_rate": 8.918948396835161e-06, "loss": 0.5242, "step": 2105 }, { "epoch": 1.4604715672676838, "grad_norm": 0.335730677770548, "learning_rate": 8.917444615179386e-06, "loss": 0.4774, "step": 2106 }, { "epoch": 1.4611650485436893, "grad_norm": 0.37515893735896927, "learning_rate": 8.91593991530297e-06, "loss": 0.5748, "step": 2107 }, { "epoch": 1.4618585298196949, "grad_norm": 0.3623649893228703, "learning_rate": 8.914434297558594e-06, "loss": 0.5299, "step": 2108 }, { "epoch": 1.4625520110957004, "grad_norm": 0.34828253504299667, "learning_rate": 8.912927762299169e-06, "loss": 0.5234, "step": 2109 }, { "epoch": 1.463245492371706, "grad_norm": 0.3579084360582346, "learning_rate": 8.911420309877816e-06, "loss": 0.4325, "step": 2110 }, { "epoch": 1.4639389736477115, "grad_norm": 0.3471295356989196, "learning_rate": 8.909911940647868e-06, "loss": 0.4906, "step": 2111 }, { "epoch": 1.464632454923717, "grad_norm": 0.33132240937688123, "learning_rate": 8.90840265496288e-06, "loss": 0.5101, "step": 2112 }, { "epoch": 1.4653259361997226, "grad_norm": 0.8576544345183823, "learning_rate": 8.906892453176617e-06, "loss": 0.5016, "step": 2113 }, { "epoch": 1.4660194174757282, "grad_norm": 0.6223028504780337, "learning_rate": 8.905381335643056e-06, "loss": 0.4905, "step": 2114 }, { "epoch": 1.4667128987517337, "grad_norm": 0.34079976651899996, "learning_rate": 
8.903869302716395e-06, "loss": 0.5331, "step": 2115 }, { "epoch": 1.4674063800277393, "grad_norm": 0.35387099693512847, "learning_rate": 8.902356354751042e-06, "loss": 0.5002, "step": 2116 }, { "epoch": 1.4680998613037448, "grad_norm": 0.33390282585804015, "learning_rate": 8.900842492101622e-06, "loss": 0.4827, "step": 2117 }, { "epoch": 1.4687933425797504, "grad_norm": 0.3484679866645666, "learning_rate": 8.899327715122972e-06, "loss": 0.4649, "step": 2118 }, { "epoch": 1.469486823855756, "grad_norm": 0.35427941169862176, "learning_rate": 8.897812024170147e-06, "loss": 0.5253, "step": 2119 }, { "epoch": 1.4701803051317615, "grad_norm": 0.3814576109457133, "learning_rate": 8.896295419598412e-06, "loss": 0.5234, "step": 2120 }, { "epoch": 1.470873786407767, "grad_norm": 0.3388632778408937, "learning_rate": 8.89477790176325e-06, "loss": 0.4988, "step": 2121 }, { "epoch": 1.4715672676837726, "grad_norm": 0.3681405474214004, "learning_rate": 8.893259471020354e-06, "loss": 0.5191, "step": 2122 }, { "epoch": 1.472260748959778, "grad_norm": 0.32775445125977226, "learning_rate": 8.891740127725634e-06, "loss": 0.5023, "step": 2123 }, { "epoch": 1.4729542302357836, "grad_norm": 0.3457111298038547, "learning_rate": 8.890219872235215e-06, "loss": 0.5326, "step": 2124 }, { "epoch": 1.4736477115117892, "grad_norm": 0.32876607089486615, "learning_rate": 8.888698704905431e-06, "loss": 0.475, "step": 2125 }, { "epoch": 1.4743411927877947, "grad_norm": 0.36121090069085415, "learning_rate": 8.887176626092836e-06, "loss": 0.519, "step": 2126 }, { "epoch": 1.4750346740638003, "grad_norm": 0.3294304857532917, "learning_rate": 8.88565363615419e-06, "loss": 0.5223, "step": 2127 }, { "epoch": 1.4757281553398058, "grad_norm": 0.3381813421678541, "learning_rate": 8.884129735446471e-06, "loss": 0.5335, "step": 2128 }, { "epoch": 1.4764216366158114, "grad_norm": 0.32304364687073783, "learning_rate": 8.882604924326877e-06, "loss": 0.4938, "step": 2129 }, { "epoch": 1.477115117891817, "grad_norm": 0.3299954486263169, "learning_rate": 8.881079203152805e-06, "loss": 0.5033, "step": 2130 }, { "epoch": 1.4778085991678225, "grad_norm": 0.3515561596055821, "learning_rate": 8.879552572281876e-06, "loss": 0.5462, "step": 2131 }, { "epoch": 1.478502080443828, "grad_norm": 0.3576475327911327, "learning_rate": 8.878025032071922e-06, "loss": 0.504, "step": 2132 }, { "epoch": 1.4791955617198336, "grad_norm": 0.3399071238302747, "learning_rate": 8.876496582880984e-06, "loss": 0.4852, "step": 2133 }, { "epoch": 1.4798890429958391, "grad_norm": 0.3581013682860097, "learning_rate": 8.874967225067325e-06, "loss": 0.498, "step": 2134 }, { "epoch": 1.4805825242718447, "grad_norm": 0.35887746174050233, "learning_rate": 8.873436958989409e-06, "loss": 0.524, "step": 2135 }, { "epoch": 1.4812760055478502, "grad_norm": 0.369093070770657, "learning_rate": 8.871905785005925e-06, "loss": 0.562, "step": 2136 }, { "epoch": 1.4819694868238558, "grad_norm": 0.3327975410271189, "learning_rate": 8.870373703475767e-06, "loss": 0.475, "step": 2137 }, { "epoch": 1.4826629680998613, "grad_norm": 0.34585276926972686, "learning_rate": 8.868840714758043e-06, "loss": 0.4356, "step": 2138 }, { "epoch": 1.4833564493758669, "grad_norm": 0.3431727946927612, "learning_rate": 8.867306819212074e-06, "loss": 0.5257, "step": 2139 }, { "epoch": 1.4840499306518724, "grad_norm": 0.31659786173406046, "learning_rate": 8.865772017197395e-06, "loss": 0.4091, "step": 2140 }, { "epoch": 1.484743411927878, "grad_norm": 0.3289203283433317, "learning_rate": 8.864236309073753e-06, 
"loss": 0.446, "step": 2141 }, { "epoch": 1.4854368932038835, "grad_norm": 0.34570866516093024, "learning_rate": 8.862699695201107e-06, "loss": 0.455, "step": 2142 }, { "epoch": 1.486130374479889, "grad_norm": 0.3243325705552734, "learning_rate": 8.861162175939626e-06, "loss": 0.4854, "step": 2143 }, { "epoch": 1.4868238557558946, "grad_norm": 0.4633649442916241, "learning_rate": 8.859623751649696e-06, "loss": 0.4723, "step": 2144 }, { "epoch": 1.4875173370319001, "grad_norm": 0.3783059857256445, "learning_rate": 8.858084422691911e-06, "loss": 0.55, "step": 2145 }, { "epoch": 1.4882108183079057, "grad_norm": 0.32371798362830867, "learning_rate": 8.856544189427078e-06, "loss": 0.4608, "step": 2146 }, { "epoch": 1.4889042995839112, "grad_norm": 0.35498144049642766, "learning_rate": 8.855003052216219e-06, "loss": 0.5067, "step": 2147 }, { "epoch": 1.4895977808599168, "grad_norm": 0.3158213403639081, "learning_rate": 8.853461011420563e-06, "loss": 0.4768, "step": 2148 }, { "epoch": 1.4902912621359223, "grad_norm": 0.38794064014236784, "learning_rate": 8.851918067401552e-06, "loss": 0.5308, "step": 2149 }, { "epoch": 1.4909847434119279, "grad_norm": 0.39633647521149623, "learning_rate": 8.850374220520845e-06, "loss": 0.5074, "step": 2150 }, { "epoch": 1.4916782246879334, "grad_norm": 0.3486306393934178, "learning_rate": 8.848829471140308e-06, "loss": 0.4879, "step": 2151 }, { "epoch": 1.492371705963939, "grad_norm": 0.4664071746463165, "learning_rate": 8.847283819622015e-06, "loss": 0.5047, "step": 2152 }, { "epoch": 1.4930651872399445, "grad_norm": 0.32723494962213, "learning_rate": 8.845737266328258e-06, "loss": 0.49, "step": 2153 }, { "epoch": 1.49375866851595, "grad_norm": 0.3251236319193131, "learning_rate": 8.84418981162154e-06, "loss": 0.4836, "step": 2154 }, { "epoch": 1.4944521497919556, "grad_norm": 0.3583769316924854, "learning_rate": 8.842641455864568e-06, "loss": 0.5113, "step": 2155 }, { "epoch": 1.4951456310679612, "grad_norm": 0.4026476547769237, "learning_rate": 8.84109219942027e-06, "loss": 0.5318, "step": 2156 }, { "epoch": 1.4958391123439667, "grad_norm": 0.3867651071790106, "learning_rate": 8.83954204265178e-06, "loss": 0.454, "step": 2157 }, { "epoch": 1.4965325936199723, "grad_norm": 0.360269646116235, "learning_rate": 8.837990985922442e-06, "loss": 0.5314, "step": 2158 }, { "epoch": 1.4972260748959778, "grad_norm": 0.342018217831643, "learning_rate": 8.836439029595811e-06, "loss": 0.4687, "step": 2159 }, { "epoch": 1.4979195561719834, "grad_norm": 0.343738873338028, "learning_rate": 8.83488617403566e-06, "loss": 0.482, "step": 2160 }, { "epoch": 1.498613037447989, "grad_norm": 0.34365928684064706, "learning_rate": 8.83333241960596e-06, "loss": 0.5088, "step": 2161 }, { "epoch": 1.4993065187239945, "grad_norm": 0.32521792862418825, "learning_rate": 8.831777766670904e-06, "loss": 0.4986, "step": 2162 }, { "epoch": 1.5, "grad_norm": 0.3372337645649222, "learning_rate": 8.83022221559489e-06, "loss": 0.4881, "step": 2163 }, { "epoch": 1.5006934812760055, "grad_norm": 0.3338903724583596, "learning_rate": 8.82866576674253e-06, "loss": 0.5232, "step": 2164 }, { "epoch": 1.501386962552011, "grad_norm": 0.34114613890599516, "learning_rate": 8.827108420478643e-06, "loss": 0.5049, "step": 2165 }, { "epoch": 1.5020804438280166, "grad_norm": 0.3574385595787303, "learning_rate": 8.825550177168258e-06, "loss": 0.5112, "step": 2166 }, { "epoch": 1.5027739251040222, "grad_norm": 0.3591715625156868, "learning_rate": 8.823991037176618e-06, "loss": 0.5137, "step": 2167 }, { "epoch": 
1.5034674063800277, "grad_norm": 0.2982555515327498, "learning_rate": 8.822431000869173e-06, "loss": 0.4842, "step": 2168 }, { "epoch": 1.5041608876560333, "grad_norm": 0.343208379987108, "learning_rate": 8.820870068611585e-06, "loss": 0.5075, "step": 2169 }, { "epoch": 1.5048543689320388, "grad_norm": 0.3358606227046012, "learning_rate": 8.819308240769726e-06, "loss": 0.5765, "step": 2170 }, { "epoch": 1.5055478502080444, "grad_norm": 0.357421882435548, "learning_rate": 8.817745517709675e-06, "loss": 0.4963, "step": 2171 }, { "epoch": 1.50624133148405, "grad_norm": 0.3151770170586487, "learning_rate": 8.816181899797725e-06, "loss": 0.5054, "step": 2172 }, { "epoch": 1.5069348127600555, "grad_norm": 0.36371559438674267, "learning_rate": 8.814617387400373e-06, "loss": 0.5025, "step": 2173 }, { "epoch": 1.507628294036061, "grad_norm": 0.3395056390621181, "learning_rate": 8.813051980884336e-06, "loss": 0.4751, "step": 2174 }, { "epoch": 1.5083217753120666, "grad_norm": 0.3420029494780606, "learning_rate": 8.811485680616527e-06, "loss": 0.4887, "step": 2175 }, { "epoch": 1.5090152565880721, "grad_norm": 0.3508653123781212, "learning_rate": 8.809918486964079e-06, "loss": 0.5213, "step": 2176 }, { "epoch": 1.5097087378640777, "grad_norm": 0.4725204458538169, "learning_rate": 8.808350400294332e-06, "loss": 0.5489, "step": 2177 }, { "epoch": 1.5104022191400832, "grad_norm": 0.36416513504755693, "learning_rate": 8.806781420974832e-06, "loss": 0.515, "step": 2178 }, { "epoch": 1.5110957004160888, "grad_norm": 0.343763042212301, "learning_rate": 8.805211549373335e-06, "loss": 0.5338, "step": 2179 }, { "epoch": 1.5117891816920943, "grad_norm": 0.6018582584105034, "learning_rate": 8.803640785857811e-06, "loss": 0.4995, "step": 2180 }, { "epoch": 1.5124826629680999, "grad_norm": 0.3578014597280583, "learning_rate": 8.802069130796436e-06, "loss": 0.5633, "step": 2181 }, { "epoch": 1.5131761442441054, "grad_norm": 0.3319497961863665, "learning_rate": 8.80049658455759e-06, "loss": 0.4817, "step": 2182 }, { "epoch": 1.513869625520111, "grad_norm": 0.3657315763626466, "learning_rate": 8.79892314750987e-06, "loss": 0.4595, "step": 2183 }, { "epoch": 1.5145631067961165, "grad_norm": 0.3494130129852575, "learning_rate": 8.797348820022079e-06, "loss": 0.5178, "step": 2184 }, { "epoch": 1.515256588072122, "grad_norm": 0.31213717070599123, "learning_rate": 8.795773602463223e-06, "loss": 0.4673, "step": 2185 }, { "epoch": 1.5159500693481276, "grad_norm": 0.3680008443335597, "learning_rate": 8.794197495202525e-06, "loss": 0.5085, "step": 2186 }, { "epoch": 1.5166435506241331, "grad_norm": 0.3483466342828103, "learning_rate": 8.792620498609416e-06, "loss": 0.5171, "step": 2187 }, { "epoch": 1.5173370319001387, "grad_norm": 0.3156290430000329, "learning_rate": 8.791042613053527e-06, "loss": 0.4657, "step": 2188 }, { "epoch": 1.5180305131761442, "grad_norm": 0.3558287728094569, "learning_rate": 8.789463838904707e-06, "loss": 0.4955, "step": 2189 }, { "epoch": 1.5187239944521498, "grad_norm": 0.3613488978910829, "learning_rate": 8.787884176533007e-06, "loss": 0.5479, "step": 2190 }, { "epoch": 1.5194174757281553, "grad_norm": 0.3490092046960001, "learning_rate": 8.78630362630869e-06, "loss": 0.4182, "step": 2191 }, { "epoch": 1.5201109570041609, "grad_norm": 0.332937402859545, "learning_rate": 8.784722188602224e-06, "loss": 0.5469, "step": 2192 }, { "epoch": 1.5208044382801664, "grad_norm": 0.35683842696023627, "learning_rate": 8.783139863784287e-06, "loss": 0.5153, "step": 2193 }, { "epoch": 1.521497919556172, 
"grad_norm": 0.37203079646420295, "learning_rate": 8.781556652225765e-06, "loss": 0.5426, "step": 2194 }, { "epoch": 1.5221914008321775, "grad_norm": 0.33946884040932535, "learning_rate": 8.779972554297752e-06, "loss": 0.5234, "step": 2195 }, { "epoch": 1.522884882108183, "grad_norm": 0.40931857078688355, "learning_rate": 8.778387570371544e-06, "loss": 0.5656, "step": 2196 }, { "epoch": 1.5235783633841886, "grad_norm": 0.3415686478395844, "learning_rate": 8.776801700818658e-06, "loss": 0.5285, "step": 2197 }, { "epoch": 1.5242718446601942, "grad_norm": 0.3390355908928575, "learning_rate": 8.775214946010806e-06, "loss": 0.486, "step": 2198 }, { "epoch": 1.5249653259361997, "grad_norm": 0.37005348586484604, "learning_rate": 8.773627306319912e-06, "loss": 0.4675, "step": 2199 }, { "epoch": 1.5256588072122053, "grad_norm": 0.3403610720395017, "learning_rate": 8.772038782118106e-06, "loss": 0.4876, "step": 2200 }, { "epoch": 1.5263522884882108, "grad_norm": 0.32314497663843095, "learning_rate": 8.770449373777729e-06, "loss": 0.4879, "step": 2201 }, { "epoch": 1.5270457697642164, "grad_norm": 0.33696459431857195, "learning_rate": 8.768859081671323e-06, "loss": 0.5044, "step": 2202 }, { "epoch": 1.527739251040222, "grad_norm": 0.33474549090149996, "learning_rate": 8.767267906171647e-06, "loss": 0.4893, "step": 2203 }, { "epoch": 1.5284327323162274, "grad_norm": 0.35168303363530057, "learning_rate": 8.765675847651655e-06, "loss": 0.4831, "step": 2204 }, { "epoch": 1.529126213592233, "grad_norm": 0.33683092114089147, "learning_rate": 8.764082906484518e-06, "loss": 0.4908, "step": 2205 }, { "epoch": 1.5298196948682385, "grad_norm": 0.3427384026255094, "learning_rate": 8.76248908304361e-06, "loss": 0.4932, "step": 2206 }, { "epoch": 1.530513176144244, "grad_norm": 0.3365800012479881, "learning_rate": 8.760894377702508e-06, "loss": 0.5293, "step": 2207 }, { "epoch": 1.5312066574202496, "grad_norm": 0.32274601620790755, "learning_rate": 8.759298790835002e-06, "loss": 0.4612, "step": 2208 }, { "epoch": 1.5319001386962552, "grad_norm": 0.5669181402136003, "learning_rate": 8.757702322815086e-06, "loss": 0.5368, "step": 2209 }, { "epoch": 1.5325936199722607, "grad_norm": 0.3433752220386639, "learning_rate": 8.756104974016959e-06, "loss": 0.4821, "step": 2210 }, { "epoch": 1.5332871012482663, "grad_norm": 0.332519921079417, "learning_rate": 8.754506744815031e-06, "loss": 0.4661, "step": 2211 }, { "epoch": 1.5339805825242718, "grad_norm": 0.3640908473928857, "learning_rate": 8.752907635583911e-06, "loss": 0.5831, "step": 2212 }, { "epoch": 1.5346740638002774, "grad_norm": 0.3528664268276014, "learning_rate": 8.751307646698423e-06, "loss": 0.5935, "step": 2213 }, { "epoch": 1.535367545076283, "grad_norm": 0.3262030255599233, "learning_rate": 8.74970677853359e-06, "loss": 0.4768, "step": 2214 }, { "epoch": 1.5360610263522885, "grad_norm": 0.3654045691376196, "learning_rate": 8.748105031464644e-06, "loss": 0.4644, "step": 2215 }, { "epoch": 1.536754507628294, "grad_norm": 0.32899870313543844, "learning_rate": 8.746502405867025e-06, "loss": 0.5342, "step": 2216 }, { "epoch": 1.5374479889042996, "grad_norm": 0.327544087264539, "learning_rate": 8.744898902116375e-06, "loss": 0.4568, "step": 2217 }, { "epoch": 1.5381414701803051, "grad_norm": 0.32970059808450525, "learning_rate": 8.743294520588545e-06, "loss": 0.4778, "step": 2218 }, { "epoch": 1.5388349514563107, "grad_norm": 0.34241561151168054, "learning_rate": 8.74168926165959e-06, "loss": 0.5069, "step": 2219 }, { "epoch": 1.5395284327323162, "grad_norm": 
0.38376640762900927, "learning_rate": 8.740083125705769e-06, "loss": 0.5319, "step": 2220 }, { "epoch": 1.5402219140083218, "grad_norm": 0.34473123025809793, "learning_rate": 8.738476113103551e-06, "loss": 0.4952, "step": 2221 }, { "epoch": 1.5409153952843273, "grad_norm": 0.33219211908462165, "learning_rate": 8.736868224229606e-06, "loss": 0.5298, "step": 2222 }, { "epoch": 1.5416088765603329, "grad_norm": 0.3975438898102467, "learning_rate": 8.735259459460813e-06, "loss": 0.5781, "step": 2223 }, { "epoch": 1.5423023578363384, "grad_norm": 0.34285171530903735, "learning_rate": 8.733649819174257e-06, "loss": 0.5105, "step": 2224 }, { "epoch": 1.542995839112344, "grad_norm": 0.3444838753241053, "learning_rate": 8.732039303747223e-06, "loss": 0.5485, "step": 2225 }, { "epoch": 1.5436893203883495, "grad_norm": 0.4314287733836552, "learning_rate": 8.730427913557205e-06, "loss": 0.4907, "step": 2226 }, { "epoch": 1.544382801664355, "grad_norm": 0.3625342086114008, "learning_rate": 8.7288156489819e-06, "loss": 0.5203, "step": 2227 }, { "epoch": 1.5450762829403606, "grad_norm": 0.375773964504486, "learning_rate": 8.727202510399213e-06, "loss": 0.4542, "step": 2228 }, { "epoch": 1.5457697642163661, "grad_norm": 0.3790830361898822, "learning_rate": 8.725588498187251e-06, "loss": 0.5421, "step": 2229 }, { "epoch": 1.5464632454923717, "grad_norm": 0.33287718148572004, "learning_rate": 8.723973612724328e-06, "loss": 0.524, "step": 2230 }, { "epoch": 1.5471567267683772, "grad_norm": 0.3190052764715288, "learning_rate": 8.722357854388958e-06, "loss": 0.4854, "step": 2231 }, { "epoch": 1.5478502080443828, "grad_norm": 0.32989629940199827, "learning_rate": 8.720741223559867e-06, "loss": 0.5089, "step": 2232 }, { "epoch": 1.5485436893203883, "grad_norm": 0.3553590645382289, "learning_rate": 8.71912372061598e-06, "loss": 0.4597, "step": 2233 }, { "epoch": 1.5492371705963939, "grad_norm": 0.3444501626690935, "learning_rate": 8.71750534593643e-06, "loss": 0.5094, "step": 2234 }, { "epoch": 1.5499306518723994, "grad_norm": 0.362346575247488, "learning_rate": 8.715886099900547e-06, "loss": 0.4871, "step": 2235 }, { "epoch": 1.550624133148405, "grad_norm": 0.32275597228122355, "learning_rate": 8.714265982887875e-06, "loss": 0.4799, "step": 2236 }, { "epoch": 1.5513176144244105, "grad_norm": 0.3321687215820013, "learning_rate": 8.712644995278157e-06, "loss": 0.489, "step": 2237 }, { "epoch": 1.552011095700416, "grad_norm": 0.3230531682078885, "learning_rate": 8.711023137451343e-06, "loss": 0.4948, "step": 2238 }, { "epoch": 1.5527045769764216, "grad_norm": 0.33731908056730714, "learning_rate": 8.709400409787579e-06, "loss": 0.551, "step": 2239 }, { "epoch": 1.5533980582524272, "grad_norm": 0.35789621588271175, "learning_rate": 8.707776812667224e-06, "loss": 0.5029, "step": 2240 }, { "epoch": 1.5540915395284327, "grad_norm": 0.34244349742755764, "learning_rate": 8.706152346470836e-06, "loss": 0.4838, "step": 2241 }, { "epoch": 1.5547850208044383, "grad_norm": 0.3735191834740914, "learning_rate": 8.704527011579181e-06, "loss": 0.5116, "step": 2242 }, { "epoch": 1.5554785020804438, "grad_norm": 0.3371303277672469, "learning_rate": 8.702900808373223e-06, "loss": 0.5015, "step": 2243 }, { "epoch": 1.5561719833564494, "grad_norm": 0.3790896432244701, "learning_rate": 8.701273737234133e-06, "loss": 0.4674, "step": 2244 }, { "epoch": 1.556865464632455, "grad_norm": 0.33844643340669506, "learning_rate": 8.699645798543286e-06, "loss": 0.5107, "step": 2245 }, { "epoch": 1.5575589459084604, "grad_norm": 0.34770428537631803, 
"learning_rate": 8.698016992682257e-06, "loss": 0.4917, "step": 2246 }, { "epoch": 1.558252427184466, "grad_norm": 0.3245168369391238, "learning_rate": 8.696387320032827e-06, "loss": 0.4983, "step": 2247 }, { "epoch": 1.5589459084604715, "grad_norm": 0.3332521517448146, "learning_rate": 8.694756780976981e-06, "loss": 0.5166, "step": 2248 }, { "epoch": 1.559639389736477, "grad_norm": 0.4033705446210668, "learning_rate": 8.693125375896903e-06, "loss": 0.528, "step": 2249 }, { "epoch": 1.5603328710124826, "grad_norm": 0.30375975024798835, "learning_rate": 8.691493105174984e-06, "loss": 0.4417, "step": 2250 }, { "epoch": 1.5610263522884882, "grad_norm": 0.32536134105180453, "learning_rate": 8.689859969193817e-06, "loss": 0.4784, "step": 2251 }, { "epoch": 1.5617198335644937, "grad_norm": 0.3569204969265695, "learning_rate": 8.688225968336196e-06, "loss": 0.4372, "step": 2252 }, { "epoch": 1.5624133148404993, "grad_norm": 0.3345953434225004, "learning_rate": 8.686591102985118e-06, "loss": 0.412, "step": 2253 }, { "epoch": 1.5631067961165048, "grad_norm": 0.3449520006433837, "learning_rate": 8.684955373523787e-06, "loss": 0.4837, "step": 2254 }, { "epoch": 1.5638002773925104, "grad_norm": 0.3373259756591131, "learning_rate": 8.683318780335604e-06, "loss": 0.4888, "step": 2255 }, { "epoch": 1.564493758668516, "grad_norm": 0.4129542149778871, "learning_rate": 8.681681323804173e-06, "loss": 0.4928, "step": 2256 }, { "epoch": 1.5651872399445215, "grad_norm": 0.34042187219101755, "learning_rate": 8.680043004313306e-06, "loss": 0.502, "step": 2257 }, { "epoch": 1.565880721220527, "grad_norm": 0.3631752829877917, "learning_rate": 8.67840382224701e-06, "loss": 0.5212, "step": 2258 }, { "epoch": 1.5665742024965326, "grad_norm": 0.35915220713464724, "learning_rate": 8.676763777989496e-06, "loss": 0.4883, "step": 2259 }, { "epoch": 1.5672676837725381, "grad_norm": 0.3395023662489134, "learning_rate": 8.675122871925183e-06, "loss": 0.4934, "step": 2260 }, { "epoch": 1.5679611650485437, "grad_norm": 0.33741239069706436, "learning_rate": 8.673481104438685e-06, "loss": 0.5191, "step": 2261 }, { "epoch": 1.5686546463245492, "grad_norm": 0.3194854552562877, "learning_rate": 8.671838475914822e-06, "loss": 0.4698, "step": 2262 }, { "epoch": 1.5693481276005548, "grad_norm": 0.32188779556439207, "learning_rate": 8.670194986738612e-06, "loss": 0.4829, "step": 2263 }, { "epoch": 1.5700416088765603, "grad_norm": 0.3721520248916597, "learning_rate": 8.668550637295277e-06, "loss": 0.522, "step": 2264 }, { "epoch": 1.5707350901525658, "grad_norm": 0.35028826293968157, "learning_rate": 8.666905427970243e-06, "loss": 0.4901, "step": 2265 }, { "epoch": 1.5714285714285714, "grad_norm": 0.3639130105475345, "learning_rate": 8.665259359149132e-06, "loss": 0.5087, "step": 2266 }, { "epoch": 1.572122052704577, "grad_norm": 0.5190313296340657, "learning_rate": 8.663612431217774e-06, "loss": 0.5392, "step": 2267 }, { "epoch": 1.5728155339805825, "grad_norm": 0.38849983750330797, "learning_rate": 8.661964644562194e-06, "loss": 0.5082, "step": 2268 }, { "epoch": 1.573509015256588, "grad_norm": 0.3531878311721917, "learning_rate": 8.660315999568623e-06, "loss": 0.4826, "step": 2269 }, { "epoch": 1.5742024965325936, "grad_norm": 0.3615359411346645, "learning_rate": 8.658666496623492e-06, "loss": 0.5247, "step": 2270 }, { "epoch": 1.5748959778085991, "grad_norm": 0.3569792692889927, "learning_rate": 8.65701613611343e-06, "loss": 0.5474, "step": 2271 }, { "epoch": 1.5755894590846047, "grad_norm": 0.33357778315429043, "learning_rate": 
8.65536491842527e-06, "loss": 0.5327, "step": 2272 }, { "epoch": 1.5762829403606102, "grad_norm": 0.3515711698917928, "learning_rate": 8.653712843946048e-06, "loss": 0.5045, "step": 2273 }, { "epoch": 1.5769764216366158, "grad_norm": 0.3733973745796123, "learning_rate": 8.652059913062998e-06, "loss": 0.4955, "step": 2274 }, { "epoch": 1.5776699029126213, "grad_norm": 0.5170113877554505, "learning_rate": 8.650406126163553e-06, "loss": 0.4766, "step": 2275 }, { "epoch": 1.5783633841886269, "grad_norm": 0.3808574808748637, "learning_rate": 8.648751483635349e-06, "loss": 0.4612, "step": 2276 }, { "epoch": 1.5790568654646324, "grad_norm": 0.33085709681247133, "learning_rate": 8.647095985866222e-06, "loss": 0.5142, "step": 2277 }, { "epoch": 1.579750346740638, "grad_norm": 0.32359313611043633, "learning_rate": 8.64543963324421e-06, "loss": 0.4902, "step": 2278 }, { "epoch": 1.5804438280166435, "grad_norm": 0.3420322606933061, "learning_rate": 8.64378242615755e-06, "loss": 0.4427, "step": 2279 }, { "epoch": 1.581137309292649, "grad_norm": 0.34829402470440507, "learning_rate": 8.642124364994678e-06, "loss": 0.5111, "step": 2280 }, { "epoch": 1.5818307905686546, "grad_norm": 0.37885662645245954, "learning_rate": 8.640465450144232e-06, "loss": 0.5464, "step": 2281 }, { "epoch": 1.5825242718446602, "grad_norm": 0.35659453895805726, "learning_rate": 8.638805681995052e-06, "loss": 0.515, "step": 2282 }, { "epoch": 1.5832177531206657, "grad_norm": 0.35062405710868483, "learning_rate": 8.637145060936172e-06, "loss": 0.5005, "step": 2283 }, { "epoch": 1.5839112343966713, "grad_norm": 0.3742523026598048, "learning_rate": 8.635483587356833e-06, "loss": 0.5186, "step": 2284 }, { "epoch": 1.5846047156726768, "grad_norm": 0.32541801716115343, "learning_rate": 8.63382126164647e-06, "loss": 0.5098, "step": 2285 }, { "epoch": 1.5852981969486823, "grad_norm": 0.3464661377434391, "learning_rate": 8.632158084194718e-06, "loss": 0.4817, "step": 2286 }, { "epoch": 1.585991678224688, "grad_norm": 0.33903649571410066, "learning_rate": 8.630494055391418e-06, "loss": 0.4999, "step": 2287 }, { "epoch": 1.5866851595006934, "grad_norm": 0.35932718031968586, "learning_rate": 8.628829175626605e-06, "loss": 0.4996, "step": 2288 }, { "epoch": 1.587378640776699, "grad_norm": 0.35072726408323124, "learning_rate": 8.627163445290514e-06, "loss": 0.4932, "step": 2289 }, { "epoch": 1.5880721220527045, "grad_norm": 0.33516518196198114, "learning_rate": 8.625496864773581e-06, "loss": 0.4947, "step": 2290 }, { "epoch": 1.58876560332871, "grad_norm": 0.3206029389144817, "learning_rate": 8.62382943446644e-06, "loss": 0.4845, "step": 2291 }, { "epoch": 1.5894590846047156, "grad_norm": 0.3519214815637636, "learning_rate": 8.622161154759925e-06, "loss": 0.5381, "step": 2292 }, { "epoch": 1.5901525658807212, "grad_norm": 0.3407678357986744, "learning_rate": 8.620492026045067e-06, "loss": 0.4865, "step": 2293 }, { "epoch": 1.5908460471567267, "grad_norm": 0.34776026433527907, "learning_rate": 8.6188220487131e-06, "loss": 0.4531, "step": 2294 }, { "epoch": 1.5915395284327323, "grad_norm": 0.4015731021920983, "learning_rate": 8.617151223155453e-06, "loss": 0.5543, "step": 2295 }, { "epoch": 1.5922330097087378, "grad_norm": 0.3122812610509576, "learning_rate": 8.615479549763756e-06, "loss": 0.5013, "step": 2296 }, { "epoch": 1.5929264909847434, "grad_norm": 0.6181767435580547, "learning_rate": 8.613807028929837e-06, "loss": 0.4736, "step": 2297 }, { "epoch": 1.593619972260749, "grad_norm": 0.35460464085136595, "learning_rate": 
8.612133661045724e-06, "loss": 0.5251, "step": 2298 }, { "epoch": 1.5943134535367545, "grad_norm": 0.34417225984133837, "learning_rate": 8.610459446503641e-06, "loss": 0.5461, "step": 2299 }, { "epoch": 1.59500693481276, "grad_norm": 0.3376208103562316, "learning_rate": 8.60878438569601e-06, "loss": 0.5495, "step": 2300 }, { "epoch": 1.5957004160887656, "grad_norm": 0.31049008765757147, "learning_rate": 8.607108479015456e-06, "loss": 0.4468, "step": 2301 }, { "epoch": 1.596393897364771, "grad_norm": 0.32866297624409146, "learning_rate": 8.605431726854798e-06, "loss": 0.4943, "step": 2302 }, { "epoch": 1.5970873786407767, "grad_norm": 0.4131569653907188, "learning_rate": 8.603754129607055e-06, "loss": 0.5033, "step": 2303 }, { "epoch": 1.5977808599167822, "grad_norm": 0.3939326141839437, "learning_rate": 8.602075687665445e-06, "loss": 0.5686, "step": 2304 }, { "epoch": 1.5984743411927878, "grad_norm": 0.3686498367717291, "learning_rate": 8.600396401423382e-06, "loss": 0.5072, "step": 2305 }, { "epoch": 1.5991678224687933, "grad_norm": 0.32625710778831524, "learning_rate": 8.598716271274475e-06, "loss": 0.4786, "step": 2306 }, { "epoch": 1.5998613037447988, "grad_norm": 0.3222713337884579, "learning_rate": 8.597035297612537e-06, "loss": 0.4692, "step": 2307 }, { "epoch": 1.6005547850208044, "grad_norm": 0.31254029594108695, "learning_rate": 8.595353480831579e-06, "loss": 0.468, "step": 2308 }, { "epoch": 1.60124826629681, "grad_norm": 0.3706521656367522, "learning_rate": 8.5936708213258e-06, "loss": 0.5644, "step": 2309 }, { "epoch": 1.6019417475728155, "grad_norm": 0.49448762398620105, "learning_rate": 8.591987319489612e-06, "loss": 0.5405, "step": 2310 }, { "epoch": 1.602635228848821, "grad_norm": 0.30149327734099346, "learning_rate": 8.590302975717608e-06, "loss": 0.4343, "step": 2311 }, { "epoch": 1.6033287101248266, "grad_norm": 0.3403371891151002, "learning_rate": 8.58861779040459e-06, "loss": 0.5056, "step": 2312 }, { "epoch": 1.6040221914008321, "grad_norm": 0.3133703130401212, "learning_rate": 8.58693176394555e-06, "loss": 0.5165, "step": 2313 }, { "epoch": 1.6047156726768377, "grad_norm": 0.4080524394945862, "learning_rate": 8.585244896735683e-06, "loss": 0.4862, "step": 2314 }, { "epoch": 1.6054091539528432, "grad_norm": 0.32544398241823613, "learning_rate": 8.583557189170378e-06, "loss": 0.4556, "step": 2315 }, { "epoch": 1.6061026352288488, "grad_norm": 0.4049591006774152, "learning_rate": 8.58186864164522e-06, "loss": 0.5284, "step": 2316 }, { "epoch": 1.6067961165048543, "grad_norm": 0.32844390352811, "learning_rate": 8.580179254555997e-06, "loss": 0.471, "step": 2317 }, { "epoch": 1.6074895977808599, "grad_norm": 0.3339450897007203, "learning_rate": 8.578489028298682e-06, "loss": 0.4949, "step": 2318 }, { "epoch": 1.6081830790568654, "grad_norm": 0.316657542337206, "learning_rate": 8.576797963269457e-06, "loss": 0.4694, "step": 2319 }, { "epoch": 1.608876560332871, "grad_norm": 0.34848180397483325, "learning_rate": 8.575106059864692e-06, "loss": 0.5174, "step": 2320 }, { "epoch": 1.6095700416088765, "grad_norm": 0.34531692714722445, "learning_rate": 8.573413318480962e-06, "loss": 0.4592, "step": 2321 }, { "epoch": 1.610263522884882, "grad_norm": 0.3297192718940854, "learning_rate": 8.571719739515027e-06, "loss": 0.517, "step": 2322 }, { "epoch": 1.6109570041608876, "grad_norm": 0.5006368300085653, "learning_rate": 8.570025323363853e-06, "loss": 0.5746, "step": 2323 }, { "epoch": 1.6116504854368932, "grad_norm": 0.3338139895127677, "learning_rate": 8.5683300704246e-06, "loss": 
0.4648, "step": 2324 }, { "epoch": 1.6123439667128987, "grad_norm": 0.3102976806507002, "learning_rate": 8.566633981094621e-06, "loss": 0.4877, "step": 2325 }, { "epoch": 1.6130374479889042, "grad_norm": 0.3525751147871692, "learning_rate": 8.564937055771468e-06, "loss": 0.5632, "step": 2326 }, { "epoch": 1.6137309292649098, "grad_norm": 0.33655687426200803, "learning_rate": 8.563239294852885e-06, "loss": 0.5249, "step": 2327 }, { "epoch": 1.6144244105409153, "grad_norm": 0.3341375490350698, "learning_rate": 8.561540698736821e-06, "loss": 0.5165, "step": 2328 }, { "epoch": 1.615117891816921, "grad_norm": 0.3647406383552219, "learning_rate": 8.559841267821409e-06, "loss": 0.5521, "step": 2329 }, { "epoch": 1.6158113730929264, "grad_norm": 0.33799324385201407, "learning_rate": 8.558141002504987e-06, "loss": 0.5019, "step": 2330 }, { "epoch": 1.616504854368932, "grad_norm": 0.31136255655238165, "learning_rate": 8.556439903186082e-06, "loss": 0.4336, "step": 2331 }, { "epoch": 1.6171983356449375, "grad_norm": 0.3331417838237942, "learning_rate": 8.55473797026342e-06, "loss": 0.4883, "step": 2332 }, { "epoch": 1.617891816920943, "grad_norm": 0.3077342605630263, "learning_rate": 8.553035204135925e-06, "loss": 0.4584, "step": 2333 }, { "epoch": 1.6185852981969486, "grad_norm": 0.31512207029311223, "learning_rate": 8.551331605202708e-06, "loss": 0.4595, "step": 2334 }, { "epoch": 1.6192787794729542, "grad_norm": 0.3100505336704164, "learning_rate": 8.549627173863085e-06, "loss": 0.4833, "step": 2335 }, { "epoch": 1.6199722607489597, "grad_norm": 0.3348803822961055, "learning_rate": 8.547921910516556e-06, "loss": 0.4647, "step": 2336 }, { "epoch": 1.6206657420249653, "grad_norm": 0.35412026804913915, "learning_rate": 8.546215815562831e-06, "loss": 0.456, "step": 2337 }, { "epoch": 1.6213592233009708, "grad_norm": 0.3373130549490593, "learning_rate": 8.544508889401799e-06, "loss": 0.4551, "step": 2338 }, { "epoch": 1.6220527045769764, "grad_norm": 0.3416280756280188, "learning_rate": 8.542801132433554e-06, "loss": 0.5362, "step": 2339 }, { "epoch": 1.622746185852982, "grad_norm": 0.3547589986333213, "learning_rate": 8.541092545058383e-06, "loss": 0.5314, "step": 2340 }, { "epoch": 1.6234396671289875, "grad_norm": 0.34010660436726803, "learning_rate": 8.539383127676764e-06, "loss": 0.5294, "step": 2341 }, { "epoch": 1.624133148404993, "grad_norm": 0.33219758876713085, "learning_rate": 8.537672880689374e-06, "loss": 0.4964, "step": 2342 }, { "epoch": 1.6248266296809986, "grad_norm": 0.3381789141756323, "learning_rate": 8.535961804497081e-06, "loss": 0.4363, "step": 2343 }, { "epoch": 1.6255201109570043, "grad_norm": 0.3421398607098175, "learning_rate": 8.53424989950095e-06, "loss": 0.5314, "step": 2344 }, { "epoch": 1.6262135922330097, "grad_norm": 0.3926612390608668, "learning_rate": 8.53253716610224e-06, "loss": 0.5333, "step": 2345 }, { "epoch": 1.6269070735090154, "grad_norm": 0.31329313173084145, "learning_rate": 8.530823604702402e-06, "loss": 0.4712, "step": 2346 }, { "epoch": 1.6276005547850207, "grad_norm": 0.32639142826056655, "learning_rate": 8.529109215703082e-06, "loss": 0.4718, "step": 2347 }, { "epoch": 1.6282940360610265, "grad_norm": 0.35821919319031414, "learning_rate": 8.52739399950612e-06, "loss": 0.4757, "step": 2348 }, { "epoch": 1.6289875173370318, "grad_norm": 0.3585954356440912, "learning_rate": 8.525677956513552e-06, "loss": 0.4417, "step": 2349 }, { "epoch": 1.6296809986130376, "grad_norm": 0.3688836681015824, "learning_rate": 8.523961087127605e-06, "loss": 0.532, "step": 
2350 }, { "epoch": 1.630374479889043, "grad_norm": 0.3670613238165888, "learning_rate": 8.522243391750699e-06, "loss": 0.4667, "step": 2351 }, { "epoch": 1.6310679611650487, "grad_norm": 0.34101555271089323, "learning_rate": 8.520524870785453e-06, "loss": 0.5179, "step": 2352 }, { "epoch": 1.631761442441054, "grad_norm": 0.35111151709950245, "learning_rate": 8.518805524634675e-06, "loss": 0.5117, "step": 2353 }, { "epoch": 1.6324549237170598, "grad_norm": 0.3317940926638129, "learning_rate": 8.517085353701366e-06, "loss": 0.5048, "step": 2354 }, { "epoch": 1.6331484049930651, "grad_norm": 0.37414614984125183, "learning_rate": 8.515364358388722e-06, "loss": 0.5789, "step": 2355 }, { "epoch": 1.633841886269071, "grad_norm": 0.3376622621389358, "learning_rate": 8.51364253910013e-06, "loss": 0.4428, "step": 2356 }, { "epoch": 1.6345353675450762, "grad_norm": 0.34426885902052357, "learning_rate": 8.511919896239176e-06, "loss": 0.4405, "step": 2357 }, { "epoch": 1.635228848821082, "grad_norm": 0.3847237659073976, "learning_rate": 8.510196430209632e-06, "loss": 0.4971, "step": 2358 }, { "epoch": 1.6359223300970873, "grad_norm": 0.37985385094614077, "learning_rate": 8.508472141415468e-06, "loss": 0.5116, "step": 2359 }, { "epoch": 1.636615811373093, "grad_norm": 0.31211572860115416, "learning_rate": 8.506747030260841e-06, "loss": 0.4679, "step": 2360 }, { "epoch": 1.6373092926490984, "grad_norm": 0.32726561785027947, "learning_rate": 8.505021097150108e-06, "loss": 0.557, "step": 2361 }, { "epoch": 1.6380027739251042, "grad_norm": 0.35681214550346435, "learning_rate": 8.503294342487815e-06, "loss": 0.4792, "step": 2362 }, { "epoch": 1.6386962552011095, "grad_norm": 0.3492848909148922, "learning_rate": 8.501566766678701e-06, "loss": 0.5307, "step": 2363 }, { "epoch": 1.6393897364771153, "grad_norm": 0.31637614213127896, "learning_rate": 8.499838370127696e-06, "loss": 0.4835, "step": 2364 }, { "epoch": 1.6400832177531206, "grad_norm": 0.33730014438008116, "learning_rate": 8.498109153239924e-06, "loss": 0.5135, "step": 2365 }, { "epoch": 1.6407766990291264, "grad_norm": 0.40935874263822575, "learning_rate": 8.4963791164207e-06, "loss": 0.5421, "step": 2366 }, { "epoch": 1.6414701803051317, "grad_norm": 0.3379064073476399, "learning_rate": 8.494648260075533e-06, "loss": 0.523, "step": 2367 }, { "epoch": 1.6421636615811375, "grad_norm": 0.31702626560645264, "learning_rate": 8.492916584610124e-06, "loss": 0.419, "step": 2368 }, { "epoch": 1.6428571428571428, "grad_norm": 0.3601047079642492, "learning_rate": 8.491184090430365e-06, "loss": 0.5253, "step": 2369 }, { "epoch": 1.6435506241331486, "grad_norm": 0.3382148672494078, "learning_rate": 8.489450777942339e-06, "loss": 0.4794, "step": 2370 }, { "epoch": 1.6442441054091539, "grad_norm": 0.38147039535952726, "learning_rate": 8.487716647552321e-06, "loss": 0.5426, "step": 2371 }, { "epoch": 1.6449375866851597, "grad_norm": 0.39262805949278906, "learning_rate": 8.485981699666783e-06, "loss": 0.6066, "step": 2372 }, { "epoch": 1.645631067961165, "grad_norm": 0.3311634228969915, "learning_rate": 8.484245934692379e-06, "loss": 0.4823, "step": 2373 }, { "epoch": 1.6463245492371708, "grad_norm": 0.3094441739636701, "learning_rate": 8.482509353035963e-06, "loss": 0.4549, "step": 2374 }, { "epoch": 1.647018030513176, "grad_norm": 0.3451938236517863, "learning_rate": 8.480771955104576e-06, "loss": 0.5351, "step": 2375 }, { "epoch": 1.6477115117891818, "grad_norm": 0.3914481969080076, "learning_rate": 8.479033741305451e-06, "loss": 0.5234, "step": 2376 }, { 
"epoch": 1.6484049930651872, "grad_norm": 0.35019362739476884, "learning_rate": 8.477294712046015e-06, "loss": 0.4876, "step": 2377 }, { "epoch": 1.649098474341193, "grad_norm": 0.32659517998467763, "learning_rate": 8.47555486773388e-06, "loss": 0.4765, "step": 2378 }, { "epoch": 1.6497919556171983, "grad_norm": 0.3544349859107952, "learning_rate": 8.473814208776859e-06, "loss": 0.5009, "step": 2379 }, { "epoch": 1.650485436893204, "grad_norm": 0.33145226650270854, "learning_rate": 8.472072735582942e-06, "loss": 0.4908, "step": 2380 }, { "epoch": 1.6511789181692094, "grad_norm": 0.36111427694300746, "learning_rate": 8.470330448560322e-06, "loss": 0.5376, "step": 2381 }, { "epoch": 1.6518723994452151, "grad_norm": 0.34250434203443375, "learning_rate": 8.46858734811738e-06, "loss": 0.5121, "step": 2382 }, { "epoch": 1.6525658807212205, "grad_norm": 0.3386582342923932, "learning_rate": 8.466843434662684e-06, "loss": 0.459, "step": 2383 }, { "epoch": 1.6532593619972262, "grad_norm": 0.30438521537428603, "learning_rate": 8.465098708604993e-06, "loss": 0.4244, "step": 2384 }, { "epoch": 1.6539528432732316, "grad_norm": 0.35913260197182756, "learning_rate": 8.463353170353263e-06, "loss": 0.4943, "step": 2385 }, { "epoch": 1.6546463245492373, "grad_norm": 0.34759450587753227, "learning_rate": 8.46160682031663e-06, "loss": 0.5221, "step": 2386 }, { "epoch": 1.6553398058252426, "grad_norm": 0.44793266660351866, "learning_rate": 8.45985965890443e-06, "loss": 0.5094, "step": 2387 }, { "epoch": 1.6560332871012484, "grad_norm": 0.3362340108452135, "learning_rate": 8.458111686526183e-06, "loss": 0.4799, "step": 2388 }, { "epoch": 1.6567267683772537, "grad_norm": 0.3623547035990375, "learning_rate": 8.456362903591602e-06, "loss": 0.5116, "step": 2389 }, { "epoch": 1.6574202496532595, "grad_norm": 0.3347272319748318, "learning_rate": 8.454613310510589e-06, "loss": 0.5074, "step": 2390 }, { "epoch": 1.6581137309292648, "grad_norm": 0.3419824071284227, "learning_rate": 8.452862907693233e-06, "loss": 0.5128, "step": 2391 }, { "epoch": 1.6588072122052706, "grad_norm": 0.35485842251470634, "learning_rate": 8.45111169554982e-06, "loss": 0.5367, "step": 2392 }, { "epoch": 1.659500693481276, "grad_norm": 0.37391988512056223, "learning_rate": 8.44935967449082e-06, "loss": 0.4835, "step": 2393 }, { "epoch": 1.6601941747572817, "grad_norm": 0.3673986650933405, "learning_rate": 8.447606844926895e-06, "loss": 0.5052, "step": 2394 }, { "epoch": 1.660887656033287, "grad_norm": 0.3881693520995384, "learning_rate": 8.44585320726889e-06, "loss": 0.4922, "step": 2395 }, { "epoch": 1.6615811373092928, "grad_norm": 0.325655165536149, "learning_rate": 8.444098761927855e-06, "loss": 0.4583, "step": 2396 }, { "epoch": 1.6622746185852981, "grad_norm": 0.3846994429297036, "learning_rate": 8.44234350931501e-06, "loss": 0.497, "step": 2397 }, { "epoch": 1.662968099861304, "grad_norm": 0.4501128496829653, "learning_rate": 8.440587449841778e-06, "loss": 0.5311, "step": 2398 }, { "epoch": 1.6636615811373092, "grad_norm": 0.3391140575840351, "learning_rate": 8.438830583919764e-06, "loss": 0.5657, "step": 2399 }, { "epoch": 1.664355062413315, "grad_norm": 0.3276414395723224, "learning_rate": 8.437072911960768e-06, "loss": 0.5109, "step": 2400 }, { "epoch": 1.6650485436893203, "grad_norm": 0.3186521534762923, "learning_rate": 8.435314434376773e-06, "loss": 0.4476, "step": 2401 }, { "epoch": 1.665742024965326, "grad_norm": 0.3258387241321177, "learning_rate": 8.433555151579955e-06, "loss": 0.4531, "step": 2402 }, { "epoch": 
1.6664355062413314, "grad_norm": 0.3141937433091077, "learning_rate": 8.431795063982676e-06, "loss": 0.5081, "step": 2403 }, { "epoch": 1.6671289875173372, "grad_norm": 0.3533433800951231, "learning_rate": 8.430034171997487e-06, "loss": 0.537, "step": 2404 }, { "epoch": 1.6678224687933425, "grad_norm": 0.3785611905897186, "learning_rate": 8.428272476037131e-06, "loss": 0.4925, "step": 2405 }, { "epoch": 1.6685159500693483, "grad_norm": 0.3451032533804043, "learning_rate": 8.426509976514535e-06, "loss": 0.5093, "step": 2406 }, { "epoch": 1.6692094313453536, "grad_norm": 0.3983835234416269, "learning_rate": 8.424746673842817e-06, "loss": 0.5818, "step": 2407 }, { "epoch": 1.6699029126213594, "grad_norm": 0.35946256960241124, "learning_rate": 8.422982568435283e-06, "loss": 0.5147, "step": 2408 }, { "epoch": 1.6705963938973647, "grad_norm": 0.33512022199958486, "learning_rate": 8.421217660705423e-06, "loss": 0.5014, "step": 2409 }, { "epoch": 1.6712898751733705, "grad_norm": 0.3279723714645418, "learning_rate": 8.419451951066922e-06, "loss": 0.4734, "step": 2410 }, { "epoch": 1.6719833564493758, "grad_norm": 0.3113632797827704, "learning_rate": 8.417685439933647e-06, "loss": 0.4941, "step": 2411 }, { "epoch": 1.6726768377253816, "grad_norm": 0.361208800565422, "learning_rate": 8.415918127719659e-06, "loss": 0.4964, "step": 2412 }, { "epoch": 1.6733703190013869, "grad_norm": 0.35029005520106865, "learning_rate": 8.4141500148392e-06, "loss": 0.5474, "step": 2413 }, { "epoch": 1.6740638002773927, "grad_norm": 0.38762583517759724, "learning_rate": 8.412381101706706e-06, "loss": 0.5499, "step": 2414 }, { "epoch": 1.674757281553398, "grad_norm": 0.33443090207346654, "learning_rate": 8.410611388736793e-06, "loss": 0.4624, "step": 2415 }, { "epoch": 1.6754507628294038, "grad_norm": 0.3218181688695065, "learning_rate": 8.408840876344271e-06, "loss": 0.4613, "step": 2416 }, { "epoch": 1.676144244105409, "grad_norm": 0.33119099547396874, "learning_rate": 8.407069564944136e-06, "loss": 0.5194, "step": 2417 }, { "epoch": 1.6768377253814148, "grad_norm": 0.33015613336472177, "learning_rate": 8.405297454951571e-06, "loss": 0.498, "step": 2418 }, { "epoch": 1.6775312066574202, "grad_norm": 0.3452461370979553, "learning_rate": 8.403524546781945e-06, "loss": 0.4673, "step": 2419 }, { "epoch": 1.678224687933426, "grad_norm": 0.3885633218107612, "learning_rate": 8.401750840850814e-06, "loss": 0.5108, "step": 2420 }, { "epoch": 1.6789181692094313, "grad_norm": 0.38120231623458456, "learning_rate": 8.399976337573922e-06, "loss": 0.524, "step": 2421 }, { "epoch": 1.679611650485437, "grad_norm": 0.35916656477788433, "learning_rate": 8.398201037367202e-06, "loss": 0.4893, "step": 2422 }, { "epoch": 1.6803051317614424, "grad_norm": 0.32811593502155867, "learning_rate": 8.39642494064677e-06, "loss": 0.4315, "step": 2423 }, { "epoch": 1.6809986130374481, "grad_norm": 0.3432195418816232, "learning_rate": 8.394648047828929e-06, "loss": 0.445, "step": 2424 }, { "epoch": 1.6816920943134535, "grad_norm": 0.3450660030159922, "learning_rate": 8.39287035933017e-06, "loss": 0.4823, "step": 2425 }, { "epoch": 1.6823855755894592, "grad_norm": 0.327998520064132, "learning_rate": 8.391091875567172e-06, "loss": 0.457, "step": 2426 }, { "epoch": 1.6830790568654646, "grad_norm": 0.3214512239859119, "learning_rate": 8.389312596956797e-06, "loss": 0.4285, "step": 2427 }, { "epoch": 1.6837725381414703, "grad_norm": 0.3746752871576561, "learning_rate": 8.387532523916097e-06, "loss": 0.5089, "step": 2428 }, { "epoch": 1.6844660194174756, 
"grad_norm": 0.3344666780359997, "learning_rate": 8.385751656862305e-06, "loss": 0.4917, "step": 2429 }, { "epoch": 1.6851595006934814, "grad_norm": 0.3090978619731297, "learning_rate": 8.383969996212847e-06, "loss": 0.5079, "step": 2430 }, { "epoch": 1.6858529819694867, "grad_norm": 0.3500562402914821, "learning_rate": 8.382187542385329e-06, "loss": 0.5758, "step": 2431 }, { "epoch": 1.6865464632454925, "grad_norm": 0.3024623056590533, "learning_rate": 8.380404295797549e-06, "loss": 0.457, "step": 2432 }, { "epoch": 1.6872399445214978, "grad_norm": 0.3569864578946277, "learning_rate": 8.37862025686748e-06, "loss": 0.4905, "step": 2433 }, { "epoch": 1.6879334257975036, "grad_norm": 0.37597910967736453, "learning_rate": 8.376835426013293e-06, "loss": 0.5645, "step": 2434 }, { "epoch": 1.688626907073509, "grad_norm": 0.3215998184985862, "learning_rate": 8.375049803653338e-06, "loss": 0.4702, "step": 2435 }, { "epoch": 1.6893203883495147, "grad_norm": 0.3304465412730977, "learning_rate": 8.373263390206155e-06, "loss": 0.5013, "step": 2436 }, { "epoch": 1.69001386962552, "grad_norm": 0.3362356458388988, "learning_rate": 8.37147618609046e-06, "loss": 0.5372, "step": 2437 }, { "epoch": 1.6907073509015258, "grad_norm": 0.3316397757105315, "learning_rate": 8.369688191725167e-06, "loss": 0.4841, "step": 2438 }, { "epoch": 1.6914008321775311, "grad_norm": 0.3436839283442423, "learning_rate": 8.367899407529366e-06, "loss": 0.4775, "step": 2439 }, { "epoch": 1.692094313453537, "grad_norm": 0.3530057847109228, "learning_rate": 8.366109833922335e-06, "loss": 0.5103, "step": 2440 }, { "epoch": 1.6927877947295422, "grad_norm": 0.38237495469363436, "learning_rate": 8.364319471323537e-06, "loss": 0.5424, "step": 2441 }, { "epoch": 1.693481276005548, "grad_norm": 0.3323218181024924, "learning_rate": 8.362528320152621e-06, "loss": 0.5073, "step": 2442 }, { "epoch": 1.6941747572815533, "grad_norm": 0.3395087230743487, "learning_rate": 8.36073638082942e-06, "loss": 0.4976, "step": 2443 }, { "epoch": 1.694868238557559, "grad_norm": 0.43803096709012057, "learning_rate": 8.35894365377395e-06, "loss": 0.5614, "step": 2444 }, { "epoch": 1.6955617198335644, "grad_norm": 0.4092037821750451, "learning_rate": 8.357150139406414e-06, "loss": 0.5346, "step": 2445 }, { "epoch": 1.6962552011095702, "grad_norm": 0.36379433307402875, "learning_rate": 8.355355838147199e-06, "loss": 0.482, "step": 2446 }, { "epoch": 1.6969486823855755, "grad_norm": 0.33761524734315884, "learning_rate": 8.353560750416876e-06, "loss": 0.5029, "step": 2447 }, { "epoch": 1.6976421636615813, "grad_norm": 0.3200023027999064, "learning_rate": 8.351764876636202e-06, "loss": 0.4316, "step": 2448 }, { "epoch": 1.6983356449375866, "grad_norm": 0.35372822392811026, "learning_rate": 8.349968217226114e-06, "loss": 0.5526, "step": 2449 }, { "epoch": 1.6990291262135924, "grad_norm": 0.33553182120293173, "learning_rate": 8.348170772607737e-06, "loss": 0.5162, "step": 2450 }, { "epoch": 1.6997226074895977, "grad_norm": 0.3479254809150717, "learning_rate": 8.346372543202382e-06, "loss": 0.5343, "step": 2451 }, { "epoch": 1.7004160887656035, "grad_norm": 0.5812679854894354, "learning_rate": 8.344573529431536e-06, "loss": 0.4798, "step": 2452 }, { "epoch": 1.7011095700416088, "grad_norm": 0.3674849404115623, "learning_rate": 8.342773731716878e-06, "loss": 0.5301, "step": 2453 }, { "epoch": 1.7018030513176146, "grad_norm": 0.37233526724345034, "learning_rate": 8.340973150480266e-06, "loss": 0.5202, "step": 2454 }, { "epoch": 1.7024965325936199, "grad_norm": 
0.35593068200025635, "learning_rate": 8.339171786143747e-06, "loss": 0.5084, "step": 2455 }, { "epoch": 1.7031900138696257, "grad_norm": 0.3230195542324644, "learning_rate": 8.337369639129541e-06, "loss": 0.4771, "step": 2456 }, { "epoch": 1.703883495145631, "grad_norm": 0.3599876212209188, "learning_rate": 8.335566709860065e-06, "loss": 0.5087, "step": 2457 }, { "epoch": 1.7045769764216367, "grad_norm": 0.2976928682742035, "learning_rate": 8.333762998757908e-06, "loss": 0.4271, "step": 2458 }, { "epoch": 1.705270457697642, "grad_norm": 0.45662886179373235, "learning_rate": 8.331958506245849e-06, "loss": 0.5037, "step": 2459 }, { "epoch": 1.7059639389736478, "grad_norm": 0.6422844526708903, "learning_rate": 8.330153232746846e-06, "loss": 0.4767, "step": 2460 }, { "epoch": 1.7066574202496532, "grad_norm": 0.38898480654620865, "learning_rate": 8.328347178684045e-06, "loss": 0.5264, "step": 2461 }, { "epoch": 1.707350901525659, "grad_norm": 0.3540969996439805, "learning_rate": 8.32654034448077e-06, "loss": 0.5359, "step": 2462 }, { "epoch": 1.7080443828016643, "grad_norm": 0.3505125568171822, "learning_rate": 8.32473273056053e-06, "loss": 0.4538, "step": 2463 }, { "epoch": 1.70873786407767, "grad_norm": 0.3730192545013024, "learning_rate": 8.322924337347016e-06, "loss": 0.5113, "step": 2464 }, { "epoch": 1.7094313453536754, "grad_norm": 0.3437784529450237, "learning_rate": 8.321115165264102e-06, "loss": 0.4481, "step": 2465 }, { "epoch": 1.7101248266296811, "grad_norm": 0.3500718730594351, "learning_rate": 8.31930521473585e-06, "loss": 0.5189, "step": 2466 }, { "epoch": 1.7108183079056865, "grad_norm": 1.3020713277067153, "learning_rate": 8.31749448618649e-06, "loss": 0.4947, "step": 2467 }, { "epoch": 1.7115117891816922, "grad_norm": 0.38730326596472725, "learning_rate": 8.315682980040454e-06, "loss": 0.5072, "step": 2468 }, { "epoch": 1.7122052704576975, "grad_norm": 0.3385802479454782, "learning_rate": 8.313870696722338e-06, "loss": 0.4652, "step": 2469 }, { "epoch": 1.7128987517337033, "grad_norm": 0.31967293601897206, "learning_rate": 8.31205763665693e-06, "loss": 0.4788, "step": 2470 }, { "epoch": 1.7135922330097086, "grad_norm": 0.3908487619420314, "learning_rate": 8.3102438002692e-06, "loss": 0.4675, "step": 2471 }, { "epoch": 1.7142857142857144, "grad_norm": 0.40637026388607694, "learning_rate": 8.308429187984298e-06, "loss": 0.5062, "step": 2472 }, { "epoch": 1.7149791955617197, "grad_norm": 0.3542652823210553, "learning_rate": 8.306613800227555e-06, "loss": 0.5213, "step": 2473 }, { "epoch": 1.7156726768377255, "grad_norm": 0.32988776133636527, "learning_rate": 8.304797637424484e-06, "loss": 0.5028, "step": 2474 }, { "epoch": 1.7163661581137308, "grad_norm": 0.36309291428325613, "learning_rate": 8.30298070000078e-06, "loss": 0.4874, "step": 2475 }, { "epoch": 1.7170596393897366, "grad_norm": 0.353391374842643, "learning_rate": 8.301162988382325e-06, "loss": 0.5011, "step": 2476 }, { "epoch": 1.717753120665742, "grad_norm": 0.3212144882534916, "learning_rate": 8.29934450299517e-06, "loss": 0.5247, "step": 2477 }, { "epoch": 1.7184466019417477, "grad_norm": 0.3444249238312598, "learning_rate": 8.29752524426556e-06, "loss": 0.5182, "step": 2478 }, { "epoch": 1.719140083217753, "grad_norm": 0.3675114739167973, "learning_rate": 8.295705212619916e-06, "loss": 0.5026, "step": 2479 }, { "epoch": 1.7198335644937588, "grad_norm": 0.36757609168659333, "learning_rate": 8.293884408484835e-06, "loss": 0.5189, "step": 2480 }, { "epoch": 1.7205270457697641, "grad_norm": 0.3473402414865221, 
"learning_rate": 8.292062832287107e-06, "loss": 0.5004, "step": 2481 }, { "epoch": 1.7212205270457699, "grad_norm": 0.3248994512518578, "learning_rate": 8.290240484453693e-06, "loss": 0.4429, "step": 2482 }, { "epoch": 1.7219140083217752, "grad_norm": 0.3754054166117554, "learning_rate": 8.288417365411738e-06, "loss": 0.478, "step": 2483 }, { "epoch": 1.722607489597781, "grad_norm": 0.4546400086310538, "learning_rate": 8.28659347558857e-06, "loss": 0.4843, "step": 2484 }, { "epoch": 1.7233009708737863, "grad_norm": 0.3333067144791095, "learning_rate": 8.284768815411693e-06, "loss": 0.4708, "step": 2485 }, { "epoch": 1.723994452149792, "grad_norm": 0.3707418743071705, "learning_rate": 8.282943385308794e-06, "loss": 0.4993, "step": 2486 }, { "epoch": 1.7246879334257974, "grad_norm": 0.3320962453755535, "learning_rate": 8.281117185707741e-06, "loss": 0.5005, "step": 2487 }, { "epoch": 1.7253814147018032, "grad_norm": 0.34239302367868774, "learning_rate": 8.279290217036583e-06, "loss": 0.5262, "step": 2488 }, { "epoch": 1.7260748959778085, "grad_norm": 0.34487267720362547, "learning_rate": 8.27746247972355e-06, "loss": 0.4887, "step": 2489 }, { "epoch": 1.7267683772538143, "grad_norm": 0.3521400798206825, "learning_rate": 8.275633974197048e-06, "loss": 0.4905, "step": 2490 }, { "epoch": 1.7274618585298196, "grad_norm": 0.3437904733859773, "learning_rate": 8.273804700885664e-06, "loss": 0.4866, "step": 2491 }, { "epoch": 1.7281553398058254, "grad_norm": 0.326765053662226, "learning_rate": 8.27197466021817e-06, "loss": 0.4424, "step": 2492 }, { "epoch": 1.7288488210818307, "grad_norm": 0.33938505275484304, "learning_rate": 8.27014385262351e-06, "loss": 0.5245, "step": 2493 }, { "epoch": 1.7295423023578365, "grad_norm": 0.3709333437985796, "learning_rate": 8.268312278530816e-06, "loss": 0.4937, "step": 2494 }, { "epoch": 1.7302357836338418, "grad_norm": 0.3648784958531084, "learning_rate": 8.266479938369395e-06, "loss": 0.4699, "step": 2495 }, { "epoch": 1.7309292649098476, "grad_norm": 0.38184892930452446, "learning_rate": 8.26464683256873e-06, "loss": 0.5272, "step": 2496 }, { "epoch": 1.7316227461858529, "grad_norm": 0.31143739047605695, "learning_rate": 8.262812961558494e-06, "loss": 0.4721, "step": 2497 }, { "epoch": 1.7323162274618586, "grad_norm": 0.36252154762018163, "learning_rate": 8.26097832576853e-06, "loss": 0.5058, "step": 2498 }, { "epoch": 1.733009708737864, "grad_norm": 0.34851031406026894, "learning_rate": 8.259142925628862e-06, "loss": 0.491, "step": 2499 }, { "epoch": 1.7337031900138697, "grad_norm": 0.4156842337734494, "learning_rate": 8.257306761569697e-06, "loss": 0.4562, "step": 2500 }, { "epoch": 1.734396671289875, "grad_norm": 0.34043316498210197, "learning_rate": 8.25546983402142e-06, "loss": 0.4957, "step": 2501 }, { "epoch": 1.7350901525658808, "grad_norm": 0.3797393027097939, "learning_rate": 8.253632143414586e-06, "loss": 0.564, "step": 2502 }, { "epoch": 1.7357836338418862, "grad_norm": 0.33700527540918146, "learning_rate": 8.251793690179947e-06, "loss": 0.5597, "step": 2503 }, { "epoch": 1.736477115117892, "grad_norm": 0.3690079523704426, "learning_rate": 8.249954474748414e-06, "loss": 0.5116, "step": 2504 }, { "epoch": 1.7371705963938973, "grad_norm": 0.314775969337088, "learning_rate": 8.24811449755109e-06, "loss": 0.4644, "step": 2505 }, { "epoch": 1.737864077669903, "grad_norm": 0.34626876034402704, "learning_rate": 8.246273759019252e-06, "loss": 0.5306, "step": 2506 }, { "epoch": 1.7385575589459084, "grad_norm": 0.34914103867214885, "learning_rate": 
8.244432259584356e-06, "loss": 0.4415, "step": 2507 }, { "epoch": 1.7392510402219141, "grad_norm": 0.3085317865880849, "learning_rate": 8.242589999678037e-06, "loss": 0.4894, "step": 2508 }, { "epoch": 1.7399445214979194, "grad_norm": 0.32595498921763916, "learning_rate": 8.240746979732103e-06, "loss": 0.494, "step": 2509 }, { "epoch": 1.7406380027739252, "grad_norm": 0.40704577999226504, "learning_rate": 8.23890320017855e-06, "loss": 0.5072, "step": 2510 }, { "epoch": 1.7413314840499305, "grad_norm": 0.37428305256480154, "learning_rate": 8.237058661449543e-06, "loss": 0.5011, "step": 2511 }, { "epoch": 1.7420249653259363, "grad_norm": 0.37528891685475274, "learning_rate": 8.23521336397743e-06, "loss": 0.5164, "step": 2512 }, { "epoch": 1.7427184466019416, "grad_norm": 0.32956195069890537, "learning_rate": 8.233367308194735e-06, "loss": 0.4872, "step": 2513 }, { "epoch": 1.7434119278779474, "grad_norm": 0.3180733197569015, "learning_rate": 8.231520494534158e-06, "loss": 0.4476, "step": 2514 }, { "epoch": 1.7441054091539527, "grad_norm": 0.34602940917357783, "learning_rate": 8.229672923428582e-06, "loss": 0.4924, "step": 2515 }, { "epoch": 1.7447988904299585, "grad_norm": 0.3370268041212885, "learning_rate": 8.227824595311064e-06, "loss": 0.5216, "step": 2516 }, { "epoch": 1.7454923717059638, "grad_norm": 0.33663939923219494, "learning_rate": 8.22597551061484e-06, "loss": 0.4844, "step": 2517 }, { "epoch": 1.7461858529819696, "grad_norm": 0.3199724563766412, "learning_rate": 8.224125669773315e-06, "loss": 0.4997, "step": 2518 }, { "epoch": 1.746879334257975, "grad_norm": 0.3340021689703054, "learning_rate": 8.222275073220087e-06, "loss": 0.4821, "step": 2519 }, { "epoch": 1.7475728155339807, "grad_norm": 0.3119035674803005, "learning_rate": 8.220423721388918e-06, "loss": 0.4826, "step": 2520 }, { "epoch": 1.748266296809986, "grad_norm": 0.33039699110525106, "learning_rate": 8.21857161471375e-06, "loss": 0.4728, "step": 2521 }, { "epoch": 1.7489597780859918, "grad_norm": 0.45355884487551595, "learning_rate": 8.216718753628708e-06, "loss": 0.5551, "step": 2522 }, { "epoch": 1.7496532593619971, "grad_norm": 0.33963600367254804, "learning_rate": 8.214865138568084e-06, "loss": 0.5278, "step": 2523 }, { "epoch": 1.7503467406380029, "grad_norm": 0.3505431172678928, "learning_rate": 8.213010769966356e-06, "loss": 0.5443, "step": 2524 }, { "epoch": 1.7510402219140082, "grad_norm": 0.3206237977061909, "learning_rate": 8.211155648258174e-06, "loss": 0.4502, "step": 2525 }, { "epoch": 1.751733703190014, "grad_norm": 0.30258900307978975, "learning_rate": 8.209299773878366e-06, "loss": 0.4519, "step": 2526 }, { "epoch": 1.7524271844660193, "grad_norm": 0.3250013913618086, "learning_rate": 8.20744314726193e-06, "loss": 0.4612, "step": 2527 }, { "epoch": 1.753120665742025, "grad_norm": 0.3573259112331063, "learning_rate": 8.205585768844051e-06, "loss": 0.5632, "step": 2528 }, { "epoch": 1.7538141470180304, "grad_norm": 0.3875040213395464, "learning_rate": 8.203727639060085e-06, "loss": 0.5466, "step": 2529 }, { "epoch": 1.7545076282940362, "grad_norm": 0.37836072095406464, "learning_rate": 8.201868758345561e-06, "loss": 0.551, "step": 2530 }, { "epoch": 1.7552011095700415, "grad_norm": 0.3258967129982822, "learning_rate": 8.200009127136192e-06, "loss": 0.4895, "step": 2531 }, { "epoch": 1.7558945908460473, "grad_norm": 0.34535841444900556, "learning_rate": 8.198148745867855e-06, "loss": 0.4943, "step": 2532 }, { "epoch": 1.7565880721220526, "grad_norm": 0.29666217557456687, "learning_rate": 
8.196287614976617e-06, "loss": 0.4573, "step": 2533 }, { "epoch": 1.7572815533980584, "grad_norm": 0.3704184635178012, "learning_rate": 8.19442573489871e-06, "loss": 0.4879, "step": 2534 }, { "epoch": 1.7579750346740637, "grad_norm": 0.36091698139889916, "learning_rate": 8.192563106070547e-06, "loss": 0.5343, "step": 2535 }, { "epoch": 1.7586685159500695, "grad_norm": 0.3630942363096478, "learning_rate": 8.190699728928712e-06, "loss": 0.4714, "step": 2536 }, { "epoch": 1.7593619972260748, "grad_norm": 0.3132017824288624, "learning_rate": 8.188835603909967e-06, "loss": 0.4992, "step": 2537 }, { "epoch": 1.7600554785020806, "grad_norm": 0.3027009572027872, "learning_rate": 8.186970731451255e-06, "loss": 0.5029, "step": 2538 }, { "epoch": 1.7607489597780859, "grad_norm": 0.313851498975355, "learning_rate": 8.185105111989683e-06, "loss": 0.4923, "step": 2539 }, { "epoch": 1.7614424410540916, "grad_norm": 0.36778585480576975, "learning_rate": 8.18323874596254e-06, "loss": 0.5133, "step": 2540 }, { "epoch": 1.762135922330097, "grad_norm": 0.3449764370113829, "learning_rate": 8.181371633807289e-06, "loss": 0.4989, "step": 2541 }, { "epoch": 1.7628294036061027, "grad_norm": 0.33842515688554986, "learning_rate": 8.179503775961569e-06, "loss": 0.4814, "step": 2542 }, { "epoch": 1.763522884882108, "grad_norm": 0.3313212544424528, "learning_rate": 8.17763517286319e-06, "loss": 0.4879, "step": 2543 }, { "epoch": 1.7642163661581138, "grad_norm": 0.32778248906505236, "learning_rate": 8.175765824950139e-06, "loss": 0.4401, "step": 2544 }, { "epoch": 1.7649098474341192, "grad_norm": 0.3599077775354727, "learning_rate": 8.17389573266058e-06, "loss": 0.4718, "step": 2545 }, { "epoch": 1.765603328710125, "grad_norm": 0.3396295924894958, "learning_rate": 8.172024896432847e-06, "loss": 0.5057, "step": 2546 }, { "epoch": 1.7662968099861303, "grad_norm": 0.3471878954839222, "learning_rate": 8.17015331670545e-06, "loss": 0.4972, "step": 2547 }, { "epoch": 1.766990291262136, "grad_norm": 0.32736351081474324, "learning_rate": 8.168280993917078e-06, "loss": 0.4346, "step": 2548 }, { "epoch": 1.7676837725381414, "grad_norm": 0.38675782685334803, "learning_rate": 8.166407928506583e-06, "loss": 0.5634, "step": 2549 }, { "epoch": 1.7683772538141471, "grad_norm": 0.3683899472275578, "learning_rate": 8.164534120913004e-06, "loss": 0.5239, "step": 2550 }, { "epoch": 1.7690707350901524, "grad_norm": 0.31560258879219105, "learning_rate": 8.162659571575546e-06, "loss": 0.416, "step": 2551 }, { "epoch": 1.7697642163661582, "grad_norm": 0.3788950818694178, "learning_rate": 8.160784280933589e-06, "loss": 0.5286, "step": 2552 }, { "epoch": 1.7704576976421635, "grad_norm": 0.3549915525328208, "learning_rate": 8.158908249426687e-06, "loss": 0.4567, "step": 2553 }, { "epoch": 1.7711511789181693, "grad_norm": 0.3452211009361919, "learning_rate": 8.15703147749457e-06, "loss": 0.4821, "step": 2554 }, { "epoch": 1.7718446601941746, "grad_norm": 0.333760562080443, "learning_rate": 8.155153965577139e-06, "loss": 0.4665, "step": 2555 }, { "epoch": 1.7725381414701804, "grad_norm": 0.4003116848324873, "learning_rate": 8.15327571411447e-06, "loss": 0.4835, "step": 2556 }, { "epoch": 1.7732316227461857, "grad_norm": 0.34523761601236286, "learning_rate": 8.15139672354681e-06, "loss": 0.494, "step": 2557 }, { "epoch": 1.7739251040221915, "grad_norm": 0.3092718222272152, "learning_rate": 8.149516994314581e-06, "loss": 0.4647, "step": 2558 }, { "epoch": 1.7746185852981968, "grad_norm": 0.33388856773233827, "learning_rate": 8.14763652685838e-06, 
"loss": 0.4958, "step": 2559 }, { "epoch": 1.7753120665742026, "grad_norm": 0.3198893523900066, "learning_rate": 8.145755321618972e-06, "loss": 0.448, "step": 2560 }, { "epoch": 1.776005547850208, "grad_norm": 0.31866305537905737, "learning_rate": 8.1438733790373e-06, "loss": 0.4403, "step": 2561 }, { "epoch": 1.7766990291262137, "grad_norm": 0.3485110953024072, "learning_rate": 8.141990699554476e-06, "loss": 0.4709, "step": 2562 }, { "epoch": 1.777392510402219, "grad_norm": 0.3164179012114437, "learning_rate": 8.140107283611787e-06, "loss": 0.4648, "step": 2563 }, { "epoch": 1.7780859916782248, "grad_norm": 0.36956234103451036, "learning_rate": 8.138223131650693e-06, "loss": 0.5307, "step": 2564 }, { "epoch": 1.7787794729542301, "grad_norm": 0.3437914379915076, "learning_rate": 8.136338244112826e-06, "loss": 0.4599, "step": 2565 }, { "epoch": 1.7794729542302359, "grad_norm": 0.3439786188514912, "learning_rate": 8.134452621439988e-06, "loss": 0.4308, "step": 2566 }, { "epoch": 1.7801664355062412, "grad_norm": 0.3311734097123211, "learning_rate": 8.132566264074157e-06, "loss": 0.5035, "step": 2567 }, { "epoch": 1.780859916782247, "grad_norm": 0.37148619562143187, "learning_rate": 8.13067917245748e-06, "loss": 0.4722, "step": 2568 }, { "epoch": 1.7815533980582523, "grad_norm": 0.6697161458880733, "learning_rate": 8.12879134703228e-06, "loss": 0.4492, "step": 2569 }, { "epoch": 1.782246879334258, "grad_norm": 0.3512123657436848, "learning_rate": 8.126902788241045e-06, "loss": 0.4539, "step": 2570 }, { "epoch": 1.7829403606102634, "grad_norm": 0.3621406791220599, "learning_rate": 8.125013496526444e-06, "loss": 0.4563, "step": 2571 }, { "epoch": 1.7836338418862692, "grad_norm": 0.331242124044267, "learning_rate": 8.123123472331314e-06, "loss": 0.4574, "step": 2572 }, { "epoch": 1.7843273231622745, "grad_norm": 0.31959757371424324, "learning_rate": 8.121232716098659e-06, "loss": 0.4923, "step": 2573 }, { "epoch": 1.7850208044382803, "grad_norm": 0.4494412160848984, "learning_rate": 8.11934122827166e-06, "loss": 0.5015, "step": 2574 }, { "epoch": 1.7857142857142856, "grad_norm": 0.34521632420014386, "learning_rate": 8.117449009293668e-06, "loss": 0.493, "step": 2575 }, { "epoch": 1.7864077669902914, "grad_norm": 0.32728678861733757, "learning_rate": 8.115556059608208e-06, "loss": 0.4858, "step": 2576 }, { "epoch": 1.7871012482662967, "grad_norm": 0.3582845788413125, "learning_rate": 8.113662379658969e-06, "loss": 0.4944, "step": 2577 }, { "epoch": 1.7877947295423025, "grad_norm": 0.3717939194931279, "learning_rate": 8.11176796988982e-06, "loss": 0.5563, "step": 2578 }, { "epoch": 1.7884882108183078, "grad_norm": 0.37431454154338295, "learning_rate": 8.109872830744795e-06, "loss": 0.5072, "step": 2579 }, { "epoch": 1.7891816920943135, "grad_norm": 0.3510047894435186, "learning_rate": 8.107976962668102e-06, "loss": 0.497, "step": 2580 }, { "epoch": 1.7898751733703189, "grad_norm": 0.3535079515486122, "learning_rate": 8.106080366104117e-06, "loss": 0.4557, "step": 2581 }, { "epoch": 1.7905686546463246, "grad_norm": 0.3229653503595275, "learning_rate": 8.104183041497389e-06, "loss": 0.4905, "step": 2582 }, { "epoch": 1.79126213592233, "grad_norm": 0.44770065352819477, "learning_rate": 8.102284989292639e-06, "loss": 0.6119, "step": 2583 }, { "epoch": 1.7919556171983357, "grad_norm": 0.3357961525739306, "learning_rate": 8.100386209934754e-06, "loss": 0.4958, "step": 2584 }, { "epoch": 1.792649098474341, "grad_norm": 0.308513659858812, "learning_rate": 8.098486703868796e-06, "loss": 0.4223, "step": 2585 
}, { "epoch": 1.7933425797503468, "grad_norm": 0.3214398273471279, "learning_rate": 8.096586471539994e-06, "loss": 0.4968, "step": 2586 }, { "epoch": 1.7940360610263522, "grad_norm": 0.3420112961470549, "learning_rate": 8.094685513393752e-06, "loss": 0.473, "step": 2587 }, { "epoch": 1.794729542302358, "grad_norm": 0.3666497150224497, "learning_rate": 8.092783829875636e-06, "loss": 0.5762, "step": 2588 }, { "epoch": 1.7954230235783633, "grad_norm": 0.34547184429680877, "learning_rate": 8.09088142143139e-06, "loss": 0.5207, "step": 2589 }, { "epoch": 1.796116504854369, "grad_norm": 0.3834786211695057, "learning_rate": 8.088978288506923e-06, "loss": 0.5767, "step": 2590 }, { "epoch": 1.7968099861303743, "grad_norm": 0.33081319617384436, "learning_rate": 8.08707443154832e-06, "loss": 0.4609, "step": 2591 }, { "epoch": 1.7975034674063801, "grad_norm": 0.33005505320516115, "learning_rate": 8.085169851001825e-06, "loss": 0.4168, "step": 2592 }, { "epoch": 1.7981969486823854, "grad_norm": 0.35581289193530136, "learning_rate": 8.083264547313863e-06, "loss": 0.513, "step": 2593 }, { "epoch": 1.7988904299583912, "grad_norm": 0.3492346033091902, "learning_rate": 8.081358520931018e-06, "loss": 0.5047, "step": 2594 }, { "epoch": 1.7995839112343965, "grad_norm": 0.3772907702393545, "learning_rate": 8.079451772300052e-06, "loss": 0.472, "step": 2595 }, { "epoch": 1.8002773925104023, "grad_norm": 0.360546535425315, "learning_rate": 8.077544301867896e-06, "loss": 0.4899, "step": 2596 }, { "epoch": 1.8009708737864076, "grad_norm": 0.36631850143040506, "learning_rate": 8.075636110081643e-06, "loss": 0.5397, "step": 2597 }, { "epoch": 1.8016643550624134, "grad_norm": 0.3629427258375066, "learning_rate": 8.073727197388561e-06, "loss": 0.498, "step": 2598 }, { "epoch": 1.8023578363384187, "grad_norm": 0.3447326751967329, "learning_rate": 8.071817564236084e-06, "loss": 0.458, "step": 2599 }, { "epoch": 1.8030513176144245, "grad_norm": 0.39934035769365833, "learning_rate": 8.069907211071816e-06, "loss": 0.509, "step": 2600 }, { "epoch": 1.8037447988904298, "grad_norm": 0.3628951280754408, "learning_rate": 8.067996138343535e-06, "loss": 0.4864, "step": 2601 }, { "epoch": 1.8044382801664356, "grad_norm": 0.3764346549515586, "learning_rate": 8.066084346499176e-06, "loss": 0.5411, "step": 2602 }, { "epoch": 1.805131761442441, "grad_norm": 0.36142407024268214, "learning_rate": 8.064171835986852e-06, "loss": 0.5122, "step": 2603 }, { "epoch": 1.8058252427184467, "grad_norm": 0.3329712865601718, "learning_rate": 8.062258607254841e-06, "loss": 0.4673, "step": 2604 }, { "epoch": 1.806518723994452, "grad_norm": 0.33761714138991, "learning_rate": 8.060344660751591e-06, "loss": 0.4684, "step": 2605 }, { "epoch": 1.8072122052704578, "grad_norm": 0.34647098619660877, "learning_rate": 8.058429996925717e-06, "loss": 0.5236, "step": 2606 }, { "epoch": 1.807905686546463, "grad_norm": 0.3558401909155935, "learning_rate": 8.056514616226001e-06, "loss": 0.4761, "step": 2607 }, { "epoch": 1.8085991678224689, "grad_norm": 0.34910942910293996, "learning_rate": 8.054598519101396e-06, "loss": 0.5036, "step": 2608 }, { "epoch": 1.8092926490984742, "grad_norm": 0.3481902999440837, "learning_rate": 8.05268170600102e-06, "loss": 0.5216, "step": 2609 }, { "epoch": 1.80998613037448, "grad_norm": 0.3400124519897086, "learning_rate": 8.05076417737416e-06, "loss": 0.491, "step": 2610 }, { "epoch": 1.8106796116504853, "grad_norm": 0.3379953053496073, "learning_rate": 8.048845933670274e-06, "loss": 0.4978, "step": 2611 }, { "epoch": 
1.811373092926491, "grad_norm": 0.365658545970509, "learning_rate": 8.046926975338978e-06, "loss": 0.496, "step": 2612 }, { "epoch": 1.8120665742024964, "grad_norm": 0.39904400921742555, "learning_rate": 8.045007302830068e-06, "loss": 0.4882, "step": 2613 }, { "epoch": 1.8127600554785022, "grad_norm": 0.36452724045373436, "learning_rate": 8.0430869165935e-06, "loss": 0.521, "step": 2614 }, { "epoch": 1.8134535367545075, "grad_norm": 0.36517647714668033, "learning_rate": 8.041165817079397e-06, "loss": 0.4932, "step": 2615 }, { "epoch": 1.8141470180305133, "grad_norm": 0.33548186116342915, "learning_rate": 8.039244004738051e-06, "loss": 0.5209, "step": 2616 }, { "epoch": 1.8148404993065186, "grad_norm": 0.3259302086648711, "learning_rate": 8.037321480019921e-06, "loss": 0.4927, "step": 2617 }, { "epoch": 1.8155339805825244, "grad_norm": 0.2825621847555902, "learning_rate": 8.035398243375636e-06, "loss": 0.4603, "step": 2618 }, { "epoch": 1.8162274618585297, "grad_norm": 0.37932516511922554, "learning_rate": 8.033474295255986e-06, "loss": 0.5119, "step": 2619 }, { "epoch": 1.8169209431345354, "grad_norm": 0.33649712682234856, "learning_rate": 8.031549636111928e-06, "loss": 0.5313, "step": 2620 }, { "epoch": 1.8176144244105408, "grad_norm": 0.36794769079990797, "learning_rate": 8.029624266394592e-06, "loss": 0.522, "step": 2621 }, { "epoch": 1.8183079056865465, "grad_norm": 0.3534751022570253, "learning_rate": 8.02769818655527e-06, "loss": 0.4682, "step": 2622 }, { "epoch": 1.8190013869625519, "grad_norm": 0.3583193431807195, "learning_rate": 8.025771397045421e-06, "loss": 0.5225, "step": 2623 }, { "epoch": 1.8196948682385576, "grad_norm": 1.1135076037341172, "learning_rate": 8.02384389831667e-06, "loss": 0.5502, "step": 2624 }, { "epoch": 1.820388349514563, "grad_norm": 0.324063155336584, "learning_rate": 8.021915690820808e-06, "loss": 0.4955, "step": 2625 }, { "epoch": 1.8210818307905687, "grad_norm": 0.3496820628240374, "learning_rate": 8.019986775009795e-06, "loss": 0.4811, "step": 2626 }, { "epoch": 1.821775312066574, "grad_norm": 0.3447998554678887, "learning_rate": 8.018057151335752e-06, "loss": 0.5276, "step": 2627 }, { "epoch": 1.8224687933425798, "grad_norm": 0.3412124628965962, "learning_rate": 8.016126820250972e-06, "loss": 0.5099, "step": 2628 }, { "epoch": 1.8231622746185852, "grad_norm": 0.35021409461541153, "learning_rate": 8.01419578220791e-06, "loss": 0.4749, "step": 2629 }, { "epoch": 1.823855755894591, "grad_norm": 0.3339581098924237, "learning_rate": 8.012264037659182e-06, "loss": 0.4893, "step": 2630 }, { "epoch": 1.8245492371705962, "grad_norm": 0.3514154973100117, "learning_rate": 8.010331587057585e-06, "loss": 0.4829, "step": 2631 }, { "epoch": 1.825242718446602, "grad_norm": 0.36606702754495957, "learning_rate": 8.008398430856064e-06, "loss": 0.5437, "step": 2632 }, { "epoch": 1.8259361997226073, "grad_norm": 0.3572356362069411, "learning_rate": 8.006464569507737e-06, "loss": 0.5267, "step": 2633 }, { "epoch": 1.8266296809986131, "grad_norm": 0.36667852303146975, "learning_rate": 8.004530003465891e-06, "loss": 0.5074, "step": 2634 }, { "epoch": 1.8273231622746184, "grad_norm": 0.3157941877578861, "learning_rate": 8.002594733183971e-06, "loss": 0.459, "step": 2635 }, { "epoch": 1.8280166435506242, "grad_norm": 0.4112073131139841, "learning_rate": 8.00065875911559e-06, "loss": 0.4834, "step": 2636 }, { "epoch": 1.8287101248266295, "grad_norm": 0.33776473552815095, "learning_rate": 7.99872208171453e-06, "loss": 0.4747, "step": 2637 }, { "epoch": 1.8294036061026353, 
"grad_norm": 0.33433379311687755, "learning_rate": 7.99678470143473e-06, "loss": 0.4464, "step": 2638 }, { "epoch": 1.8300970873786406, "grad_norm": 0.3425070302442074, "learning_rate": 7.994846618730301e-06, "loss": 0.5321, "step": 2639 }, { "epoch": 1.8307905686546464, "grad_norm": 0.35198261413571635, "learning_rate": 7.992907834055513e-06, "loss": 0.5237, "step": 2640 }, { "epoch": 1.8314840499306517, "grad_norm": 0.3276940133077291, "learning_rate": 7.990968347864804e-06, "loss": 0.4824, "step": 2641 }, { "epoch": 1.8321775312066575, "grad_norm": 0.36387677266071716, "learning_rate": 7.989028160612779e-06, "loss": 0.4771, "step": 2642 }, { "epoch": 1.8328710124826628, "grad_norm": 0.3856124357512831, "learning_rate": 7.987087272754199e-06, "loss": 0.5188, "step": 2643 }, { "epoch": 1.8335644937586686, "grad_norm": 0.32502975843033866, "learning_rate": 7.985145684743993e-06, "loss": 0.4257, "step": 2644 }, { "epoch": 1.834257975034674, "grad_norm": 0.3684901686346137, "learning_rate": 7.983203397037261e-06, "loss": 0.4641, "step": 2645 }, { "epoch": 1.8349514563106797, "grad_norm": 0.3530964106730837, "learning_rate": 7.981260410089258e-06, "loss": 0.569, "step": 2646 }, { "epoch": 1.835644937586685, "grad_norm": 0.45330775632682163, "learning_rate": 7.979316724355406e-06, "loss": 0.5673, "step": 2647 }, { "epoch": 1.8363384188626908, "grad_norm": 0.34899484897550276, "learning_rate": 7.97737234029129e-06, "loss": 0.5419, "step": 2648 }, { "epoch": 1.837031900138696, "grad_norm": 0.36946601962265596, "learning_rate": 7.97542725835266e-06, "loss": 0.4677, "step": 2649 }, { "epoch": 1.8377253814147019, "grad_norm": 0.32280200454263647, "learning_rate": 7.973481478995433e-06, "loss": 0.4647, "step": 2650 }, { "epoch": 1.8384188626907072, "grad_norm": 0.3540458704749818, "learning_rate": 7.97153500267568e-06, "loss": 0.4987, "step": 2651 }, { "epoch": 1.839112343966713, "grad_norm": 0.34560866179222777, "learning_rate": 7.969587829849644e-06, "loss": 0.4661, "step": 2652 }, { "epoch": 1.8398058252427183, "grad_norm": 0.41567913358775405, "learning_rate": 7.967639960973727e-06, "loss": 0.5251, "step": 2653 }, { "epoch": 1.840499306518724, "grad_norm": 0.3361099075546891, "learning_rate": 7.965691396504496e-06, "loss": 0.5489, "step": 2654 }, { "epoch": 1.8411927877947294, "grad_norm": 0.3196820195209324, "learning_rate": 7.96374213689868e-06, "loss": 0.4696, "step": 2655 }, { "epoch": 1.8418862690707352, "grad_norm": 0.33633959801435626, "learning_rate": 7.96179218261317e-06, "loss": 0.4781, "step": 2656 }, { "epoch": 1.8425797503467405, "grad_norm": 0.3434890969631632, "learning_rate": 7.959841534105026e-06, "loss": 0.4616, "step": 2657 }, { "epoch": 1.8432732316227463, "grad_norm": 0.340579571076245, "learning_rate": 7.95789019183146e-06, "loss": 0.4676, "step": 2658 }, { "epoch": 1.8439667128987516, "grad_norm": 0.3941390329546165, "learning_rate": 7.955938156249856e-06, "loss": 0.5465, "step": 2659 }, { "epoch": 1.8446601941747574, "grad_norm": 0.34716317499059784, "learning_rate": 7.953985427817757e-06, "loss": 0.4605, "step": 2660 }, { "epoch": 1.8453536754507627, "grad_norm": 0.3739178657087966, "learning_rate": 7.952032006992865e-06, "loss": 0.5196, "step": 2661 }, { "epoch": 1.8460471567267684, "grad_norm": 0.3835030440585209, "learning_rate": 7.950077894233051e-06, "loss": 0.4727, "step": 2662 }, { "epoch": 1.8467406380027738, "grad_norm": 0.34474733547803854, "learning_rate": 7.948123089996345e-06, "loss": 0.444, "step": 2663 }, { "epoch": 1.8474341192787795, "grad_norm": 
0.38383795280040944, "learning_rate": 7.946167594740938e-06, "loss": 0.5605, "step": 2664 }, { "epoch": 1.8481276005547849, "grad_norm": 0.3827559486940056, "learning_rate": 7.944211408925184e-06, "loss": 0.5903, "step": 2665 }, { "epoch": 1.8488210818307906, "grad_norm": 0.3487061847158636, "learning_rate": 7.942254533007597e-06, "loss": 0.5603, "step": 2666 }, { "epoch": 1.849514563106796, "grad_norm": 0.3170466391970917, "learning_rate": 7.94029696744686e-06, "loss": 0.4559, "step": 2667 }, { "epoch": 1.8502080443828017, "grad_norm": 0.37841259797232923, "learning_rate": 7.938338712701805e-06, "loss": 0.513, "step": 2668 }, { "epoch": 1.850901525658807, "grad_norm": 0.3661516732247181, "learning_rate": 7.936379769231436e-06, "loss": 0.5056, "step": 2669 }, { "epoch": 1.8515950069348128, "grad_norm": 0.3288161189708249, "learning_rate": 7.934420137494917e-06, "loss": 0.4683, "step": 2670 }, { "epoch": 1.8522884882108182, "grad_norm": 0.42463178798517265, "learning_rate": 7.93245981795157e-06, "loss": 0.4649, "step": 2671 }, { "epoch": 1.852981969486824, "grad_norm": 0.32121837261737696, "learning_rate": 7.930498811060879e-06, "loss": 0.4566, "step": 2672 }, { "epoch": 1.8536754507628292, "grad_norm": 0.33612977400716404, "learning_rate": 7.92853711728249e-06, "loss": 0.4899, "step": 2673 }, { "epoch": 1.854368932038835, "grad_norm": 0.34811870221940905, "learning_rate": 7.92657473707621e-06, "loss": 0.4946, "step": 2674 }, { "epoch": 1.8550624133148403, "grad_norm": 0.45890294460618614, "learning_rate": 7.924611670902008e-06, "loss": 0.4614, "step": 2675 }, { "epoch": 1.8557558945908461, "grad_norm": 0.3438035043765943, "learning_rate": 7.92264791922001e-06, "loss": 0.4855, "step": 2676 }, { "epoch": 1.8564493758668514, "grad_norm": 0.3354919563928631, "learning_rate": 7.92068348249051e-06, "loss": 0.4654, "step": 2677 }, { "epoch": 1.8571428571428572, "grad_norm": 0.33641671481189644, "learning_rate": 7.918718361173951e-06, "loss": 0.4763, "step": 2678 }, { "epoch": 1.8578363384188625, "grad_norm": 0.32552095581966767, "learning_rate": 7.916752555730946e-06, "loss": 0.4618, "step": 2679 }, { "epoch": 1.8585298196948683, "grad_norm": 0.43523484512876065, "learning_rate": 7.914786066622268e-06, "loss": 0.4844, "step": 2680 }, { "epoch": 1.8592233009708736, "grad_norm": 0.30467028020508086, "learning_rate": 7.912818894308845e-06, "loss": 0.4429, "step": 2681 }, { "epoch": 1.8599167822468794, "grad_norm": 0.3227150624697633, "learning_rate": 7.910851039251768e-06, "loss": 0.4908, "step": 2682 }, { "epoch": 1.8606102635228847, "grad_norm": 0.33384052436968936, "learning_rate": 7.908882501912289e-06, "loss": 0.4476, "step": 2683 }, { "epoch": 1.8613037447988905, "grad_norm": 0.3714640930412473, "learning_rate": 7.90691328275182e-06, "loss": 0.4669, "step": 2684 }, { "epoch": 1.8619972260748958, "grad_norm": 0.3087761374772754, "learning_rate": 7.90494338223193e-06, "loss": 0.4333, "step": 2685 }, { "epoch": 1.8626907073509016, "grad_norm": 0.36038541634353743, "learning_rate": 7.90297280081435e-06, "loss": 0.4655, "step": 2686 }, { "epoch": 1.863384188626907, "grad_norm": 0.3465616915030342, "learning_rate": 7.901001538960968e-06, "loss": 0.4793, "step": 2687 }, { "epoch": 1.8640776699029127, "grad_norm": 0.33590736181306824, "learning_rate": 7.899029597133836e-06, "loss": 0.4266, "step": 2688 }, { "epoch": 1.864771151178918, "grad_norm": 0.31658115418413885, "learning_rate": 7.897056975795163e-06, "loss": 0.4345, "step": 2689 }, { "epoch": 1.8654646324549238, "grad_norm": 
0.34315383712519093, "learning_rate": 7.895083675407316e-06, "loss": 0.4863, "step": 2690 }, { "epoch": 1.866158113730929, "grad_norm": 0.34222369247746415, "learning_rate": 7.893109696432824e-06, "loss": 0.539, "step": 2691 }, { "epoch": 1.8668515950069349, "grad_norm": 0.33203178507466397, "learning_rate": 7.89113503933437e-06, "loss": 0.4939, "step": 2692 }, { "epoch": 1.8675450762829402, "grad_norm": 0.36536191361376724, "learning_rate": 7.889159704574803e-06, "loss": 0.4936, "step": 2693 }, { "epoch": 1.868238557558946, "grad_norm": 0.33926462202525187, "learning_rate": 7.887183692617125e-06, "loss": 0.4961, "step": 2694 }, { "epoch": 1.8689320388349513, "grad_norm": 0.35716225248485955, "learning_rate": 7.885207003924498e-06, "loss": 0.5053, "step": 2695 }, { "epoch": 1.869625520110957, "grad_norm": 0.3258382192439157, "learning_rate": 7.883229638960246e-06, "loss": 0.4656, "step": 2696 }, { "epoch": 1.8703190013869624, "grad_norm": 0.34032656016382073, "learning_rate": 7.881251598187848e-06, "loss": 0.5263, "step": 2697 }, { "epoch": 1.8710124826629682, "grad_norm": 0.36476247725557487, "learning_rate": 7.879272882070942e-06, "loss": 0.5284, "step": 2698 }, { "epoch": 1.8717059639389735, "grad_norm": 0.3724168644047688, "learning_rate": 7.877293491073325e-06, "loss": 0.4892, "step": 2699 }, { "epoch": 1.8723994452149793, "grad_norm": 0.38303081629382435, "learning_rate": 7.875313425658955e-06, "loss": 0.4655, "step": 2700 }, { "epoch": 1.8730929264909846, "grad_norm": 0.3091757085341014, "learning_rate": 7.873332686291939e-06, "loss": 0.4978, "step": 2701 }, { "epoch": 1.8737864077669903, "grad_norm": 0.3370798668074445, "learning_rate": 7.87135127343655e-06, "loss": 0.4282, "step": 2702 }, { "epoch": 1.8744798890429957, "grad_norm": 0.3379343713716113, "learning_rate": 7.86936918755722e-06, "loss": 0.4275, "step": 2703 }, { "epoch": 1.8751733703190014, "grad_norm": 0.3587033618478191, "learning_rate": 7.867386429118533e-06, "loss": 0.4646, "step": 2704 }, { "epoch": 1.875866851595007, "grad_norm": 0.3645155839705385, "learning_rate": 7.865402998585234e-06, "loss": 0.535, "step": 2705 }, { "epoch": 1.8765603328710125, "grad_norm": 0.3685151032638385, "learning_rate": 7.863418896422223e-06, "loss": 0.5579, "step": 2706 }, { "epoch": 1.877253814147018, "grad_norm": 0.3315053069842496, "learning_rate": 7.86143412309456e-06, "loss": 0.4734, "step": 2707 }, { "epoch": 1.8779472954230236, "grad_norm": 0.4015552806113017, "learning_rate": 7.859448679067465e-06, "loss": 0.5254, "step": 2708 }, { "epoch": 1.8786407766990292, "grad_norm": 0.38131337231205575, "learning_rate": 7.857462564806306e-06, "loss": 0.5127, "step": 2709 }, { "epoch": 1.8793342579750347, "grad_norm": 0.31246765375989466, "learning_rate": 7.855475780776618e-06, "loss": 0.4548, "step": 2710 }, { "epoch": 1.8800277392510403, "grad_norm": 0.3371769696481026, "learning_rate": 7.853488327444085e-06, "loss": 0.5073, "step": 2711 }, { "epoch": 1.8807212205270458, "grad_norm": 0.38750013754468865, "learning_rate": 7.851500205274556e-06, "loss": 0.549, "step": 2712 }, { "epoch": 1.8814147018030514, "grad_norm": 0.3704598385623532, "learning_rate": 7.849511414734031e-06, "loss": 0.4886, "step": 2713 }, { "epoch": 1.882108183079057, "grad_norm": 0.3680224526882509, "learning_rate": 7.847521956288667e-06, "loss": 0.4985, "step": 2714 }, { "epoch": 1.8828016643550625, "grad_norm": 0.32918344876562927, "learning_rate": 7.845531830404779e-06, "loss": 0.4803, "step": 2715 }, { "epoch": 1.883495145631068, "grad_norm": 0.3624569516420403, 
"learning_rate": 7.84354103754884e-06, "loss": 0.4642, "step": 2716 }, { "epoch": 1.8841886269070736, "grad_norm": 0.31070697654179563, "learning_rate": 7.841549578187472e-06, "loss": 0.4699, "step": 2717 }, { "epoch": 1.884882108183079, "grad_norm": 0.35520493491293814, "learning_rate": 7.839557452787465e-06, "loss": 0.5316, "step": 2718 }, { "epoch": 1.8855755894590847, "grad_norm": 0.3570877311507715, "learning_rate": 7.837564661815755e-06, "loss": 0.4928, "step": 2719 }, { "epoch": 1.8862690707350902, "grad_norm": 0.36233011008533733, "learning_rate": 7.835571205739438e-06, "loss": 0.4261, "step": 2720 }, { "epoch": 1.8869625520110958, "grad_norm": 0.33391896874979543, "learning_rate": 7.833577085025768e-06, "loss": 0.4577, "step": 2721 }, { "epoch": 1.8876560332871013, "grad_norm": 0.3316454736722919, "learning_rate": 7.831582300142151e-06, "loss": 0.5035, "step": 2722 }, { "epoch": 1.8883495145631068, "grad_norm": 0.3850383328808543, "learning_rate": 7.82958685155615e-06, "loss": 0.4705, "step": 2723 }, { "epoch": 1.8890429958391124, "grad_norm": 0.36773321773866596, "learning_rate": 7.827590739735483e-06, "loss": 0.4951, "step": 2724 }, { "epoch": 1.889736477115118, "grad_norm": 0.4207221739505761, "learning_rate": 7.825593965148027e-06, "loss": 0.5365, "step": 2725 }, { "epoch": 1.8904299583911235, "grad_norm": 0.3405928361906911, "learning_rate": 7.823596528261808e-06, "loss": 0.5039, "step": 2726 }, { "epoch": 1.891123439667129, "grad_norm": 0.3453213609250516, "learning_rate": 7.821598429545011e-06, "loss": 0.484, "step": 2727 }, { "epoch": 1.8918169209431346, "grad_norm": 0.3330266683001898, "learning_rate": 7.819599669465979e-06, "loss": 0.4482, "step": 2728 }, { "epoch": 1.8925104022191401, "grad_norm": 0.3650550445029445, "learning_rate": 7.817600248493205e-06, "loss": 0.5187, "step": 2729 }, { "epoch": 1.8932038834951457, "grad_norm": 0.358788925844345, "learning_rate": 7.815600167095338e-06, "loss": 0.5525, "step": 2730 }, { "epoch": 1.8938973647711512, "grad_norm": 0.32218523816677525, "learning_rate": 7.813599425741183e-06, "loss": 0.4722, "step": 2731 }, { "epoch": 1.8945908460471568, "grad_norm": 0.3456065736851976, "learning_rate": 7.8115980248997e-06, "loss": 0.5284, "step": 2732 }, { "epoch": 1.8952843273231623, "grad_norm": 0.3073424730771214, "learning_rate": 7.809595965040002e-06, "loss": 0.4835, "step": 2733 }, { "epoch": 1.8959778085991679, "grad_norm": 0.3547197711091453, "learning_rate": 7.80759324663136e-06, "loss": 0.4857, "step": 2734 }, { "epoch": 1.8966712898751734, "grad_norm": 0.3563225377706775, "learning_rate": 7.805589870143193e-06, "loss": 0.5348, "step": 2735 }, { "epoch": 1.897364771151179, "grad_norm": 0.3603077268316012, "learning_rate": 7.80358583604508e-06, "loss": 0.4679, "step": 2736 }, { "epoch": 1.8980582524271845, "grad_norm": 0.3724255462235156, "learning_rate": 7.801581144806752e-06, "loss": 0.5121, "step": 2737 }, { "epoch": 1.89875173370319, "grad_norm": 0.3706606750205616, "learning_rate": 7.799575796898091e-06, "loss": 0.4483, "step": 2738 }, { "epoch": 1.8994452149791956, "grad_norm": 0.3514347423782457, "learning_rate": 7.797569792789142e-06, "loss": 0.4928, "step": 2739 }, { "epoch": 1.9001386962552012, "grad_norm": 0.3723169049743826, "learning_rate": 7.795563132950092e-06, "loss": 0.5125, "step": 2740 }, { "epoch": 1.9008321775312067, "grad_norm": 0.3568896023824196, "learning_rate": 7.79355581785129e-06, "loss": 0.5542, "step": 2741 }, { "epoch": 1.9015256588072122, "grad_norm": 0.3478798711559774, "learning_rate": 
7.791547847963237e-06, "loss": 0.5586, "step": 2742 }, { "epoch": 1.9022191400832178, "grad_norm": 0.3853549614511584, "learning_rate": 7.789539223756588e-06, "loss": 0.5559, "step": 2743 }, { "epoch": 1.9029126213592233, "grad_norm": 0.3342837293657271, "learning_rate": 7.787529945702145e-06, "loss": 0.4549, "step": 2744 }, { "epoch": 1.903606102635229, "grad_norm": 0.32087241344602785, "learning_rate": 7.785520014270872e-06, "loss": 0.4428, "step": 2745 }, { "epoch": 1.9042995839112344, "grad_norm": 0.3186490035876303, "learning_rate": 7.78350942993388e-06, "loss": 0.455, "step": 2746 }, { "epoch": 1.90499306518724, "grad_norm": 0.31509241310589053, "learning_rate": 7.781498193162438e-06, "loss": 0.4628, "step": 2747 }, { "epoch": 1.9056865464632455, "grad_norm": 0.3553360158393702, "learning_rate": 7.779486304427963e-06, "loss": 0.5081, "step": 2748 }, { "epoch": 1.906380027739251, "grad_norm": 0.3566553822952972, "learning_rate": 7.77747376420203e-06, "loss": 0.5137, "step": 2749 }, { "epoch": 1.9070735090152566, "grad_norm": 0.3722863664868598, "learning_rate": 7.775460572956361e-06, "loss": 0.5842, "step": 2750 }, { "epoch": 1.9077669902912622, "grad_norm": 0.3543461820550464, "learning_rate": 7.773446731162835e-06, "loss": 0.4936, "step": 2751 }, { "epoch": 1.9084604715672677, "grad_norm": 0.32936054817425064, "learning_rate": 7.771432239293481e-06, "loss": 0.514, "step": 2752 }, { "epoch": 1.9091539528432733, "grad_norm": 0.3136306514466323, "learning_rate": 7.769417097820481e-06, "loss": 0.4096, "step": 2753 }, { "epoch": 1.9098474341192788, "grad_norm": 0.3516954106782875, "learning_rate": 7.767401307216172e-06, "loss": 0.4762, "step": 2754 }, { "epoch": 1.9105409153952844, "grad_norm": 0.9020798668799241, "learning_rate": 7.765384867953038e-06, "loss": 0.4829, "step": 2755 }, { "epoch": 1.91123439667129, "grad_norm": 0.3463716414672137, "learning_rate": 7.763367780503719e-06, "loss": 0.5018, "step": 2756 }, { "epoch": 1.9119278779472955, "grad_norm": 0.38085216185148424, "learning_rate": 7.761350045341008e-06, "loss": 0.5202, "step": 2757 }, { "epoch": 1.912621359223301, "grad_norm": 0.3353323256554222, "learning_rate": 7.759331662937841e-06, "loss": 0.4762, "step": 2758 }, { "epoch": 1.9133148404993066, "grad_norm": 0.34961297442086087, "learning_rate": 7.757312633767318e-06, "loss": 0.4961, "step": 2759 }, { "epoch": 1.914008321775312, "grad_norm": 0.362212565909931, "learning_rate": 7.755292958302683e-06, "loss": 0.4875, "step": 2760 }, { "epoch": 1.9147018030513177, "grad_norm": 0.32826385896123367, "learning_rate": 7.753272637017333e-06, "loss": 0.4604, "step": 2761 }, { "epoch": 1.9153952843273232, "grad_norm": 0.3843771857848706, "learning_rate": 7.751251670384818e-06, "loss": 0.4562, "step": 2762 }, { "epoch": 1.9160887656033287, "grad_norm": 0.33096456418013687, "learning_rate": 7.749230058878836e-06, "loss": 0.5049, "step": 2763 }, { "epoch": 1.9167822468793343, "grad_norm": 0.39058020155023987, "learning_rate": 7.74720780297324e-06, "loss": 0.5548, "step": 2764 }, { "epoch": 1.9174757281553398, "grad_norm": 0.33683295308446404, "learning_rate": 7.745184903142029e-06, "loss": 0.4681, "step": 2765 }, { "epoch": 1.9181692094313454, "grad_norm": 0.34708818095467187, "learning_rate": 7.74316135985936e-06, "loss": 0.4647, "step": 2766 }, { "epoch": 1.918862690707351, "grad_norm": 0.4075859168031003, "learning_rate": 7.741137173599535e-06, "loss": 0.5048, "step": 2767 }, { "epoch": 1.9195561719833565, "grad_norm": 0.38467243108916777, "learning_rate": 7.73911234483701e-06, 
"loss": 0.5401, "step": 2768 }, { "epoch": 1.920249653259362, "grad_norm": 0.32085151563485503, "learning_rate": 7.737086874046387e-06, "loss": 0.4932, "step": 2769 }, { "epoch": 1.9209431345353676, "grad_norm": 0.38608992478875187, "learning_rate": 7.735060761702425e-06, "loss": 0.4777, "step": 2770 }, { "epoch": 1.9216366158113731, "grad_norm": 0.35737555911257957, "learning_rate": 7.733034008280027e-06, "loss": 0.5644, "step": 2771 }, { "epoch": 1.9223300970873787, "grad_norm": 0.3064138618678669, "learning_rate": 7.731006614254252e-06, "loss": 0.4465, "step": 2772 }, { "epoch": 1.9230235783633842, "grad_norm": 0.3286043684447899, "learning_rate": 7.728978580100304e-06, "loss": 0.4594, "step": 2773 }, { "epoch": 1.9237170596393898, "grad_norm": 0.3768782061453021, "learning_rate": 7.726949906293544e-06, "loss": 0.4771, "step": 2774 }, { "epoch": 1.9244105409153953, "grad_norm": 0.364390945751054, "learning_rate": 7.724920593309474e-06, "loss": 0.5307, "step": 2775 }, { "epoch": 1.9251040221914009, "grad_norm": 0.40577039878012, "learning_rate": 7.722890641623752e-06, "loss": 0.518, "step": 2776 }, { "epoch": 1.9257975034674064, "grad_norm": 0.33581686460478327, "learning_rate": 7.720860051712183e-06, "loss": 0.4859, "step": 2777 }, { "epoch": 1.926490984743412, "grad_norm": 0.46543626832142315, "learning_rate": 7.718828824050722e-06, "loss": 0.4795, "step": 2778 }, { "epoch": 1.9271844660194175, "grad_norm": 0.35468745606402036, "learning_rate": 7.716796959115479e-06, "loss": 0.4779, "step": 2779 }, { "epoch": 1.927877947295423, "grad_norm": 0.3788801104084805, "learning_rate": 7.714764457382702e-06, "loss": 0.5067, "step": 2780 }, { "epoch": 1.9285714285714286, "grad_norm": 0.364114949919868, "learning_rate": 7.712731319328798e-06, "loss": 0.4666, "step": 2781 }, { "epoch": 1.9292649098474342, "grad_norm": 0.3718239953310221, "learning_rate": 7.71069754543032e-06, "loss": 0.4694, "step": 2782 }, { "epoch": 1.9299583911234397, "grad_norm": 0.36806678477105553, "learning_rate": 7.708663136163967e-06, "loss": 0.5129, "step": 2783 }, { "epoch": 1.9306518723994452, "grad_norm": 0.326818402462561, "learning_rate": 7.706628092006594e-06, "loss": 0.4398, "step": 2784 }, { "epoch": 1.9313453536754508, "grad_norm": 0.3223274110953189, "learning_rate": 7.7045924134352e-06, "loss": 0.4829, "step": 2785 }, { "epoch": 1.9320388349514563, "grad_norm": 0.3977613310599147, "learning_rate": 7.70255610092693e-06, "loss": 0.5025, "step": 2786 }, { "epoch": 1.9327323162274619, "grad_norm": 0.3254185114008018, "learning_rate": 7.700519154959081e-06, "loss": 0.5241, "step": 2787 }, { "epoch": 1.9334257975034674, "grad_norm": 0.3183943101006737, "learning_rate": 7.698481576009102e-06, "loss": 0.4771, "step": 2788 }, { "epoch": 1.934119278779473, "grad_norm": 0.36246545128956614, "learning_rate": 7.696443364554584e-06, "loss": 0.4734, "step": 2789 }, { "epoch": 1.9348127600554785, "grad_norm": 0.3484678616251616, "learning_rate": 7.694404521073273e-06, "loss": 0.495, "step": 2790 }, { "epoch": 1.935506241331484, "grad_norm": 0.3668175506071657, "learning_rate": 7.692365046043053e-06, "loss": 0.4889, "step": 2791 }, { "epoch": 1.9361997226074896, "grad_norm": 0.3463910744025464, "learning_rate": 7.690324939941964e-06, "loss": 0.5102, "step": 2792 }, { "epoch": 1.9368932038834952, "grad_norm": 0.35160264974419514, "learning_rate": 7.688284203248197e-06, "loss": 0.4805, "step": 2793 }, { "epoch": 1.9375866851595007, "grad_norm": 0.3512818815864015, "learning_rate": 7.686242836440081e-06, "loss": 0.4844, "step": 
2794 }, { "epoch": 1.9382801664355063, "grad_norm": 0.4050336666596714, "learning_rate": 7.684200839996099e-06, "loss": 0.5719, "step": 2795 }, { "epoch": 1.9389736477115118, "grad_norm": 0.36132159999999863, "learning_rate": 7.682158214394878e-06, "loss": 0.5198, "step": 2796 }, { "epoch": 1.9396671289875174, "grad_norm": 0.3638888963990286, "learning_rate": 7.680114960115198e-06, "loss": 0.5061, "step": 2797 }, { "epoch": 1.940360610263523, "grad_norm": 0.33836739852120684, "learning_rate": 7.678071077635981e-06, "loss": 0.5627, "step": 2798 }, { "epoch": 1.9410540915395285, "grad_norm": 0.3396319108032648, "learning_rate": 7.676026567436301e-06, "loss": 0.4828, "step": 2799 }, { "epoch": 1.941747572815534, "grad_norm": 0.3530008796442068, "learning_rate": 7.673981429995372e-06, "loss": 0.5012, "step": 2800 }, { "epoch": 1.9424410540915396, "grad_norm": 0.38349473669569306, "learning_rate": 7.671935665792563e-06, "loss": 0.5359, "step": 2801 }, { "epoch": 1.943134535367545, "grad_norm": 0.3742179351323327, "learning_rate": 7.669889275307384e-06, "loss": 0.4488, "step": 2802 }, { "epoch": 1.9438280166435506, "grad_norm": 0.3367164020056258, "learning_rate": 7.667842259019495e-06, "loss": 0.4947, "step": 2803 }, { "epoch": 1.9445214979195562, "grad_norm": 0.3188297402918376, "learning_rate": 7.665794617408703e-06, "loss": 0.4908, "step": 2804 }, { "epoch": 1.9452149791955617, "grad_norm": 0.34876096692317154, "learning_rate": 7.663746350954957e-06, "loss": 0.4422, "step": 2805 }, { "epoch": 1.9459084604715673, "grad_norm": 0.47167493465687316, "learning_rate": 7.661697460138362e-06, "loss": 0.4648, "step": 2806 }, { "epoch": 1.9466019417475728, "grad_norm": 0.3340119781612177, "learning_rate": 7.659647945439157e-06, "loss": 0.491, "step": 2807 }, { "epoch": 1.9472954230235784, "grad_norm": 0.35635117982991515, "learning_rate": 7.657597807337735e-06, "loss": 0.4986, "step": 2808 }, { "epoch": 1.947988904299584, "grad_norm": 0.3099310922886787, "learning_rate": 7.655547046314635e-06, "loss": 0.4429, "step": 2809 }, { "epoch": 1.9486823855755895, "grad_norm": 0.335658427457914, "learning_rate": 7.65349566285054e-06, "loss": 0.4762, "step": 2810 }, { "epoch": 1.949375866851595, "grad_norm": 0.36516184574224014, "learning_rate": 7.651443657426279e-06, "loss": 0.4886, "step": 2811 }, { "epoch": 1.9500693481276006, "grad_norm": 0.3334221155337291, "learning_rate": 7.649391030522828e-06, "loss": 0.4539, "step": 2812 }, { "epoch": 1.9507628294036061, "grad_norm": 0.3628991245166753, "learning_rate": 7.647337782621308e-06, "loss": 0.5172, "step": 2813 }, { "epoch": 1.9514563106796117, "grad_norm": 0.3456110006514414, "learning_rate": 7.645283914202981e-06, "loss": 0.4333, "step": 2814 }, { "epoch": 1.9521497919556172, "grad_norm": 0.31888828958161214, "learning_rate": 7.643229425749265e-06, "loss": 0.4501, "step": 2815 }, { "epoch": 1.9528432732316228, "grad_norm": 0.3622790204057956, "learning_rate": 7.641174317741716e-06, "loss": 0.4869, "step": 2816 }, { "epoch": 1.9535367545076283, "grad_norm": 0.3419542518804844, "learning_rate": 7.639118590662033e-06, "loss": 0.5091, "step": 2817 }, { "epoch": 1.9542302357836339, "grad_norm": 0.3599515415634431, "learning_rate": 7.637062244992065e-06, "loss": 0.487, "step": 2818 }, { "epoch": 1.9549237170596394, "grad_norm": 0.4461919939769592, "learning_rate": 7.635005281213808e-06, "loss": 0.5199, "step": 2819 }, { "epoch": 1.955617198335645, "grad_norm": 0.3228715182026137, "learning_rate": 7.632947699809395e-06, "loss": 0.4787, "step": 2820 }, { "epoch": 
1.9563106796116505, "grad_norm": 0.3538224220591444, "learning_rate": 7.63088950126111e-06, "loss": 0.5129, "step": 2821 }, { "epoch": 1.957004160887656, "grad_norm": 0.32099229921982103, "learning_rate": 7.6288306860513804e-06, "loss": 0.4536, "step": 2822 }, { "epoch": 1.9576976421636616, "grad_norm": 0.3698865009052739, "learning_rate": 7.626771254662776e-06, "loss": 0.5426, "step": 2823 }, { "epoch": 1.9583911234396671, "grad_norm": 0.32399056399673626, "learning_rate": 7.624711207578015e-06, "loss": 0.4455, "step": 2824 }, { "epoch": 1.9590846047156727, "grad_norm": 0.29898733204345623, "learning_rate": 7.622650545279954e-06, "loss": 0.4694, "step": 2825 }, { "epoch": 1.9597780859916782, "grad_norm": 0.3401556086147256, "learning_rate": 7.6205892682516e-06, "loss": 0.4763, "step": 2826 }, { "epoch": 1.9604715672676838, "grad_norm": 0.36168017056263757, "learning_rate": 7.6185273769761015e-06, "loss": 0.4781, "step": 2827 }, { "epoch": 1.9611650485436893, "grad_norm": 0.3569963385716288, "learning_rate": 7.616464871936748e-06, "loss": 0.5229, "step": 2828 }, { "epoch": 1.9618585298196949, "grad_norm": 0.5028901182808291, "learning_rate": 7.61440175361698e-06, "loss": 0.5477, "step": 2829 }, { "epoch": 1.9625520110957004, "grad_norm": 0.3862379396196428, "learning_rate": 7.612338022500375e-06, "loss": 0.5427, "step": 2830 }, { "epoch": 1.963245492371706, "grad_norm": 0.3175003879623789, "learning_rate": 7.6102736790706575e-06, "loss": 0.4993, "step": 2831 }, { "epoch": 1.9639389736477115, "grad_norm": 0.375401967459228, "learning_rate": 7.608208723811693e-06, "loss": 0.4883, "step": 2832 }, { "epoch": 1.964632454923717, "grad_norm": 0.3399053720557915, "learning_rate": 7.606143157207493e-06, "loss": 0.5016, "step": 2833 }, { "epoch": 1.9653259361997226, "grad_norm": 0.41120677640801506, "learning_rate": 7.604076979742212e-06, "loss": 0.5139, "step": 2834 }, { "epoch": 1.9660194174757282, "grad_norm": 0.3378315201752423, "learning_rate": 7.602010191900147e-06, "loss": 0.4636, "step": 2835 }, { "epoch": 1.9667128987517337, "grad_norm": 0.33524211782526003, "learning_rate": 7.599942794165738e-06, "loss": 0.5007, "step": 2836 }, { "epoch": 1.9674063800277393, "grad_norm": 0.3440131442432083, "learning_rate": 7.597874787023565e-06, "loss": 0.5117, "step": 2837 }, { "epoch": 1.9680998613037448, "grad_norm": 0.34427218589372166, "learning_rate": 7.59580617095836e-06, "loss": 0.4298, "step": 2838 }, { "epoch": 1.9687933425797504, "grad_norm": 0.3498259579987603, "learning_rate": 7.593736946454986e-06, "loss": 0.4979, "step": 2839 }, { "epoch": 1.969486823855756, "grad_norm": 0.31895484693353576, "learning_rate": 7.591667113998458e-06, "loss": 0.4851, "step": 2840 }, { "epoch": 1.9701803051317615, "grad_norm": 0.3017840035013218, "learning_rate": 7.589596674073927e-06, "loss": 0.4467, "step": 2841 }, { "epoch": 1.970873786407767, "grad_norm": 0.37740803352290486, "learning_rate": 7.587525627166691e-06, "loss": 0.4592, "step": 2842 }, { "epoch": 1.9715672676837726, "grad_norm": 0.36929200701190473, "learning_rate": 7.585453973762188e-06, "loss": 0.4949, "step": 2843 }, { "epoch": 1.972260748959778, "grad_norm": 0.32701615822251884, "learning_rate": 7.583381714345999e-06, "loss": 0.5043, "step": 2844 }, { "epoch": 1.9729542302357836, "grad_norm": 0.38039091569376876, "learning_rate": 7.581308849403843e-06, "loss": 0.4584, "step": 2845 }, { "epoch": 1.9736477115117892, "grad_norm": 0.3740860012794304, "learning_rate": 7.5792353794215885e-06, "loss": 0.5545, "step": 2846 }, { "epoch": 
1.9743411927877947, "grad_norm": 0.31243044334493275, "learning_rate": 7.577161304885242e-06, "loss": 0.4663, "step": 2847 }, { "epoch": 1.9750346740638003, "grad_norm": 0.3380881854703883, "learning_rate": 7.575086626280951e-06, "loss": 0.4747, "step": 2848 }, { "epoch": 1.9757281553398058, "grad_norm": 0.37490035147652145, "learning_rate": 7.573011344095002e-06, "loss": 0.4645, "step": 2849 }, { "epoch": 1.9764216366158114, "grad_norm": 0.3485353886235364, "learning_rate": 7.5709354588138296e-06, "loss": 0.517, "step": 2850 }, { "epoch": 1.977115117891817, "grad_norm": 0.3654716702171952, "learning_rate": 7.568858970924006e-06, "loss": 0.5205, "step": 2851 }, { "epoch": 1.9778085991678225, "grad_norm": 0.3639664302165563, "learning_rate": 7.566781880912244e-06, "loss": 0.4633, "step": 2852 }, { "epoch": 1.978502080443828, "grad_norm": 0.34349319790231575, "learning_rate": 7.564704189265397e-06, "loss": 0.4813, "step": 2853 }, { "epoch": 1.9791955617198336, "grad_norm": 0.3361512833833259, "learning_rate": 7.5626258964704634e-06, "loss": 0.4829, "step": 2854 }, { "epoch": 1.9798890429958391, "grad_norm": 0.33618350305353856, "learning_rate": 7.56054700301458e-06, "loss": 0.4414, "step": 2855 }, { "epoch": 1.9805825242718447, "grad_norm": 0.6431040010129025, "learning_rate": 7.558467509385023e-06, "loss": 0.4686, "step": 2856 }, { "epoch": 1.9812760055478502, "grad_norm": 0.30993907367331835, "learning_rate": 7.5563874160692105e-06, "loss": 0.4699, "step": 2857 }, { "epoch": 1.9819694868238558, "grad_norm": 0.3230497093400171, "learning_rate": 7.554306723554702e-06, "loss": 0.4792, "step": 2858 }, { "epoch": 1.9826629680998613, "grad_norm": 0.38336764278694396, "learning_rate": 7.552225432329196e-06, "loss": 0.4523, "step": 2859 }, { "epoch": 1.9833564493758669, "grad_norm": 0.3489004199044597, "learning_rate": 7.5501435428805345e-06, "loss": 0.4555, "step": 2860 }, { "epoch": 1.9840499306518724, "grad_norm": 0.3386246511215877, "learning_rate": 7.548061055696696e-06, "loss": 0.504, "step": 2861 }, { "epoch": 1.984743411927878, "grad_norm": 0.3369402473820756, "learning_rate": 7.545977971265799e-06, "loss": 0.4438, "step": 2862 }, { "epoch": 1.9854368932038835, "grad_norm": 0.30702527260405243, "learning_rate": 7.5438942900761035e-06, "loss": 0.4479, "step": 2863 }, { "epoch": 1.986130374479889, "grad_norm": 0.31792988533349564, "learning_rate": 7.541810012616011e-06, "loss": 0.4393, "step": 2864 }, { "epoch": 1.9868238557558946, "grad_norm": 0.331116213260764, "learning_rate": 7.53972513937406e-06, "loss": 0.5213, "step": 2865 }, { "epoch": 1.9875173370319001, "grad_norm": 0.35078977085481916, "learning_rate": 7.53763967083893e-06, "loss": 0.4797, "step": 2866 }, { "epoch": 1.9882108183079057, "grad_norm": 0.34163638207233277, "learning_rate": 7.535553607499438e-06, "loss": 0.4716, "step": 2867 }, { "epoch": 1.9889042995839112, "grad_norm": 0.33940154559423774, "learning_rate": 7.5334669498445454e-06, "loss": 0.5085, "step": 2868 }, { "epoch": 1.9895977808599168, "grad_norm": 0.3333087061478696, "learning_rate": 7.531379698363348e-06, "loss": 0.5063, "step": 2869 }, { "epoch": 1.9902912621359223, "grad_norm": 0.3649615626540554, "learning_rate": 7.529291853545082e-06, "loss": 0.4847, "step": 2870 }, { "epoch": 1.9909847434119279, "grad_norm": 0.7002542693293046, "learning_rate": 7.527203415879125e-06, "loss": 0.482, "step": 2871 }, { "epoch": 1.9916782246879334, "grad_norm": 0.345445271912478, "learning_rate": 7.525114385854988e-06, "loss": 0.4726, "step": 2872 }, { "epoch": 
1.992371705963939, "grad_norm": 0.3549213985304822, "learning_rate": 7.523024763962328e-06, "loss": 0.4848, "step": 2873 }, { "epoch": 1.9930651872399445, "grad_norm": 0.41542124087712934, "learning_rate": 7.5209345506909346e-06, "loss": 0.4491, "step": 2874 }, { "epoch": 1.99375866851595, "grad_norm": 0.34636334795885454, "learning_rate": 7.5188437465307415e-06, "loss": 0.5321, "step": 2875 }, { "epoch": 1.9944521497919556, "grad_norm": 0.34611763216721525, "learning_rate": 7.5167523519718155e-06, "loss": 0.4643, "step": 2876 }, { "epoch": 1.9951456310679612, "grad_norm": 0.3557109697301609, "learning_rate": 7.514660367504368e-06, "loss": 0.4692, "step": 2877 }, { "epoch": 1.9958391123439667, "grad_norm": 0.4323397683262994, "learning_rate": 7.512567793618738e-06, "loss": 0.4938, "step": 2878 }, { "epoch": 1.9965325936199723, "grad_norm": 0.3561912840695031, "learning_rate": 7.5104746308054165e-06, "loss": 0.4396, "step": 2879 }, { "epoch": 1.9972260748959778, "grad_norm": 0.34254835637409276, "learning_rate": 7.508380879555024e-06, "loss": 0.5006, "step": 2880 }, { "epoch": 1.9979195561719834, "grad_norm": 0.38501081852223085, "learning_rate": 7.506286540358318e-06, "loss": 0.4974, "step": 2881 }, { "epoch": 1.998613037447989, "grad_norm": 0.3069095824305059, "learning_rate": 7.5041916137062e-06, "loss": 0.4315, "step": 2882 }, { "epoch": 1.9993065187239945, "grad_norm": 0.36986927906164246, "learning_rate": 7.502096100089702e-06, "loss": 0.5097, "step": 2883 }, { "epoch": 2.0, "grad_norm": 0.3397957650017145, "learning_rate": 7.500000000000001e-06, "loss": 0.5056, "step": 2884 }, { "epoch": 2.0006934812760058, "grad_norm": 0.7146834103015289, "learning_rate": 7.497903313928405e-06, "loss": 0.4523, "step": 2885 }, { "epoch": 2.001386962552011, "grad_norm": 0.6180314412448197, "learning_rate": 7.495806042366363e-06, "loss": 0.4056, "step": 2886 }, { "epoch": 2.002080443828017, "grad_norm": 0.29687979662832115, "learning_rate": 7.493708185805459e-06, "loss": 0.348, "step": 2887 }, { "epoch": 2.002773925104022, "grad_norm": 0.3614447867498667, "learning_rate": 7.4916097447374194e-06, "loss": 0.4276, "step": 2888 }, { "epoch": 2.003467406380028, "grad_norm": 0.4058802506700272, "learning_rate": 7.4895107196541e-06, "loss": 0.441, "step": 2889 }, { "epoch": 2.0041608876560333, "grad_norm": 0.3420417130227295, "learning_rate": 7.4874111110474955e-06, "loss": 0.4234, "step": 2890 }, { "epoch": 2.004854368932039, "grad_norm": 0.38846000992080426, "learning_rate": 7.485310919409742e-06, "loss": 0.4373, "step": 2891 }, { "epoch": 2.0055478502080444, "grad_norm": 0.3623736861191617, "learning_rate": 7.48321014523311e-06, "loss": 0.4106, "step": 2892 }, { "epoch": 2.00624133148405, "grad_norm": 0.3642800458680427, "learning_rate": 7.481108789010003e-06, "loss": 0.433, "step": 2893 }, { "epoch": 2.0069348127600555, "grad_norm": 0.36524010205980534, "learning_rate": 7.479006851232965e-06, "loss": 0.4436, "step": 2894 }, { "epoch": 2.0076282940360612, "grad_norm": 0.3429227978002902, "learning_rate": 7.4769043323946746e-06, "loss": 0.428, "step": 2895 }, { "epoch": 2.0083217753120666, "grad_norm": 0.35987936381576785, "learning_rate": 7.474801232987948e-06, "loss": 0.4653, "step": 2896 }, { "epoch": 2.0090152565880723, "grad_norm": 0.332590635210399, "learning_rate": 7.472697553505736e-06, "loss": 0.3612, "step": 2897 }, { "epoch": 2.0097087378640777, "grad_norm": 0.3490804631637133, "learning_rate": 7.470593294441124e-06, "loss": 0.4747, "step": 2898 }, { "epoch": 2.0104022191400834, "grad_norm": 
0.4125588583396638, "learning_rate": 7.4684884562873375e-06, "loss": 0.4373, "step": 2899 }, { "epoch": 2.0110957004160888, "grad_norm": 0.34728420195893356, "learning_rate": 7.466383039537735e-06, "loss": 0.4514, "step": 2900 }, { "epoch": 2.0117891816920945, "grad_norm": 0.33132310844990925, "learning_rate": 7.46427704468581e-06, "loss": 0.3716, "step": 2901 }, { "epoch": 2.0124826629681, "grad_norm": 0.37546869539441957, "learning_rate": 7.462170472225194e-06, "loss": 0.4145, "step": 2902 }, { "epoch": 2.0131761442441056, "grad_norm": 0.4315665627849969, "learning_rate": 7.4600633226496485e-06, "loss": 0.4715, "step": 2903 }, { "epoch": 2.013869625520111, "grad_norm": 0.3437342558007185, "learning_rate": 7.4579555964530795e-06, "loss": 0.3959, "step": 2904 }, { "epoch": 2.0145631067961167, "grad_norm": 0.34810430787405483, "learning_rate": 7.455847294129519e-06, "loss": 0.4211, "step": 2905 }, { "epoch": 2.015256588072122, "grad_norm": 0.37888935111198124, "learning_rate": 7.453738416173139e-06, "loss": 0.4392, "step": 2906 }, { "epoch": 2.015950069348128, "grad_norm": 0.4734181119166493, "learning_rate": 7.451628963078245e-06, "loss": 0.3913, "step": 2907 }, { "epoch": 2.016643550624133, "grad_norm": 0.34919890086071553, "learning_rate": 7.449518935339276e-06, "loss": 0.4501, "step": 2908 }, { "epoch": 2.017337031900139, "grad_norm": 0.3703329840280313, "learning_rate": 7.447408333450811e-06, "loss": 0.5084, "step": 2909 }, { "epoch": 2.0180305131761442, "grad_norm": 0.3525130938500406, "learning_rate": 7.445297157907557e-06, "loss": 0.4458, "step": 2910 }, { "epoch": 2.01872399445215, "grad_norm": 0.37238934327990236, "learning_rate": 7.443185409204359e-06, "loss": 0.4836, "step": 2911 }, { "epoch": 2.0194174757281553, "grad_norm": 0.361261183126272, "learning_rate": 7.4410730878361936e-06, "loss": 0.407, "step": 2912 }, { "epoch": 2.020110957004161, "grad_norm": 0.37564813665004965, "learning_rate": 7.438960194298178e-06, "loss": 0.4606, "step": 2913 }, { "epoch": 2.0208044382801664, "grad_norm": 0.3352371928825262, "learning_rate": 7.436846729085556e-06, "loss": 0.4128, "step": 2914 }, { "epoch": 2.021497919556172, "grad_norm": 0.4015755166593209, "learning_rate": 7.434732692693708e-06, "loss": 0.459, "step": 2915 }, { "epoch": 2.0221914008321775, "grad_norm": 0.36629021998122735, "learning_rate": 7.432618085618152e-06, "loss": 0.4833, "step": 2916 }, { "epoch": 2.0228848821081833, "grad_norm": 0.3774399648148289, "learning_rate": 7.430502908354532e-06, "loss": 0.4579, "step": 2917 }, { "epoch": 2.0235783633841886, "grad_norm": 0.37726337936849413, "learning_rate": 7.428387161398635e-06, "loss": 0.3907, "step": 2918 }, { "epoch": 2.0242718446601944, "grad_norm": 0.34428719555303283, "learning_rate": 7.426270845246373e-06, "loss": 0.4596, "step": 2919 }, { "epoch": 2.0249653259361997, "grad_norm": 0.33825621691918, "learning_rate": 7.424153960393798e-06, "loss": 0.4181, "step": 2920 }, { "epoch": 2.0256588072122055, "grad_norm": 0.33770870571869777, "learning_rate": 7.42203650733709e-06, "loss": 0.4201, "step": 2921 }, { "epoch": 2.026352288488211, "grad_norm": 0.3687196751817226, "learning_rate": 7.419918486572568e-06, "loss": 0.3984, "step": 2922 }, { "epoch": 2.0270457697642166, "grad_norm": 0.42522280921204547, "learning_rate": 7.417799898596676e-06, "loss": 0.4014, "step": 2923 }, { "epoch": 2.027739251040222, "grad_norm": 0.3531416005902581, "learning_rate": 7.415680743906001e-06, "loss": 0.3954, "step": 2924 }, { "epoch": 2.0284327323162277, "grad_norm": 0.3540475558874119, 
"learning_rate": 7.413561022997253e-06, "loss": 0.4317, "step": 2925 }, { "epoch": 2.029126213592233, "grad_norm": 0.3910373342437148, "learning_rate": 7.411440736367281e-06, "loss": 0.4213, "step": 2926 }, { "epoch": 2.0298196948682388, "grad_norm": 0.35841904355533144, "learning_rate": 7.4093198845130666e-06, "loss": 0.4636, "step": 2927 }, { "epoch": 2.030513176144244, "grad_norm": 0.3835539168709186, "learning_rate": 7.407198467931718e-06, "loss": 0.4555, "step": 2928 }, { "epoch": 2.03120665742025, "grad_norm": 0.3556215946429487, "learning_rate": 7.405076487120484e-06, "loss": 0.4876, "step": 2929 }, { "epoch": 2.031900138696255, "grad_norm": 0.39795708209783903, "learning_rate": 7.402953942576738e-06, "loss": 0.4596, "step": 2930 }, { "epoch": 2.032593619972261, "grad_norm": 0.35453873412030984, "learning_rate": 7.400830834797993e-06, "loss": 0.4156, "step": 2931 }, { "epoch": 2.0332871012482663, "grad_norm": 0.36434897526467763, "learning_rate": 7.398707164281887e-06, "loss": 0.4472, "step": 2932 }, { "epoch": 2.033980582524272, "grad_norm": 0.35989755605118917, "learning_rate": 7.396582931526194e-06, "loss": 0.4459, "step": 2933 }, { "epoch": 2.0346740638002774, "grad_norm": 0.38672833092948306, "learning_rate": 7.394458137028818e-06, "loss": 0.4441, "step": 2934 }, { "epoch": 2.035367545076283, "grad_norm": 0.32193021063115684, "learning_rate": 7.392332781287798e-06, "loss": 0.4076, "step": 2935 }, { "epoch": 2.0360610263522885, "grad_norm": 0.3484149984725337, "learning_rate": 7.390206864801298e-06, "loss": 0.4895, "step": 2936 }, { "epoch": 2.0367545076282942, "grad_norm": 0.37047634197176654, "learning_rate": 7.388080388067621e-06, "loss": 0.457, "step": 2937 }, { "epoch": 2.0374479889042996, "grad_norm": 0.35108587056971163, "learning_rate": 7.3859533515851955e-06, "loss": 0.4553, "step": 2938 }, { "epoch": 2.0381414701803053, "grad_norm": 0.3703643889712212, "learning_rate": 7.383825755852585e-06, "loss": 0.483, "step": 2939 }, { "epoch": 2.0388349514563107, "grad_norm": 0.3630344879653978, "learning_rate": 7.381697601368481e-06, "loss": 0.4121, "step": 2940 }, { "epoch": 2.0395284327323164, "grad_norm": 0.3302945348077549, "learning_rate": 7.37956888863171e-06, "loss": 0.4192, "step": 2941 }, { "epoch": 2.0402219140083218, "grad_norm": 0.33815624190891896, "learning_rate": 7.3774396181412235e-06, "loss": 0.4395, "step": 2942 }, { "epoch": 2.0409153952843275, "grad_norm": 0.4470430985635722, "learning_rate": 7.375309790396108e-06, "loss": 0.3777, "step": 2943 }, { "epoch": 2.041608876560333, "grad_norm": 0.36314519372452403, "learning_rate": 7.373179405895582e-06, "loss": 0.4091, "step": 2944 }, { "epoch": 2.0423023578363386, "grad_norm": 0.3673466840452378, "learning_rate": 7.37104846513899e-06, "loss": 0.453, "step": 2945 }, { "epoch": 2.042995839112344, "grad_norm": 0.3522877191809448, "learning_rate": 7.3689169686258096e-06, "loss": 0.4332, "step": 2946 }, { "epoch": 2.0436893203883497, "grad_norm": 0.37520075390767993, "learning_rate": 7.36678491685565e-06, "loss": 0.4642, "step": 2947 }, { "epoch": 2.044382801664355, "grad_norm": 0.3578421132400116, "learning_rate": 7.364652310328244e-06, "loss": 0.4158, "step": 2948 }, { "epoch": 2.045076282940361, "grad_norm": 0.3632511967074461, "learning_rate": 7.362519149543464e-06, "loss": 0.467, "step": 2949 }, { "epoch": 2.045769764216366, "grad_norm": 0.355434381152111, "learning_rate": 7.360385435001306e-06, "loss": 0.4266, "step": 2950 }, { "epoch": 2.046463245492372, "grad_norm": 0.38927581263690186, "learning_rate": 
7.358251167201896e-06, "loss": 0.5017, "step": 2951 }, { "epoch": 2.0471567267683772, "grad_norm": 0.35293879801406647, "learning_rate": 7.356116346645491e-06, "loss": 0.4259, "step": 2952 }, { "epoch": 2.047850208044383, "grad_norm": 0.3249376480950482, "learning_rate": 7.353980973832479e-06, "loss": 0.3646, "step": 2953 }, { "epoch": 2.0485436893203883, "grad_norm": 0.35646154115316836, "learning_rate": 7.351845049263374e-06, "loss": 0.4893, "step": 2954 }, { "epoch": 2.049237170596394, "grad_norm": 0.3946491137363803, "learning_rate": 7.349708573438824e-06, "loss": 0.4026, "step": 2955 }, { "epoch": 2.0499306518723994, "grad_norm": 0.42958126090594545, "learning_rate": 7.3475715468596e-06, "loss": 0.4654, "step": 2956 }, { "epoch": 2.050624133148405, "grad_norm": 0.37665937795516446, "learning_rate": 7.345433970026607e-06, "loss": 0.3921, "step": 2957 }, { "epoch": 2.0513176144244105, "grad_norm": 0.36466148379806207, "learning_rate": 7.3432958434408806e-06, "loss": 0.4573, "step": 2958 }, { "epoch": 2.0520110957004163, "grad_norm": 0.3213186297254778, "learning_rate": 7.341157167603579e-06, "loss": 0.441, "step": 2959 }, { "epoch": 2.0527045769764216, "grad_norm": 0.36908593397004924, "learning_rate": 7.33901794301599e-06, "loss": 0.4569, "step": 2960 }, { "epoch": 2.0533980582524274, "grad_norm": 0.36794370552676486, "learning_rate": 7.3368781701795365e-06, "loss": 0.4259, "step": 2961 }, { "epoch": 2.0540915395284327, "grad_norm": 0.3894273342876289, "learning_rate": 7.3347378495957655e-06, "loss": 0.3893, "step": 2962 }, { "epoch": 2.0547850208044385, "grad_norm": 0.34623911584994665, "learning_rate": 7.332596981766351e-06, "loss": 0.4332, "step": 2963 }, { "epoch": 2.055478502080444, "grad_norm": 0.34570402910906584, "learning_rate": 7.330455567193095e-06, "loss": 0.4265, "step": 2964 }, { "epoch": 2.0561719833564496, "grad_norm": 0.35371820203398097, "learning_rate": 7.328313606377936e-06, "loss": 0.4415, "step": 2965 }, { "epoch": 2.056865464632455, "grad_norm": 0.34726814199184525, "learning_rate": 7.326171099822928e-06, "loss": 0.4295, "step": 2966 }, { "epoch": 2.0575589459084607, "grad_norm": 0.32356472950830084, "learning_rate": 7.324028048030261e-06, "loss": 0.3873, "step": 2967 }, { "epoch": 2.058252427184466, "grad_norm": 0.38854895295362485, "learning_rate": 7.321884451502252e-06, "loss": 0.4454, "step": 2968 }, { "epoch": 2.0589459084604718, "grad_norm": 0.34463233013673783, "learning_rate": 7.319740310741342e-06, "loss": 0.4627, "step": 2969 }, { "epoch": 2.059639389736477, "grad_norm": 0.3522306545160089, "learning_rate": 7.3175956262501035e-06, "loss": 0.4286, "step": 2970 }, { "epoch": 2.060332871012483, "grad_norm": 0.36957615734998583, "learning_rate": 7.3154503985312366e-06, "loss": 0.4693, "step": 2971 }, { "epoch": 2.061026352288488, "grad_norm": 0.3656599956480459, "learning_rate": 7.313304628087566e-06, "loss": 0.4578, "step": 2972 }, { "epoch": 2.061719833564494, "grad_norm": 0.40913095723021503, "learning_rate": 7.311158315422041e-06, "loss": 0.4654, "step": 2973 }, { "epoch": 2.0624133148404993, "grad_norm": 0.3586285719040597, "learning_rate": 7.309011461037749e-06, "loss": 0.4297, "step": 2974 }, { "epoch": 2.063106796116505, "grad_norm": 0.5045937777824696, "learning_rate": 7.30686406543789e-06, "loss": 0.4833, "step": 2975 }, { "epoch": 2.0638002773925104, "grad_norm": 1.0804079677102074, "learning_rate": 7.304716129125803e-06, "loss": 0.4735, "step": 2976 }, { "epoch": 2.064493758668516, "grad_norm": 0.3309159134019149, "learning_rate": 
7.302567652604945e-06, "loss": 0.3879, "step": 2977 }, { "epoch": 2.0651872399445215, "grad_norm": 0.37351848375722413, "learning_rate": 7.300418636378907e-06, "loss": 0.3958, "step": 2978 }, { "epoch": 2.0658807212205272, "grad_norm": 0.3927459166930335, "learning_rate": 7.2982690809514e-06, "loss": 0.396, "step": 2979 }, { "epoch": 2.0665742024965326, "grad_norm": 0.399726667879919, "learning_rate": 7.296118986826266e-06, "loss": 0.4771, "step": 2980 }, { "epoch": 2.0672676837725383, "grad_norm": 0.36708468950617, "learning_rate": 7.29396835450747e-06, "loss": 0.4551, "step": 2981 }, { "epoch": 2.0679611650485437, "grad_norm": 0.3902038836383935, "learning_rate": 7.291817184499107e-06, "loss": 0.4051, "step": 2982 }, { "epoch": 2.0686546463245494, "grad_norm": 0.3954656237623094, "learning_rate": 7.289665477305393e-06, "loss": 0.4873, "step": 2983 }, { "epoch": 2.0693481276005548, "grad_norm": 0.39177952831587315, "learning_rate": 7.287513233430674e-06, "loss": 0.4209, "step": 2984 }, { "epoch": 2.0700416088765605, "grad_norm": 0.37088711369824945, "learning_rate": 7.285360453379418e-06, "loss": 0.4154, "step": 2985 }, { "epoch": 2.070735090152566, "grad_norm": 0.36442790991643875, "learning_rate": 7.283207137656226e-06, "loss": 0.4753, "step": 2986 }, { "epoch": 2.0714285714285716, "grad_norm": 0.41817756911810805, "learning_rate": 7.281053286765816e-06, "loss": 0.4955, "step": 2987 }, { "epoch": 2.072122052704577, "grad_norm": 0.3751629121032016, "learning_rate": 7.278898901213035e-06, "loss": 0.4896, "step": 2988 }, { "epoch": 2.0728155339805827, "grad_norm": 0.3294482776197221, "learning_rate": 7.276743981502856e-06, "loss": 0.3993, "step": 2989 }, { "epoch": 2.073509015256588, "grad_norm": 0.3636883644535339, "learning_rate": 7.274588528140378e-06, "loss": 0.4578, "step": 2990 }, { "epoch": 2.074202496532594, "grad_norm": 0.38090055533168865, "learning_rate": 7.27243254163082e-06, "loss": 0.4367, "step": 2991 }, { "epoch": 2.074895977808599, "grad_norm": 0.3376329026686144, "learning_rate": 7.270276022479534e-06, "loss": 0.4756, "step": 2992 }, { "epoch": 2.075589459084605, "grad_norm": 0.3642370726837966, "learning_rate": 7.2681189711919896e-06, "loss": 0.4669, "step": 2993 }, { "epoch": 2.0762829403606102, "grad_norm": 0.4183134048378031, "learning_rate": 7.265961388273785e-06, "loss": 0.4387, "step": 2994 }, { "epoch": 2.076976421636616, "grad_norm": 0.33477538317487254, "learning_rate": 7.263803274230643e-06, "loss": 0.3811, "step": 2995 }, { "epoch": 2.0776699029126213, "grad_norm": 0.35281962859729005, "learning_rate": 7.2616446295684075e-06, "loss": 0.4388, "step": 2996 }, { "epoch": 2.078363384188627, "grad_norm": 0.33350983930832584, "learning_rate": 7.25948545479305e-06, "loss": 0.4044, "step": 2997 }, { "epoch": 2.0790568654646324, "grad_norm": 0.42407451571664223, "learning_rate": 7.2573257504106665e-06, "loss": 0.4759, "step": 2998 }, { "epoch": 2.079750346740638, "grad_norm": 0.3510795526353381, "learning_rate": 7.255165516927476e-06, "loss": 0.4362, "step": 2999 }, { "epoch": 2.0804438280166435, "grad_norm": 0.3317839288446993, "learning_rate": 7.2530047548498205e-06, "loss": 0.4157, "step": 3000 }, { "epoch": 2.0811373092926493, "grad_norm": 0.3629897746754837, "learning_rate": 7.2508434646841665e-06, "loss": 0.4377, "step": 3001 }, { "epoch": 2.0818307905686546, "grad_norm": 0.34710015516895404, "learning_rate": 7.248681646937106e-06, "loss": 0.4182, "step": 3002 }, { "epoch": 2.0825242718446604, "grad_norm": 0.37606986087672556, "learning_rate": 
7.246519302115355e-06, "loss": 0.4569, "step": 3003 }, { "epoch": 2.0832177531206657, "grad_norm": 0.44421841377060706, "learning_rate": 7.244356430725748e-06, "loss": 0.4541, "step": 3004 }, { "epoch": 2.0839112343966715, "grad_norm": 0.34865634699191783, "learning_rate": 7.242193033275249e-06, "loss": 0.4422, "step": 3005 }, { "epoch": 2.084604715672677, "grad_norm": 0.3865292049377577, "learning_rate": 7.24002911027094e-06, "loss": 0.4824, "step": 3006 }, { "epoch": 2.0852981969486826, "grad_norm": 0.4306864280596324, "learning_rate": 7.237864662220032e-06, "loss": 0.4513, "step": 3007 }, { "epoch": 2.085991678224688, "grad_norm": 0.35316900899834214, "learning_rate": 7.235699689629855e-06, "loss": 0.4556, "step": 3008 }, { "epoch": 2.0866851595006937, "grad_norm": 0.46068476114365564, "learning_rate": 7.2335341930078614e-06, "loss": 0.423, "step": 3009 }, { "epoch": 2.087378640776699, "grad_norm": 0.3652607083383284, "learning_rate": 7.23136817286163e-06, "loss": 0.4481, "step": 3010 }, { "epoch": 2.0880721220527048, "grad_norm": 0.34670932988684466, "learning_rate": 7.229201629698857e-06, "loss": 0.4677, "step": 3011 }, { "epoch": 2.08876560332871, "grad_norm": 0.346006830243124, "learning_rate": 7.22703456402737e-06, "loss": 0.3987, "step": 3012 }, { "epoch": 2.089459084604716, "grad_norm": 0.3699684146508989, "learning_rate": 7.224866976355108e-06, "loss": 0.3972, "step": 3013 }, { "epoch": 2.090152565880721, "grad_norm": 0.36646039093412713, "learning_rate": 7.22269886719014e-06, "loss": 0.4152, "step": 3014 }, { "epoch": 2.090846047156727, "grad_norm": 0.37413942838093034, "learning_rate": 7.220530237040655e-06, "loss": 0.4508, "step": 3015 }, { "epoch": 2.0915395284327323, "grad_norm": 0.3934824522530865, "learning_rate": 7.2183610864149655e-06, "loss": 0.4506, "step": 3016 }, { "epoch": 2.092233009708738, "grad_norm": 0.3404808850344954, "learning_rate": 7.216191415821503e-06, "loss": 0.4035, "step": 3017 }, { "epoch": 2.0929264909847434, "grad_norm": 0.3326069502831703, "learning_rate": 7.214021225768821e-06, "loss": 0.41, "step": 3018 }, { "epoch": 2.093619972260749, "grad_norm": 0.3519911319952195, "learning_rate": 7.211850516765602e-06, "loss": 0.4132, "step": 3019 }, { "epoch": 2.0943134535367545, "grad_norm": 0.327221436188564, "learning_rate": 7.209679289320638e-06, "loss": 0.4113, "step": 3020 }, { "epoch": 2.0950069348127602, "grad_norm": 0.36968738188853695, "learning_rate": 7.2075075439428535e-06, "loss": 0.438, "step": 3021 }, { "epoch": 2.0957004160887656, "grad_norm": 0.37323195061462966, "learning_rate": 7.205335281141287e-06, "loss": 0.4161, "step": 3022 }, { "epoch": 2.0963938973647713, "grad_norm": 0.3723826405101407, "learning_rate": 7.203162501425103e-06, "loss": 0.4414, "step": 3023 }, { "epoch": 2.0970873786407767, "grad_norm": 0.5976546534809862, "learning_rate": 7.200989205303583e-06, "loss": 0.4725, "step": 3024 }, { "epoch": 2.0977808599167824, "grad_norm": 0.394353759827912, "learning_rate": 7.198815393286136e-06, "loss": 0.456, "step": 3025 }, { "epoch": 2.0984743411927878, "grad_norm": 0.42285231924559064, "learning_rate": 7.196641065882285e-06, "loss": 0.4505, "step": 3026 }, { "epoch": 2.0991678224687935, "grad_norm": 0.44456228602603215, "learning_rate": 7.1944662236016774e-06, "loss": 0.4412, "step": 3027 }, { "epoch": 2.099861303744799, "grad_norm": 0.35094056985690847, "learning_rate": 7.192290866954078e-06, "loss": 0.4403, "step": 3028 }, { "epoch": 2.1005547850208046, "grad_norm": 0.3526100530140754, "learning_rate": 7.190114996449378e-06, 
"loss": 0.4057, "step": 3029 }, { "epoch": 2.10124826629681, "grad_norm": 0.38574775919106535, "learning_rate": 7.1879386125975836e-06, "loss": 0.4768, "step": 3030 }, { "epoch": 2.1019417475728157, "grad_norm": 0.3460237338522369, "learning_rate": 7.185761715908826e-06, "loss": 0.4319, "step": 3031 }, { "epoch": 2.102635228848821, "grad_norm": 0.4088404125103431, "learning_rate": 7.183584306893352e-06, "loss": 0.4302, "step": 3032 }, { "epoch": 2.103328710124827, "grad_norm": 0.3724657627204574, "learning_rate": 7.181406386061529e-06, "loss": 0.422, "step": 3033 }, { "epoch": 2.104022191400832, "grad_norm": 0.39440336400991, "learning_rate": 7.179227953923848e-06, "loss": 0.4325, "step": 3034 }, { "epoch": 2.104715672676838, "grad_norm": 0.4836264746543931, "learning_rate": 7.177049010990917e-06, "loss": 0.4715, "step": 3035 }, { "epoch": 2.1054091539528432, "grad_norm": 0.34367887699703425, "learning_rate": 7.174869557773467e-06, "loss": 0.4061, "step": 3036 }, { "epoch": 2.106102635228849, "grad_norm": 0.36229201910808145, "learning_rate": 7.172689594782342e-06, "loss": 0.4573, "step": 3037 }, { "epoch": 2.1067961165048543, "grad_norm": 0.36976526386395087, "learning_rate": 7.170509122528511e-06, "loss": 0.4284, "step": 3038 }, { "epoch": 2.10748959778086, "grad_norm": 0.4125742569348614, "learning_rate": 7.168328141523062e-06, "loss": 0.4333, "step": 3039 }, { "epoch": 2.1081830790568654, "grad_norm": 0.34810355488800704, "learning_rate": 7.1661466522772e-06, "loss": 0.3828, "step": 3040 }, { "epoch": 2.108876560332871, "grad_norm": 0.3790962333779141, "learning_rate": 7.163964655302252e-06, "loss": 0.5177, "step": 3041 }, { "epoch": 2.1095700416088765, "grad_norm": 0.3665157730115209, "learning_rate": 7.161782151109659e-06, "loss": 0.4403, "step": 3042 }, { "epoch": 2.1102635228848823, "grad_norm": 0.39432150364010454, "learning_rate": 7.1595991402109865e-06, "loss": 0.4574, "step": 3043 }, { "epoch": 2.1109570041608876, "grad_norm": 0.40865901683254635, "learning_rate": 7.157415623117917e-06, "loss": 0.4795, "step": 3044 }, { "epoch": 2.1116504854368934, "grad_norm": 0.3764617126662991, "learning_rate": 7.15523160034225e-06, "loss": 0.426, "step": 3045 }, { "epoch": 2.1123439667128987, "grad_norm": 0.44733140346031974, "learning_rate": 7.1530470723959045e-06, "loss": 0.4029, "step": 3046 }, { "epoch": 2.1130374479889045, "grad_norm": 0.371084336094575, "learning_rate": 7.1508620397909175e-06, "loss": 0.5242, "step": 3047 }, { "epoch": 2.11373092926491, "grad_norm": 0.34374048081023245, "learning_rate": 7.148676503039448e-06, "loss": 0.42, "step": 3048 }, { "epoch": 2.1144244105409156, "grad_norm": 0.3990430602739235, "learning_rate": 7.146490462653767e-06, "loss": 0.4535, "step": 3049 }, { "epoch": 2.115117891816921, "grad_norm": 0.3780455875402749, "learning_rate": 7.144303919146265e-06, "loss": 0.479, "step": 3050 }, { "epoch": 2.1158113730929267, "grad_norm": 0.36899951610153414, "learning_rate": 7.142116873029455e-06, "loss": 0.4002, "step": 3051 }, { "epoch": 2.116504854368932, "grad_norm": 0.4093650961730342, "learning_rate": 7.139929324815965e-06, "loss": 0.4425, "step": 3052 }, { "epoch": 2.1171983356449378, "grad_norm": 0.40942250431003674, "learning_rate": 7.137741275018539e-06, "loss": 0.4427, "step": 3053 }, { "epoch": 2.117891816920943, "grad_norm": 0.3488974713103121, "learning_rate": 7.135552724150041e-06, "loss": 0.4239, "step": 3054 }, { "epoch": 2.118585298196949, "grad_norm": 0.35935995772783713, "learning_rate": 7.133363672723449e-06, "loss": 0.3947, "step": 3055 
}, { "epoch": 2.119278779472954, "grad_norm": 0.35203242767318643, "learning_rate": 7.131174121251864e-06, "loss": 0.4452, "step": 3056 }, { "epoch": 2.11997226074896, "grad_norm": 0.36801362436190616, "learning_rate": 7.128984070248499e-06, "loss": 0.4358, "step": 3057 }, { "epoch": 2.1206657420249653, "grad_norm": 0.35498634891990455, "learning_rate": 7.126793520226688e-06, "loss": 0.3871, "step": 3058 }, { "epoch": 2.121359223300971, "grad_norm": 0.3449513968787113, "learning_rate": 7.124602471699878e-06, "loss": 0.3842, "step": 3059 }, { "epoch": 2.1220527045769764, "grad_norm": 0.3560759248219683, "learning_rate": 7.1224109251816355e-06, "loss": 0.3537, "step": 3060 }, { "epoch": 2.122746185852982, "grad_norm": 0.3991859818076799, "learning_rate": 7.120218881185644e-06, "loss": 0.4124, "step": 3061 }, { "epoch": 2.1234396671289875, "grad_norm": 0.3428981209932839, "learning_rate": 7.118026340225701e-06, "loss": 0.4028, "step": 3062 }, { "epoch": 2.1241331484049932, "grad_norm": 0.3527433179290824, "learning_rate": 7.115833302815724e-06, "loss": 0.4375, "step": 3063 }, { "epoch": 2.1248266296809986, "grad_norm": 0.4246884696511276, "learning_rate": 7.113639769469744e-06, "loss": 0.3882, "step": 3064 }, { "epoch": 2.1255201109570043, "grad_norm": 0.35714650509889484, "learning_rate": 7.11144574070191e-06, "loss": 0.4215, "step": 3065 }, { "epoch": 2.1262135922330097, "grad_norm": 0.34687010359136733, "learning_rate": 7.109251217026487e-06, "loss": 0.3993, "step": 3066 }, { "epoch": 2.1269070735090154, "grad_norm": 0.33213104312134606, "learning_rate": 7.1070561989578535e-06, "loss": 0.3956, "step": 3067 }, { "epoch": 2.1276005547850207, "grad_norm": 0.3612567948278647, "learning_rate": 7.104860687010507e-06, "loss": 0.4095, "step": 3068 }, { "epoch": 2.1282940360610265, "grad_norm": 0.3743454949185115, "learning_rate": 7.1026646816990596e-06, "loss": 0.4732, "step": 3069 }, { "epoch": 2.128987517337032, "grad_norm": 0.36277235569194677, "learning_rate": 7.100468183538241e-06, "loss": 0.3942, "step": 3070 }, { "epoch": 2.1296809986130376, "grad_norm": 0.42424603861477894, "learning_rate": 7.098271193042889e-06, "loss": 0.4516, "step": 3071 }, { "epoch": 2.130374479889043, "grad_norm": 0.3738954919270784, "learning_rate": 7.096073710727968e-06, "loss": 0.4001, "step": 3072 }, { "epoch": 2.1310679611650487, "grad_norm": 0.3676848335117262, "learning_rate": 7.0938757371085485e-06, "loss": 0.3992, "step": 3073 }, { "epoch": 2.131761442441054, "grad_norm": 0.3394703406308582, "learning_rate": 7.091677272699823e-06, "loss": 0.4266, "step": 3074 }, { "epoch": 2.13245492371706, "grad_norm": 0.4136822507153506, "learning_rate": 7.089478318017091e-06, "loss": 0.4704, "step": 3075 }, { "epoch": 2.133148404993065, "grad_norm": 0.36330960243837956, "learning_rate": 7.0872788735757755e-06, "loss": 0.443, "step": 3076 }, { "epoch": 2.133841886269071, "grad_norm": 0.36941866023569536, "learning_rate": 7.085078939891409e-06, "loss": 0.4165, "step": 3077 }, { "epoch": 2.1345353675450762, "grad_norm": 0.3435100398963316, "learning_rate": 7.082878517479639e-06, "loss": 0.421, "step": 3078 }, { "epoch": 2.135228848821082, "grad_norm": 0.3762597369490691, "learning_rate": 7.08067760685623e-06, "loss": 0.4293, "step": 3079 }, { "epoch": 2.1359223300970873, "grad_norm": 0.38771522549082293, "learning_rate": 7.078476208537057e-06, "loss": 0.4548, "step": 3080 }, { "epoch": 2.136615811373093, "grad_norm": 0.47538159903011057, "learning_rate": 7.076274323038117e-06, "loss": 0.3915, "step": 3081 }, { "epoch": 
2.1373092926490984, "grad_norm": 0.40870763751556194, "learning_rate": 7.074071950875509e-06, "loss": 0.4298, "step": 3082 }, { "epoch": 2.138002773925104, "grad_norm": 0.43511553462940733, "learning_rate": 7.07186909256546e-06, "loss": 0.4606, "step": 3083 }, { "epoch": 2.1386962552011095, "grad_norm": 0.33652915412924644, "learning_rate": 7.069665748624299e-06, "loss": 0.4129, "step": 3084 }, { "epoch": 2.1393897364771153, "grad_norm": 0.3931416637998521, "learning_rate": 7.067461919568477e-06, "loss": 0.4228, "step": 3085 }, { "epoch": 2.1400832177531206, "grad_norm": 0.41083223367930966, "learning_rate": 7.065257605914555e-06, "loss": 0.4713, "step": 3086 }, { "epoch": 2.1407766990291264, "grad_norm": 0.8211237999837143, "learning_rate": 7.063052808179205e-06, "loss": 0.4443, "step": 3087 }, { "epoch": 2.1414701803051317, "grad_norm": 0.38815529192990306, "learning_rate": 7.0608475268792186e-06, "loss": 0.4617, "step": 3088 }, { "epoch": 2.1421636615811375, "grad_norm": 0.348726077841462, "learning_rate": 7.0586417625315e-06, "loss": 0.4333, "step": 3089 }, { "epoch": 2.142857142857143, "grad_norm": 0.3715224852636519, "learning_rate": 7.056435515653059e-06, "loss": 0.4456, "step": 3090 }, { "epoch": 2.1435506241331486, "grad_norm": 0.384158212156197, "learning_rate": 7.054228786761027e-06, "loss": 0.4033, "step": 3091 }, { "epoch": 2.144244105409154, "grad_norm": 0.374678479276744, "learning_rate": 7.0520215763726444e-06, "loss": 0.4377, "step": 3092 }, { "epoch": 2.1449375866851597, "grad_norm": 0.394246413472569, "learning_rate": 7.049813885005267e-06, "loss": 0.4191, "step": 3093 }, { "epoch": 2.145631067961165, "grad_norm": 0.4953657444002148, "learning_rate": 7.04760571317636e-06, "loss": 0.4662, "step": 3094 }, { "epoch": 2.1463245492371708, "grad_norm": 0.35962145722718386, "learning_rate": 7.0453970614035025e-06, "loss": 0.428, "step": 3095 }, { "epoch": 2.147018030513176, "grad_norm": 0.340209505876518, "learning_rate": 7.043187930204387e-06, "loss": 0.4125, "step": 3096 }, { "epoch": 2.147711511789182, "grad_norm": 0.3697826337989141, "learning_rate": 7.040978320096819e-06, "loss": 0.4005, "step": 3097 }, { "epoch": 2.148404993065187, "grad_norm": 0.35727219624826106, "learning_rate": 7.038768231598715e-06, "loss": 0.4242, "step": 3098 }, { "epoch": 2.149098474341193, "grad_norm": 0.47038992732935203, "learning_rate": 7.036557665228103e-06, "loss": 0.4336, "step": 3099 }, { "epoch": 2.1497919556171983, "grad_norm": 0.33951178416801997, "learning_rate": 7.034346621503121e-06, "loss": 0.446, "step": 3100 }, { "epoch": 2.150485436893204, "grad_norm": 0.39454754315666807, "learning_rate": 7.032135100942027e-06, "loss": 0.3949, "step": 3101 }, { "epoch": 2.1511789181692094, "grad_norm": 0.3608277708887644, "learning_rate": 7.029923104063182e-06, "loss": 0.4358, "step": 3102 }, { "epoch": 2.151872399445215, "grad_norm": 0.363761875264272, "learning_rate": 7.027710631385063e-06, "loss": 0.412, "step": 3103 }, { "epoch": 2.1525658807212205, "grad_norm": 0.3800460349549785, "learning_rate": 7.025497683426257e-06, "loss": 0.4185, "step": 3104 }, { "epoch": 2.1532593619972262, "grad_norm": 0.329733700696235, "learning_rate": 7.023284260705463e-06, "loss": 0.3696, "step": 3105 }, { "epoch": 2.1539528432732316, "grad_norm": 0.3743548356607152, "learning_rate": 7.021070363741492e-06, "loss": 0.407, "step": 3106 }, { "epoch": 2.1546463245492373, "grad_norm": 0.4253620493594505, "learning_rate": 7.018855993053266e-06, "loss": 0.4243, "step": 3107 }, { "epoch": 2.1553398058252426, 
"grad_norm": 0.3729463446838696, "learning_rate": 7.016641149159816e-06, "loss": 0.4135, "step": 3108 }, { "epoch": 2.1560332871012484, "grad_norm": 0.37350361298635437, "learning_rate": 7.0144258325802835e-06, "loss": 0.3981, "step": 3109 }, { "epoch": 2.1567267683772537, "grad_norm": 0.35199874919180735, "learning_rate": 7.012210043833927e-06, "loss": 0.446, "step": 3110 }, { "epoch": 2.1574202496532595, "grad_norm": 0.3669594222490185, "learning_rate": 7.00999378344011e-06, "loss": 0.4265, "step": 3111 }, { "epoch": 2.158113730929265, "grad_norm": 0.3508520756570443, "learning_rate": 7.007777051918306e-06, "loss": 0.4096, "step": 3112 }, { "epoch": 2.1588072122052706, "grad_norm": 0.35348859791564874, "learning_rate": 7.005559849788101e-06, "loss": 0.3865, "step": 3113 }, { "epoch": 2.159500693481276, "grad_norm": 0.3655679514995117, "learning_rate": 7.003342177569195e-06, "loss": 0.4527, "step": 3114 }, { "epoch": 2.1601941747572817, "grad_norm": 0.375376852563299, "learning_rate": 7.00112403578139e-06, "loss": 0.466, "step": 3115 }, { "epoch": 2.160887656033287, "grad_norm": 0.42216667649516487, "learning_rate": 6.998905424944605e-06, "loss": 0.3593, "step": 3116 }, { "epoch": 2.161581137309293, "grad_norm": 0.3662857201076887, "learning_rate": 6.996686345578863e-06, "loss": 0.4118, "step": 3117 }, { "epoch": 2.162274618585298, "grad_norm": 0.5380173495766039, "learning_rate": 6.994466798204303e-06, "loss": 0.3992, "step": 3118 }, { "epoch": 2.162968099861304, "grad_norm": 0.3447286846349849, "learning_rate": 6.992246783341171e-06, "loss": 0.3635, "step": 3119 }, { "epoch": 2.163661581137309, "grad_norm": 0.4352242220473789, "learning_rate": 6.99002630150982e-06, "loss": 0.4518, "step": 3120 }, { "epoch": 2.164355062413315, "grad_norm": 0.3729191189855568, "learning_rate": 6.987805353230719e-06, "loss": 0.4443, "step": 3121 }, { "epoch": 2.1650485436893203, "grad_norm": 0.42897560953798314, "learning_rate": 6.985583939024436e-06, "loss": 0.4223, "step": 3122 }, { "epoch": 2.165742024965326, "grad_norm": 0.3828341124882771, "learning_rate": 6.983362059411661e-06, "loss": 0.4533, "step": 3123 }, { "epoch": 2.1664355062413314, "grad_norm": 0.5212452572134227, "learning_rate": 6.9811397149131835e-06, "loss": 0.449, "step": 3124 }, { "epoch": 2.167128987517337, "grad_norm": 0.3735744987624875, "learning_rate": 6.978916906049903e-06, "loss": 0.3937, "step": 3125 }, { "epoch": 2.1678224687933425, "grad_norm": 0.4093629777835829, "learning_rate": 6.976693633342833e-06, "loss": 0.3823, "step": 3126 }, { "epoch": 2.1685159500693483, "grad_norm": 0.3645001820211574, "learning_rate": 6.97446989731309e-06, "loss": 0.3985, "step": 3127 }, { "epoch": 2.1692094313453536, "grad_norm": 0.32968239868725185, "learning_rate": 6.972245698481903e-06, "loss": 0.3925, "step": 3128 }, { "epoch": 2.1699029126213594, "grad_norm": 0.4065184879704676, "learning_rate": 6.970021037370609e-06, "loss": 0.4927, "step": 3129 }, { "epoch": 2.1705963938973647, "grad_norm": 0.3603595652767137, "learning_rate": 6.967795914500651e-06, "loss": 0.4516, "step": 3130 }, { "epoch": 2.1712898751733705, "grad_norm": 0.4198167248069969, "learning_rate": 6.965570330393582e-06, "loss": 0.4499, "step": 3131 }, { "epoch": 2.171983356449376, "grad_norm": 0.39279017404085115, "learning_rate": 6.963344285571063e-06, "loss": 0.3915, "step": 3132 }, { "epoch": 2.1726768377253816, "grad_norm": 0.3635314062003914, "learning_rate": 6.961117780554862e-06, "loss": 0.445, "step": 3133 }, { "epoch": 2.173370319001387, "grad_norm": 
0.3832597576776832, "learning_rate": 6.958890815866857e-06, "loss": 0.4097, "step": 3134 }, { "epoch": 2.1740638002773927, "grad_norm": 0.36993096949813076, "learning_rate": 6.956663392029033e-06, "loss": 0.4345, "step": 3135 }, { "epoch": 2.174757281553398, "grad_norm": 0.3733518675027611, "learning_rate": 6.9544355095634775e-06, "loss": 0.396, "step": 3136 }, { "epoch": 2.1754507628294038, "grad_norm": 0.35577485861920793, "learning_rate": 6.9522071689923955e-06, "loss": 0.4446, "step": 3137 }, { "epoch": 2.176144244105409, "grad_norm": 0.345487235319404, "learning_rate": 6.9499783708380904e-06, "loss": 0.3872, "step": 3138 }, { "epoch": 2.176837725381415, "grad_norm": 0.36688561634303574, "learning_rate": 6.947749115622979e-06, "loss": 0.4859, "step": 3139 }, { "epoch": 2.17753120665742, "grad_norm": 0.35631342115427483, "learning_rate": 6.945519403869581e-06, "loss": 0.4116, "step": 3140 }, { "epoch": 2.178224687933426, "grad_norm": 0.39777269551571115, "learning_rate": 6.943289236100523e-06, "loss": 0.4601, "step": 3141 }, { "epoch": 2.1789181692094313, "grad_norm": 0.4227153104614663, "learning_rate": 6.941058612838544e-06, "loss": 0.4305, "step": 3142 }, { "epoch": 2.179611650485437, "grad_norm": 0.34143561557334956, "learning_rate": 6.938827534606484e-06, "loss": 0.3897, "step": 3143 }, { "epoch": 2.1803051317614424, "grad_norm": 0.39993084472906265, "learning_rate": 6.936596001927292e-06, "loss": 0.406, "step": 3144 }, { "epoch": 2.180998613037448, "grad_norm": 0.35637437963580954, "learning_rate": 6.93436401532402e-06, "loss": 0.375, "step": 3145 }, { "epoch": 2.1816920943134535, "grad_norm": 0.3766557843328465, "learning_rate": 6.932131575319834e-06, "loss": 0.3975, "step": 3146 }, { "epoch": 2.1823855755894592, "grad_norm": 0.42950999063047757, "learning_rate": 6.929898682437999e-06, "loss": 0.419, "step": 3147 }, { "epoch": 2.1830790568654646, "grad_norm": 0.49033590261343324, "learning_rate": 6.927665337201891e-06, "loss": 0.4568, "step": 3148 }, { "epoch": 2.1837725381414703, "grad_norm": 0.38923798848248387, "learning_rate": 6.925431540134988e-06, "loss": 0.4744, "step": 3149 }, { "epoch": 2.1844660194174756, "grad_norm": 0.37569690836507236, "learning_rate": 6.923197291760876e-06, "loss": 0.3993, "step": 3150 }, { "epoch": 2.1851595006934814, "grad_norm": 0.3872062746291295, "learning_rate": 6.9209625926032485e-06, "loss": 0.4952, "step": 3151 }, { "epoch": 2.1858529819694867, "grad_norm": 0.3755709994284284, "learning_rate": 6.918727443185902e-06, "loss": 0.438, "step": 3152 }, { "epoch": 2.1865464632454925, "grad_norm": 0.37274394488620805, "learning_rate": 6.916491844032736e-06, "loss": 0.4165, "step": 3153 }, { "epoch": 2.187239944521498, "grad_norm": 0.3671859855245455, "learning_rate": 6.914255795667763e-06, "loss": 0.467, "step": 3154 }, { "epoch": 2.1879334257975036, "grad_norm": 0.34563104164031566, "learning_rate": 6.912019298615097e-06, "loss": 0.4284, "step": 3155 }, { "epoch": 2.188626907073509, "grad_norm": 0.36065990000831494, "learning_rate": 6.909782353398955e-06, "loss": 0.4364, "step": 3156 }, { "epoch": 2.1893203883495147, "grad_norm": 0.42302861767406164, "learning_rate": 6.907544960543659e-06, "loss": 0.4005, "step": 3157 }, { "epoch": 2.19001386962552, "grad_norm": 0.4016607283196581, "learning_rate": 6.905307120573639e-06, "loss": 0.3988, "step": 3158 }, { "epoch": 2.190707350901526, "grad_norm": 0.39093334562382, "learning_rate": 6.903068834013429e-06, "loss": 0.4261, "step": 3159 }, { "epoch": 2.191400832177531, "grad_norm": 0.36793766843058306, 
"learning_rate": 6.900830101387667e-06, "loss": 0.4016, "step": 3160 }, { "epoch": 2.192094313453537, "grad_norm": 0.3243424542807762, "learning_rate": 6.8985909232210965e-06, "loss": 0.3572, "step": 3161 }, { "epoch": 2.192787794729542, "grad_norm": 0.3775776131699167, "learning_rate": 6.896351300038564e-06, "loss": 0.4612, "step": 3162 }, { "epoch": 2.193481276005548, "grad_norm": 0.3818596641017086, "learning_rate": 6.89411123236502e-06, "loss": 0.4939, "step": 3163 }, { "epoch": 2.1941747572815533, "grad_norm": 0.37812123779394696, "learning_rate": 6.891870720725522e-06, "loss": 0.414, "step": 3164 }, { "epoch": 2.194868238557559, "grad_norm": 0.3835042224657585, "learning_rate": 6.8896297656452286e-06, "loss": 0.4332, "step": 3165 }, { "epoch": 2.1955617198335644, "grad_norm": 0.34305892206165406, "learning_rate": 6.887388367649402e-06, "loss": 0.3728, "step": 3166 }, { "epoch": 2.19625520110957, "grad_norm": 0.3799511406233889, "learning_rate": 6.885146527263411e-06, "loss": 0.4177, "step": 3167 }, { "epoch": 2.1969486823855755, "grad_norm": 0.36465839462895966, "learning_rate": 6.882904245012728e-06, "loss": 0.3953, "step": 3168 }, { "epoch": 2.1976421636615813, "grad_norm": 0.36502262785244294, "learning_rate": 6.8806615214229275e-06, "loss": 0.3942, "step": 3169 }, { "epoch": 2.1983356449375866, "grad_norm": 0.44813633262880787, "learning_rate": 6.878418357019685e-06, "loss": 0.4682, "step": 3170 }, { "epoch": 2.1990291262135924, "grad_norm": 1.1150107948643713, "learning_rate": 6.8761747523287845e-06, "loss": 0.4878, "step": 3171 }, { "epoch": 2.1997226074895977, "grad_norm": 0.37817388216565617, "learning_rate": 6.87393070787611e-06, "loss": 0.4528, "step": 3172 }, { "epoch": 2.2004160887656035, "grad_norm": 0.3481644514461239, "learning_rate": 6.871686224187649e-06, "loss": 0.3869, "step": 3173 }, { "epoch": 2.201109570041609, "grad_norm": 0.6472600110428178, "learning_rate": 6.869441301789492e-06, "loss": 0.4672, "step": 3174 }, { "epoch": 2.2018030513176146, "grad_norm": 0.396462841976969, "learning_rate": 6.867195941207834e-06, "loss": 0.5001, "step": 3175 }, { "epoch": 2.20249653259362, "grad_norm": 0.33504670662848984, "learning_rate": 6.864950142968969e-06, "loss": 0.4202, "step": 3176 }, { "epoch": 2.2031900138696257, "grad_norm": 0.48692526877310366, "learning_rate": 6.862703907599298e-06, "loss": 0.4479, "step": 3177 }, { "epoch": 2.203883495145631, "grad_norm": 0.36751944278978205, "learning_rate": 6.860457235625322e-06, "loss": 0.4227, "step": 3178 }, { "epoch": 2.2045769764216367, "grad_norm": 0.4051893471548496, "learning_rate": 6.8582101275736436e-06, "loss": 0.4523, "step": 3179 }, { "epoch": 2.205270457697642, "grad_norm": 0.3556882051436896, "learning_rate": 6.855962583970969e-06, "loss": 0.4396, "step": 3180 }, { "epoch": 2.205963938973648, "grad_norm": 0.37866140201299847, "learning_rate": 6.853714605344105e-06, "loss": 0.4349, "step": 3181 }, { "epoch": 2.206657420249653, "grad_norm": 0.3734076208223684, "learning_rate": 6.851466192219963e-06, "loss": 0.382, "step": 3182 }, { "epoch": 2.207350901525659, "grad_norm": 0.3435981580517485, "learning_rate": 6.849217345125556e-06, "loss": 0.4263, "step": 3183 }, { "epoch": 2.2080443828016643, "grad_norm": 0.43374269513443797, "learning_rate": 6.846968064587995e-06, "loss": 0.4193, "step": 3184 }, { "epoch": 2.20873786407767, "grad_norm": 0.42876485085711014, "learning_rate": 6.844718351134496e-06, "loss": 0.4792, "step": 3185 }, { "epoch": 2.2094313453536754, "grad_norm": 0.3567072347550561, "learning_rate": 
6.842468205292375e-06, "loss": 0.4143, "step": 3186 }, { "epoch": 2.210124826629681, "grad_norm": 0.37299923097904636, "learning_rate": 6.840217627589052e-06, "loss": 0.4521, "step": 3187 }, { "epoch": 2.2108183079056865, "grad_norm": 0.34832336457669794, "learning_rate": 6.837966618552045e-06, "loss": 0.4336, "step": 3188 }, { "epoch": 2.2115117891816922, "grad_norm": 0.3679922938183328, "learning_rate": 6.835715178708973e-06, "loss": 0.4121, "step": 3189 }, { "epoch": 2.2122052704576975, "grad_norm": 0.3867529363489136, "learning_rate": 6.8334633085875564e-06, "loss": 0.4892, "step": 3190 }, { "epoch": 2.2128987517337033, "grad_norm": 0.4172097099888967, "learning_rate": 6.831211008715619e-06, "loss": 0.4199, "step": 3191 }, { "epoch": 2.2135922330097086, "grad_norm": 0.3484205019547079, "learning_rate": 6.828958279621085e-06, "loss": 0.4068, "step": 3192 }, { "epoch": 2.2142857142857144, "grad_norm": 0.37711309043688035, "learning_rate": 6.8267051218319766e-06, "loss": 0.4407, "step": 3193 }, { "epoch": 2.2149791955617197, "grad_norm": 0.38901539219632253, "learning_rate": 6.824451535876415e-06, "loss": 0.4645, "step": 3194 }, { "epoch": 2.2156726768377255, "grad_norm": 0.3797025186452667, "learning_rate": 6.8221975222826276e-06, "loss": 0.4624, "step": 3195 }, { "epoch": 2.216366158113731, "grad_norm": 0.38908439812531065, "learning_rate": 6.819943081578939e-06, "loss": 0.4008, "step": 3196 }, { "epoch": 2.2170596393897366, "grad_norm": 0.33759666688484663, "learning_rate": 6.817688214293773e-06, "loss": 0.3768, "step": 3197 }, { "epoch": 2.217753120665742, "grad_norm": 0.36114655983941857, "learning_rate": 6.815432920955652e-06, "loss": 0.4129, "step": 3198 }, { "epoch": 2.2184466019417477, "grad_norm": 0.37733284468929934, "learning_rate": 6.813177202093203e-06, "loss": 0.4333, "step": 3199 }, { "epoch": 2.219140083217753, "grad_norm": 0.3751447894147838, "learning_rate": 6.81092105823515e-06, "loss": 0.4139, "step": 3200 }, { "epoch": 2.219833564493759, "grad_norm": 0.39087858551090465, "learning_rate": 6.808664489910317e-06, "loss": 0.4604, "step": 3201 }, { "epoch": 2.220527045769764, "grad_norm": 0.38706165970167433, "learning_rate": 6.806407497647625e-06, "loss": 0.4602, "step": 3202 }, { "epoch": 2.22122052704577, "grad_norm": 0.3512169622422497, "learning_rate": 6.8041500819760976e-06, "loss": 0.3964, "step": 3203 }, { "epoch": 2.221914008321775, "grad_norm": 0.3898499803767229, "learning_rate": 6.801892243424859e-06, "loss": 0.467, "step": 3204 }, { "epoch": 2.222607489597781, "grad_norm": 0.3439941460324588, "learning_rate": 6.799633982523128e-06, "loss": 0.4175, "step": 3205 }, { "epoch": 2.2233009708737863, "grad_norm": 0.38551815405469103, "learning_rate": 6.797375299800224e-06, "loss": 0.3867, "step": 3206 }, { "epoch": 2.223994452149792, "grad_norm": 0.35032279732716465, "learning_rate": 6.795116195785567e-06, "loss": 0.4235, "step": 3207 }, { "epoch": 2.2246879334257974, "grad_norm": 0.36497372750026075, "learning_rate": 6.792856671008676e-06, "loss": 0.4484, "step": 3208 }, { "epoch": 2.225381414701803, "grad_norm": 0.3480387233277934, "learning_rate": 6.790596725999166e-06, "loss": 0.4292, "step": 3209 }, { "epoch": 2.2260748959778085, "grad_norm": 0.33245022179722605, "learning_rate": 6.788336361286751e-06, "loss": 0.4586, "step": 3210 }, { "epoch": 2.2267683772538143, "grad_norm": 0.3679765680882823, "learning_rate": 6.786075577401243e-06, "loss": 0.4184, "step": 3211 }, { "epoch": 2.2274618585298196, "grad_norm": 0.3829055470881608, "learning_rate": 
6.7838143748725574e-06, "loss": 0.4545, "step": 3212 }, { "epoch": 2.2281553398058254, "grad_norm": 0.3952697984732193, "learning_rate": 6.7815527542307e-06, "loss": 0.4426, "step": 3213 }, { "epoch": 2.2288488210818307, "grad_norm": 0.365894439012572, "learning_rate": 6.7792907160057796e-06, "loss": 0.49, "step": 3214 }, { "epoch": 2.2295423023578365, "grad_norm": 0.39561522589371145, "learning_rate": 6.777028260728002e-06, "loss": 0.4203, "step": 3215 }, { "epoch": 2.230235783633842, "grad_norm": 0.4230834702410087, "learning_rate": 6.774765388927669e-06, "loss": 0.4901, "step": 3216 }, { "epoch": 2.2309292649098476, "grad_norm": 0.3983835450531227, "learning_rate": 6.772502101135183e-06, "loss": 0.4546, "step": 3217 }, { "epoch": 2.231622746185853, "grad_norm": 0.39458703276296103, "learning_rate": 6.7702383978810424e-06, "loss": 0.4164, "step": 3218 }, { "epoch": 2.2323162274618586, "grad_norm": 0.3532660353792984, "learning_rate": 6.767974279695842e-06, "loss": 0.4211, "step": 3219 }, { "epoch": 2.233009708737864, "grad_norm": 0.35081341241152886, "learning_rate": 6.765709747110274e-06, "loss": 0.3935, "step": 3220 }, { "epoch": 2.2337031900138697, "grad_norm": 0.36711314575138754, "learning_rate": 6.763444800655128e-06, "loss": 0.4287, "step": 3221 }, { "epoch": 2.234396671289875, "grad_norm": 0.3893473906612097, "learning_rate": 6.761179440861294e-06, "loss": 0.4565, "step": 3222 }, { "epoch": 2.235090152565881, "grad_norm": 0.33315561721493103, "learning_rate": 6.758913668259753e-06, "loss": 0.4101, "step": 3223 }, { "epoch": 2.235783633841886, "grad_norm": 0.3735604377255463, "learning_rate": 6.756647483381588e-06, "loss": 0.4334, "step": 3224 }, { "epoch": 2.236477115117892, "grad_norm": 0.35767189078117967, "learning_rate": 6.754380886757973e-06, "loss": 0.4336, "step": 3225 }, { "epoch": 2.2371705963938973, "grad_norm": 0.42486391288728126, "learning_rate": 6.752113878920186e-06, "loss": 0.3996, "step": 3226 }, { "epoch": 2.237864077669903, "grad_norm": 0.3689275305319877, "learning_rate": 6.749846460399594e-06, "loss": 0.3904, "step": 3227 }, { "epoch": 2.2385575589459084, "grad_norm": 0.45038343291428184, "learning_rate": 6.747578631727666e-06, "loss": 0.3958, "step": 3228 }, { "epoch": 2.239251040221914, "grad_norm": 0.3553606619890095, "learning_rate": 6.745310393435962e-06, "loss": 0.4125, "step": 3229 }, { "epoch": 2.2399445214979194, "grad_norm": 0.4021476653867645, "learning_rate": 6.743041746056142e-06, "loss": 0.4599, "step": 3230 }, { "epoch": 2.240638002773925, "grad_norm": 0.3509861883633808, "learning_rate": 6.740772690119961e-06, "loss": 0.4337, "step": 3231 }, { "epoch": 2.2413314840499305, "grad_norm": 0.35051440733846, "learning_rate": 6.738503226159269e-06, "loss": 0.4098, "step": 3232 }, { "epoch": 2.2420249653259363, "grad_norm": 0.3889774976379475, "learning_rate": 6.736233354706011e-06, "loss": 0.4487, "step": 3233 }, { "epoch": 2.2427184466019416, "grad_norm": 0.3810766233493372, "learning_rate": 6.7339630762922295e-06, "loss": 0.4673, "step": 3234 }, { "epoch": 2.2434119278779474, "grad_norm": 0.34061956942943267, "learning_rate": 6.731692391450061e-06, "loss": 0.4228, "step": 3235 }, { "epoch": 2.2441054091539527, "grad_norm": 0.34316598103004414, "learning_rate": 6.729421300711736e-06, "loss": 0.3806, "step": 3236 }, { "epoch": 2.2447988904299585, "grad_norm": 0.43164398669665005, "learning_rate": 6.727149804609585e-06, "loss": 0.4451, "step": 3237 }, { "epoch": 2.245492371705964, "grad_norm": 0.35099462456805025, "learning_rate": 
6.724877903676028e-06, "loss": 0.4492, "step": 3238 }, { "epoch": 2.2461858529819696, "grad_norm": 0.3996144361019322, "learning_rate": 6.722605598443581e-06, "loss": 0.4268, "step": 3239 }, { "epoch": 2.246879334257975, "grad_norm": 0.38121833561607527, "learning_rate": 6.720332889444858e-06, "loss": 0.4651, "step": 3240 }, { "epoch": 2.2475728155339807, "grad_norm": 0.33121543000194664, "learning_rate": 6.7180597772125665e-06, "loss": 0.4076, "step": 3241 }, { "epoch": 2.248266296809986, "grad_norm": 0.35819296532384437, "learning_rate": 6.7157862622795044e-06, "loss": 0.4598, "step": 3242 }, { "epoch": 2.248959778085992, "grad_norm": 0.36257211594140176, "learning_rate": 6.71351234517857e-06, "loss": 0.4323, "step": 3243 }, { "epoch": 2.249653259361997, "grad_norm": 0.4290610247647165, "learning_rate": 6.71123802644275e-06, "loss": 0.3887, "step": 3244 }, { "epoch": 2.250346740638003, "grad_norm": 0.34849749579416356, "learning_rate": 6.7089633066051315e-06, "loss": 0.4401, "step": 3245 }, { "epoch": 2.251040221914008, "grad_norm": 0.3576875889894574, "learning_rate": 6.706688186198891e-06, "loss": 0.4177, "step": 3246 }, { "epoch": 2.251733703190014, "grad_norm": 0.3553490004661506, "learning_rate": 6.7044126657572985e-06, "loss": 0.4276, "step": 3247 }, { "epoch": 2.2524271844660193, "grad_norm": 0.3371123295813205, "learning_rate": 6.702136745813721e-06, "loss": 0.3941, "step": 3248 }, { "epoch": 2.253120665742025, "grad_norm": 0.33522352253075743, "learning_rate": 6.69986042690162e-06, "loss": 0.4505, "step": 3249 }, { "epoch": 2.2538141470180304, "grad_norm": 0.3566783443007843, "learning_rate": 6.697583709554545e-06, "loss": 0.4071, "step": 3250 }, { "epoch": 2.254507628294036, "grad_norm": 0.34829843827033774, "learning_rate": 6.695306594306142e-06, "loss": 0.3952, "step": 3251 }, { "epoch": 2.2552011095700415, "grad_norm": 0.3502322200532421, "learning_rate": 6.6930290816901515e-06, "loss": 0.4552, "step": 3252 }, { "epoch": 2.2558945908460473, "grad_norm": 0.37785150765385184, "learning_rate": 6.6907511722404065e-06, "loss": 0.4182, "step": 3253 }, { "epoch": 2.2565880721220526, "grad_norm": 0.35506041130851373, "learning_rate": 6.688472866490832e-06, "loss": 0.4045, "step": 3254 }, { "epoch": 2.2572815533980584, "grad_norm": 0.3930893511399995, "learning_rate": 6.686194164975446e-06, "loss": 0.4436, "step": 3255 }, { "epoch": 2.2579750346740637, "grad_norm": 0.4037885650269651, "learning_rate": 6.683915068228357e-06, "loss": 0.4388, "step": 3256 }, { "epoch": 2.2586685159500695, "grad_norm": 0.6676777687031498, "learning_rate": 6.681635576783774e-06, "loss": 0.4533, "step": 3257 }, { "epoch": 2.259361997226075, "grad_norm": 0.3591696255670284, "learning_rate": 6.679355691175991e-06, "loss": 0.4628, "step": 3258 }, { "epoch": 2.2600554785020806, "grad_norm": 0.3681087949489095, "learning_rate": 6.677075411939396e-06, "loss": 0.4764, "step": 3259 }, { "epoch": 2.260748959778086, "grad_norm": 0.35689530521389246, "learning_rate": 6.67479473960847e-06, "loss": 0.4254, "step": 3260 }, { "epoch": 2.2614424410540916, "grad_norm": 0.3710448540773542, "learning_rate": 6.672513674717785e-06, "loss": 0.4276, "step": 3261 }, { "epoch": 2.262135922330097, "grad_norm": 0.39646594317541917, "learning_rate": 6.670232217802011e-06, "loss": 0.4152, "step": 3262 }, { "epoch": 2.2628294036061027, "grad_norm": 0.3429166393044668, "learning_rate": 6.6679503693959e-06, "loss": 0.3593, "step": 3263 }, { "epoch": 2.263522884882108, "grad_norm": 0.3544717212123084, "learning_rate": 
6.665668130034302e-06, "loss": 0.456, "step": 3264 }, { "epoch": 2.264216366158114, "grad_norm": 0.3574817978636266, "learning_rate": 6.663385500252157e-06, "loss": 0.4192, "step": 3265 }, { "epoch": 2.264909847434119, "grad_norm": 0.34640457132811586, "learning_rate": 6.661102480584498e-06, "loss": 0.4146, "step": 3266 }, { "epoch": 2.265603328710125, "grad_norm": 0.3728471078648733, "learning_rate": 6.658819071566449e-06, "loss": 0.4664, "step": 3267 }, { "epoch": 2.2662968099861303, "grad_norm": 0.39774466842547634, "learning_rate": 6.656535273733222e-06, "loss": 0.4722, "step": 3268 }, { "epoch": 2.266990291262136, "grad_norm": 0.3808029015924205, "learning_rate": 6.654251087620125e-06, "loss": 0.4513, "step": 3269 }, { "epoch": 2.2676837725381414, "grad_norm": 0.37347801231352173, "learning_rate": 6.651966513762552e-06, "loss": 0.4798, "step": 3270 }, { "epoch": 2.268377253814147, "grad_norm": 0.3872374945364614, "learning_rate": 6.649681552695994e-06, "loss": 0.4152, "step": 3271 }, { "epoch": 2.2690707350901524, "grad_norm": 0.36121485886596594, "learning_rate": 6.647396204956027e-06, "loss": 0.4138, "step": 3272 }, { "epoch": 2.269764216366158, "grad_norm": 0.3398182085904718, "learning_rate": 6.6451104710783206e-06, "loss": 0.4279, "step": 3273 }, { "epoch": 2.2704576976421635, "grad_norm": 0.47824207638988286, "learning_rate": 6.6428243515986355e-06, "loss": 0.4477, "step": 3274 }, { "epoch": 2.2711511789181693, "grad_norm": 0.3685727795408177, "learning_rate": 6.640537847052818e-06, "loss": 0.4073, "step": 3275 }, { "epoch": 2.2718446601941746, "grad_norm": 0.3353690864388626, "learning_rate": 6.638250957976813e-06, "loss": 0.434, "step": 3276 }, { "epoch": 2.2725381414701804, "grad_norm": 0.3435652355299191, "learning_rate": 6.635963684906646e-06, "loss": 0.4111, "step": 3277 }, { "epoch": 2.2732316227461857, "grad_norm": 0.4221229601666925, "learning_rate": 6.6336760283784395e-06, "loss": 0.4429, "step": 3278 }, { "epoch": 2.2739251040221915, "grad_norm": 0.3448233257899806, "learning_rate": 6.631387988928404e-06, "loss": 0.4537, "step": 3279 }, { "epoch": 2.274618585298197, "grad_norm": 0.36684986111303636, "learning_rate": 6.62909956709284e-06, "loss": 0.4281, "step": 3280 }, { "epoch": 2.2753120665742026, "grad_norm": 0.36405331636940524, "learning_rate": 6.626810763408134e-06, "loss": 0.4411, "step": 3281 }, { "epoch": 2.276005547850208, "grad_norm": 0.356841560430753, "learning_rate": 6.6245215784107695e-06, "loss": 0.4239, "step": 3282 }, { "epoch": 2.2766990291262137, "grad_norm": 0.3801864988090061, "learning_rate": 6.6222320126373105e-06, "loss": 0.4606, "step": 3283 }, { "epoch": 2.277392510402219, "grad_norm": 0.364347127375704, "learning_rate": 6.619942066624417e-06, "loss": 0.4189, "step": 3284 }, { "epoch": 2.278085991678225, "grad_norm": 0.34802934040034456, "learning_rate": 6.617651740908835e-06, "loss": 0.4335, "step": 3285 }, { "epoch": 2.27877947295423, "grad_norm": 0.34152045906723943, "learning_rate": 6.6153610360274014e-06, "loss": 0.4255, "step": 3286 }, { "epoch": 2.279472954230236, "grad_norm": 0.33991863437247355, "learning_rate": 6.61306995251704e-06, "loss": 0.4192, "step": 3287 }, { "epoch": 2.280166435506241, "grad_norm": 0.36702944905564105, "learning_rate": 6.610778490914763e-06, "loss": 0.4414, "step": 3288 }, { "epoch": 2.280859916782247, "grad_norm": 0.3363866659981957, "learning_rate": 6.608486651757673e-06, "loss": 0.4132, "step": 3289 }, { "epoch": 2.2815533980582523, "grad_norm": 0.39834279294618163, "learning_rate": 
6.6061944355829634e-06, "loss": 0.371, "step": 3290 }, { "epoch": 2.282246879334258, "grad_norm": 0.3590341740384732, "learning_rate": 6.603901842927909e-06, "loss": 0.4607, "step": 3291 }, { "epoch": 2.2829403606102634, "grad_norm": 0.38279593231597786, "learning_rate": 6.601608874329879e-06, "loss": 0.403, "step": 3292 }, { "epoch": 2.283633841886269, "grad_norm": 0.35681057740702554, "learning_rate": 6.599315530326328e-06, "loss": 0.4287, "step": 3293 }, { "epoch": 2.2843273231622745, "grad_norm": 0.33483056001079603, "learning_rate": 6.5970218114548e-06, "loss": 0.4258, "step": 3294 }, { "epoch": 2.2850208044382803, "grad_norm": 0.32480905748131034, "learning_rate": 6.594727718252925e-06, "loss": 0.3818, "step": 3295 }, { "epoch": 2.2857142857142856, "grad_norm": 0.36444629498912573, "learning_rate": 6.592433251258423e-06, "loss": 0.4116, "step": 3296 }, { "epoch": 2.2864077669902914, "grad_norm": 0.5245725239891985, "learning_rate": 6.590138411009099e-06, "loss": 0.4148, "step": 3297 }, { "epoch": 2.2871012482662967, "grad_norm": 0.3650205096057206, "learning_rate": 6.587843198042848e-06, "loss": 0.461, "step": 3298 }, { "epoch": 2.2877947295423025, "grad_norm": 0.3732647441193785, "learning_rate": 6.585547612897653e-06, "loss": 0.4461, "step": 3299 }, { "epoch": 2.2884882108183078, "grad_norm": 0.37869872424797363, "learning_rate": 6.583251656111579e-06, "loss": 0.4488, "step": 3300 }, { "epoch": 2.2891816920943135, "grad_norm": 0.32711844570564075, "learning_rate": 6.580955328222782e-06, "loss": 0.4044, "step": 3301 }, { "epoch": 2.289875173370319, "grad_norm": 0.471035521144447, "learning_rate": 6.578658629769507e-06, "loss": 0.4121, "step": 3302 }, { "epoch": 2.2905686546463246, "grad_norm": 0.3469179521041166, "learning_rate": 6.5763615612900834e-06, "loss": 0.4021, "step": 3303 }, { "epoch": 2.29126213592233, "grad_norm": 0.4113219336358887, "learning_rate": 6.574064123322925e-06, "loss": 0.4655, "step": 3304 }, { "epoch": 2.2919556171983357, "grad_norm": 0.36062914616033437, "learning_rate": 6.571766316406537e-06, "loss": 0.4355, "step": 3305 }, { "epoch": 2.292649098474341, "grad_norm": 0.34683938613322324, "learning_rate": 6.569468141079507e-06, "loss": 0.4104, "step": 3306 }, { "epoch": 2.293342579750347, "grad_norm": 0.345232532727373, "learning_rate": 6.567169597880512e-06, "loss": 0.3571, "step": 3307 }, { "epoch": 2.294036061026352, "grad_norm": 0.38826318949514355, "learning_rate": 6.564870687348312e-06, "loss": 0.3943, "step": 3308 }, { "epoch": 2.294729542302358, "grad_norm": 0.3717121038562584, "learning_rate": 6.562571410021758e-06, "loss": 0.4248, "step": 3309 }, { "epoch": 2.2954230235783633, "grad_norm": 0.355063374301673, "learning_rate": 6.5602717664397795e-06, "loss": 0.4876, "step": 3310 }, { "epoch": 2.296116504854369, "grad_norm": 0.3884043233165812, "learning_rate": 6.557971757141402e-06, "loss": 0.4685, "step": 3311 }, { "epoch": 2.2968099861303743, "grad_norm": 0.3513554971994437, "learning_rate": 6.555671382665727e-06, "loss": 0.4386, "step": 3312 }, { "epoch": 2.29750346740638, "grad_norm": 0.3481242369228575, "learning_rate": 6.5533706435519454e-06, "loss": 0.4281, "step": 3313 }, { "epoch": 2.2981969486823854, "grad_norm": 0.36652653214075054, "learning_rate": 6.5510695403393365e-06, "loss": 0.4661, "step": 3314 }, { "epoch": 2.298890429958391, "grad_norm": 0.34063580485073686, "learning_rate": 6.548768073567258e-06, "loss": 0.4056, "step": 3315 }, { "epoch": 2.2995839112343965, "grad_norm": 0.9173466365303112, "learning_rate": 
6.5464662437751634e-06, "loss": 0.425, "step": 3316 }, { "epoch": 2.3002773925104023, "grad_norm": 0.3413504222392783, "learning_rate": 6.5441640515025795e-06, "loss": 0.4363, "step": 3317 }, { "epoch": 2.3009708737864076, "grad_norm": 0.45599209442604965, "learning_rate": 6.541861497289126e-06, "loss": 0.4587, "step": 3318 }, { "epoch": 2.3016643550624134, "grad_norm": 0.3560639727612127, "learning_rate": 6.539558581674503e-06, "loss": 0.4377, "step": 3319 }, { "epoch": 2.3023578363384187, "grad_norm": 0.3211526490582845, "learning_rate": 6.5372553051985e-06, "loss": 0.4274, "step": 3320 }, { "epoch": 2.3030513176144245, "grad_norm": 0.35060024821190416, "learning_rate": 6.534951668400986e-06, "loss": 0.4115, "step": 3321 }, { "epoch": 2.30374479889043, "grad_norm": 0.3653406803746035, "learning_rate": 6.5326476718219165e-06, "loss": 0.4351, "step": 3322 }, { "epoch": 2.3044382801664356, "grad_norm": 0.4708909428237561, "learning_rate": 6.530343316001334e-06, "loss": 0.461, "step": 3323 }, { "epoch": 2.305131761442441, "grad_norm": 0.37451279720941105, "learning_rate": 6.52803860147936e-06, "loss": 0.4568, "step": 3324 }, { "epoch": 2.3058252427184467, "grad_norm": 0.3694267698896481, "learning_rate": 6.525733528796207e-06, "loss": 0.4257, "step": 3325 }, { "epoch": 2.306518723994452, "grad_norm": 0.5161161729147663, "learning_rate": 6.523428098492163e-06, "loss": 0.4239, "step": 3326 }, { "epoch": 2.307212205270458, "grad_norm": 0.3886816521838468, "learning_rate": 6.5211223111076065e-06, "loss": 0.4327, "step": 3327 }, { "epoch": 2.307905686546463, "grad_norm": 0.35178441767206575, "learning_rate": 6.518816167182996e-06, "loss": 0.4442, "step": 3328 }, { "epoch": 2.308599167822469, "grad_norm": 0.38474584607835866, "learning_rate": 6.516509667258877e-06, "loss": 0.4708, "step": 3329 }, { "epoch": 2.309292649098474, "grad_norm": 0.3695121458647729, "learning_rate": 6.514202811875874e-06, "loss": 0.4604, "step": 3330 }, { "epoch": 2.30998613037448, "grad_norm": 1.131673848138997, "learning_rate": 6.511895601574698e-06, "loss": 0.4022, "step": 3331 }, { "epoch": 2.3106796116504853, "grad_norm": 0.3587322375640702, "learning_rate": 6.509588036896144e-06, "loss": 0.3987, "step": 3332 }, { "epoch": 2.311373092926491, "grad_norm": 0.3353590598403885, "learning_rate": 6.507280118381085e-06, "loss": 0.4454, "step": 3333 }, { "epoch": 2.3120665742024964, "grad_norm": 0.3714283959907697, "learning_rate": 6.504971846570484e-06, "loss": 0.4124, "step": 3334 }, { "epoch": 2.312760055478502, "grad_norm": 0.3812094155750211, "learning_rate": 6.502663222005382e-06, "loss": 0.4236, "step": 3335 }, { "epoch": 2.3134535367545075, "grad_norm": 0.3452165630974439, "learning_rate": 6.500354245226903e-06, "loss": 0.431, "step": 3336 }, { "epoch": 2.3141470180305133, "grad_norm": 0.3369957844501315, "learning_rate": 6.498044916776255e-06, "loss": 0.3756, "step": 3337 }, { "epoch": 2.3148404993065186, "grad_norm": 0.37378829273373, "learning_rate": 6.495735237194727e-06, "loss": 0.4331, "step": 3338 }, { "epoch": 2.3155339805825244, "grad_norm": 0.35482066447873756, "learning_rate": 6.493425207023693e-06, "loss": 0.3837, "step": 3339 }, { "epoch": 2.3162274618585297, "grad_norm": 0.33713355082890706, "learning_rate": 6.491114826804607e-06, "loss": 0.3712, "step": 3340 }, { "epoch": 2.3169209431345354, "grad_norm": 0.36309135163251544, "learning_rate": 6.488804097079005e-06, "loss": 0.4523, "step": 3341 }, { "epoch": 2.3176144244105408, "grad_norm": 0.3634776405367075, "learning_rate": 6.486493018388502e-06, 
"loss": 0.3645, "step": 3342 }, { "epoch": 2.3183079056865465, "grad_norm": 0.37242495564367256, "learning_rate": 6.484181591274804e-06, "loss": 0.4409, "step": 3343 }, { "epoch": 2.319001386962552, "grad_norm": 0.35463085393063387, "learning_rate": 6.481869816279689e-06, "loss": 0.3976, "step": 3344 }, { "epoch": 2.3196948682385576, "grad_norm": 0.3624813574473872, "learning_rate": 6.479557693945022e-06, "loss": 0.4743, "step": 3345 }, { "epoch": 2.320388349514563, "grad_norm": 0.348819367646347, "learning_rate": 6.477245224812746e-06, "loss": 0.3851, "step": 3346 }, { "epoch": 2.3210818307905687, "grad_norm": 0.34648568791108286, "learning_rate": 6.474932409424888e-06, "loss": 0.409, "step": 3347 }, { "epoch": 2.321775312066574, "grad_norm": 0.3442746098904869, "learning_rate": 6.4726192483235564e-06, "loss": 0.4248, "step": 3348 }, { "epoch": 2.32246879334258, "grad_norm": 0.3397393802058347, "learning_rate": 6.470305742050938e-06, "loss": 0.4438, "step": 3349 }, { "epoch": 2.323162274618585, "grad_norm": 0.369794195615468, "learning_rate": 6.4679918911493015e-06, "loss": 0.3795, "step": 3350 }, { "epoch": 2.323855755894591, "grad_norm": 0.3521241325240761, "learning_rate": 6.465677696160997e-06, "loss": 0.404, "step": 3351 }, { "epoch": 2.3245492371705962, "grad_norm": 0.37593824832172124, "learning_rate": 6.463363157628456e-06, "loss": 0.389, "step": 3352 }, { "epoch": 2.325242718446602, "grad_norm": 0.35446973660877806, "learning_rate": 6.46104827609419e-06, "loss": 0.4102, "step": 3353 }, { "epoch": 2.3259361997226073, "grad_norm": 0.3778037088411012, "learning_rate": 6.458733052100787e-06, "loss": 0.3919, "step": 3354 }, { "epoch": 2.326629680998613, "grad_norm": 0.36428989535407785, "learning_rate": 6.456417486190923e-06, "loss": 0.4491, "step": 3355 }, { "epoch": 2.3273231622746184, "grad_norm": 0.36507726540493185, "learning_rate": 6.454101578907348e-06, "loss": 0.4083, "step": 3356 }, { "epoch": 2.328016643550624, "grad_norm": 0.36794054531079673, "learning_rate": 6.451785330792894e-06, "loss": 0.4054, "step": 3357 }, { "epoch": 2.3287101248266295, "grad_norm": 0.397215446334308, "learning_rate": 6.449468742390472e-06, "loss": 0.4266, "step": 3358 }, { "epoch": 2.3294036061026353, "grad_norm": 0.3592690505460678, "learning_rate": 6.447151814243075e-06, "loss": 0.445, "step": 3359 }, { "epoch": 2.3300970873786406, "grad_norm": 0.434944116526258, "learning_rate": 6.444834546893773e-06, "loss": 0.4136, "step": 3360 }, { "epoch": 2.3307905686546464, "grad_norm": 0.34509160611793993, "learning_rate": 6.442516940885718e-06, "loss": 0.4011, "step": 3361 }, { "epoch": 2.3314840499306517, "grad_norm": 0.41579465154441725, "learning_rate": 6.440198996762139e-06, "loss": 0.4686, "step": 3362 }, { "epoch": 2.3321775312066575, "grad_norm": 0.36455116066509813, "learning_rate": 6.437880715066346e-06, "loss": 0.3866, "step": 3363 }, { "epoch": 2.332871012482663, "grad_norm": 0.3452314364510656, "learning_rate": 6.435562096341726e-06, "loss": 0.4407, "step": 3364 }, { "epoch": 2.3335644937586686, "grad_norm": 0.3638852141104239, "learning_rate": 6.433243141131748e-06, "loss": 0.3632, "step": 3365 }, { "epoch": 2.334257975034674, "grad_norm": 0.33891432758834644, "learning_rate": 6.430923849979958e-06, "loss": 0.3857, "step": 3366 }, { "epoch": 2.3349514563106797, "grad_norm": 0.3793851285938589, "learning_rate": 6.42860422342998e-06, "loss": 0.4592, "step": 3367 }, { "epoch": 2.335644937586685, "grad_norm": 0.3513547162756251, "learning_rate": 6.426284262025519e-06, "loss": 0.4224, "step": 
3368 }, { "epoch": 2.336338418862691, "grad_norm": 0.35658306501930737, "learning_rate": 6.423963966310356e-06, "loss": 0.4145, "step": 3369 }, { "epoch": 2.337031900138696, "grad_norm": 0.3433575800648795, "learning_rate": 6.4216433368283535e-06, "loss": 0.4445, "step": 3370 }, { "epoch": 2.337725381414702, "grad_norm": 0.3446282815830537, "learning_rate": 6.419322374123448e-06, "loss": 0.4129, "step": 3371 }, { "epoch": 2.338418862690707, "grad_norm": 0.3896608090763199, "learning_rate": 6.4170010787396576e-06, "loss": 0.4529, "step": 3372 }, { "epoch": 2.339112343966713, "grad_norm": 0.40974544389265305, "learning_rate": 6.4146794512210755e-06, "loss": 0.4485, "step": 3373 }, { "epoch": 2.3398058252427183, "grad_norm": 0.37262273430385956, "learning_rate": 6.412357492111877e-06, "loss": 0.4875, "step": 3374 }, { "epoch": 2.340499306518724, "grad_norm": 0.42558664420672976, "learning_rate": 6.410035201956311e-06, "loss": 0.4614, "step": 3375 }, { "epoch": 2.3411927877947294, "grad_norm": 0.35238464681827575, "learning_rate": 6.407712581298705e-06, "loss": 0.4675, "step": 3376 }, { "epoch": 2.341886269070735, "grad_norm": 0.3369379914035939, "learning_rate": 6.405389630683465e-06, "loss": 0.4384, "step": 3377 }, { "epoch": 2.3425797503467405, "grad_norm": 0.477112803773968, "learning_rate": 6.403066350655074e-06, "loss": 0.4147, "step": 3378 }, { "epoch": 2.3432732316227463, "grad_norm": 0.36080047173989804, "learning_rate": 6.400742741758092e-06, "loss": 0.3987, "step": 3379 }, { "epoch": 2.3439667128987516, "grad_norm": 0.3736865943997831, "learning_rate": 6.3984188045371566e-06, "loss": 0.4063, "step": 3380 }, { "epoch": 2.3446601941747574, "grad_norm": 0.39806065460955636, "learning_rate": 6.396094539536981e-06, "loss": 0.4049, "step": 3381 }, { "epoch": 2.3453536754507627, "grad_norm": 0.3562460578072664, "learning_rate": 6.393769947302355e-06, "loss": 0.4208, "step": 3382 }, { "epoch": 2.3460471567267684, "grad_norm": 0.408192722083846, "learning_rate": 6.391445028378149e-06, "loss": 0.4524, "step": 3383 }, { "epoch": 2.3467406380027738, "grad_norm": 0.32533670575757584, "learning_rate": 6.389119783309306e-06, "loss": 0.4205, "step": 3384 }, { "epoch": 2.3474341192787795, "grad_norm": 0.37216837987975354, "learning_rate": 6.386794212640846e-06, "loss": 0.4087, "step": 3385 }, { "epoch": 2.348127600554785, "grad_norm": 0.37150027196366225, "learning_rate": 6.384468316917865e-06, "loss": 0.4137, "step": 3386 }, { "epoch": 2.3488210818307906, "grad_norm": 0.38447704576737873, "learning_rate": 6.382142096685538e-06, "loss": 0.3996, "step": 3387 }, { "epoch": 2.349514563106796, "grad_norm": 0.40740775299773047, "learning_rate": 6.379815552489112e-06, "loss": 0.4536, "step": 3388 }, { "epoch": 2.3502080443828017, "grad_norm": 0.36822169913786423, "learning_rate": 6.377488684873917e-06, "loss": 0.4281, "step": 3389 }, { "epoch": 2.350901525658807, "grad_norm": 0.40016362939793576, "learning_rate": 6.375161494385349e-06, "loss": 0.4337, "step": 3390 }, { "epoch": 2.351595006934813, "grad_norm": 0.33381928007705064, "learning_rate": 6.372833981568885e-06, "loss": 0.4451, "step": 3391 }, { "epoch": 2.352288488210818, "grad_norm": 0.3588959932374742, "learning_rate": 6.370506146970078e-06, "loss": 0.4451, "step": 3392 }, { "epoch": 2.352981969486824, "grad_norm": 0.37989940100138275, "learning_rate": 6.368177991134558e-06, "loss": 0.468, "step": 3393 }, { "epoch": 2.3536754507628292, "grad_norm": 0.3410133129775475, "learning_rate": 6.365849514608025e-06, "loss": 0.4192, "step": 3394 }, { 
"epoch": 2.354368932038835, "grad_norm": 0.44469936051079273, "learning_rate": 6.363520717936256e-06, "loss": 0.4518, "step": 3395 }, { "epoch": 2.3550624133148403, "grad_norm": 0.3635685167424104, "learning_rate": 6.361191601665107e-06, "loss": 0.4033, "step": 3396 }, { "epoch": 2.355755894590846, "grad_norm": 0.36245690943424497, "learning_rate": 6.358862166340505e-06, "loss": 0.4223, "step": 3397 }, { "epoch": 2.3564493758668514, "grad_norm": 0.37394168257935906, "learning_rate": 6.356532412508453e-06, "loss": 0.4353, "step": 3398 }, { "epoch": 2.357142857142857, "grad_norm": 0.3710994035820661, "learning_rate": 6.354202340715027e-06, "loss": 0.4457, "step": 3399 }, { "epoch": 2.3578363384188625, "grad_norm": 0.38630683590497084, "learning_rate": 6.351871951506379e-06, "loss": 0.4431, "step": 3400 }, { "epoch": 2.3585298196948683, "grad_norm": 0.3568683052878388, "learning_rate": 6.349541245428737e-06, "loss": 0.4461, "step": 3401 }, { "epoch": 2.3592233009708736, "grad_norm": 0.38771598700185156, "learning_rate": 6.347210223028403e-06, "loss": 0.3685, "step": 3402 }, { "epoch": 2.3599167822468794, "grad_norm": 0.3544011539020275, "learning_rate": 6.344878884851748e-06, "loss": 0.4533, "step": 3403 }, { "epoch": 2.3606102635228847, "grad_norm": 0.3670236812365391, "learning_rate": 6.342547231445222e-06, "loss": 0.4402, "step": 3404 }, { "epoch": 2.3613037447988905, "grad_norm": 0.36476568118874775, "learning_rate": 6.340215263355348e-06, "loss": 0.4228, "step": 3405 }, { "epoch": 2.361997226074896, "grad_norm": 0.4717730456339519, "learning_rate": 6.337882981128724e-06, "loss": 0.3999, "step": 3406 }, { "epoch": 2.3626907073509016, "grad_norm": 0.35221953625909663, "learning_rate": 6.335550385312018e-06, "loss": 0.3849, "step": 3407 }, { "epoch": 2.363384188626907, "grad_norm": 0.3860207819764881, "learning_rate": 6.3332174764519735e-06, "loss": 0.4189, "step": 3408 }, { "epoch": 2.3640776699029127, "grad_norm": 0.4007386812023215, "learning_rate": 6.330884255095409e-06, "loss": 0.4138, "step": 3409 }, { "epoch": 2.364771151178918, "grad_norm": 0.37862490931227294, "learning_rate": 6.328550721789214e-06, "loss": 0.4621, "step": 3410 }, { "epoch": 2.3654646324549238, "grad_norm": 0.35916781086557287, "learning_rate": 6.326216877080351e-06, "loss": 0.4342, "step": 3411 }, { "epoch": 2.366158113730929, "grad_norm": 0.3577175898636799, "learning_rate": 6.3238827215158575e-06, "loss": 0.4258, "step": 3412 }, { "epoch": 2.366851595006935, "grad_norm": 0.4217988564723029, "learning_rate": 6.32154825564284e-06, "loss": 0.4718, "step": 3413 }, { "epoch": 2.36754507628294, "grad_norm": 0.3472591144956317, "learning_rate": 6.319213480008485e-06, "loss": 0.3976, "step": 3414 }, { "epoch": 2.368238557558946, "grad_norm": 0.39692892084548975, "learning_rate": 6.3168783951600445e-06, "loss": 0.4276, "step": 3415 }, { "epoch": 2.3689320388349513, "grad_norm": 0.48621750097547883, "learning_rate": 6.3145430016448435e-06, "loss": 0.3952, "step": 3416 }, { "epoch": 2.369625520110957, "grad_norm": 0.34995196201467055, "learning_rate": 6.312207300010285e-06, "loss": 0.4212, "step": 3417 }, { "epoch": 2.3703190013869624, "grad_norm": 0.3889657758249156, "learning_rate": 6.309871290803837e-06, "loss": 0.455, "step": 3418 }, { "epoch": 2.371012482662968, "grad_norm": 0.3354965798611206, "learning_rate": 6.307534974573048e-06, "loss": 0.4471, "step": 3419 }, { "epoch": 2.3717059639389735, "grad_norm": 0.35780678124045107, "learning_rate": 6.305198351865527e-06, "loss": 0.417, "step": 3420 }, { "epoch": 
2.3723994452149793, "grad_norm": 0.3537815464065854, "learning_rate": 6.302861423228967e-06, "loss": 0.4302, "step": 3421 }, { "epoch": 2.3730929264909846, "grad_norm": 0.3954711687708549, "learning_rate": 6.300524189211124e-06, "loss": 0.4415, "step": 3422 }, { "epoch": 2.3737864077669903, "grad_norm": 0.5590188020954749, "learning_rate": 6.298186650359832e-06, "loss": 0.4832, "step": 3423 }, { "epoch": 2.3744798890429957, "grad_norm": 0.44129155814582244, "learning_rate": 6.2958488072229895e-06, "loss": 0.4444, "step": 3424 }, { "epoch": 2.3751733703190014, "grad_norm": 0.6650221897421527, "learning_rate": 6.293510660348572e-06, "loss": 0.4262, "step": 3425 }, { "epoch": 2.3758668515950068, "grad_norm": 0.3840587363773067, "learning_rate": 6.291172210284624e-06, "loss": 0.3939, "step": 3426 }, { "epoch": 2.3765603328710125, "grad_norm": 0.3935768538349805, "learning_rate": 6.288833457579261e-06, "loss": 0.4241, "step": 3427 }, { "epoch": 2.377253814147018, "grad_norm": 0.39560772177394354, "learning_rate": 6.2864944027806684e-06, "loss": 0.4424, "step": 3428 }, { "epoch": 2.3779472954230236, "grad_norm": 0.34737704553925264, "learning_rate": 6.284155046437107e-06, "loss": 0.4113, "step": 3429 }, { "epoch": 2.378640776699029, "grad_norm": 0.47403656541012373, "learning_rate": 6.281815389096903e-06, "loss": 0.4088, "step": 3430 }, { "epoch": 2.3793342579750347, "grad_norm": 0.4200280447992286, "learning_rate": 6.279475431308453e-06, "loss": 0.4243, "step": 3431 }, { "epoch": 2.38002773925104, "grad_norm": 0.43662749967588077, "learning_rate": 6.2771351736202306e-06, "loss": 0.4589, "step": 3432 }, { "epoch": 2.380721220527046, "grad_norm": 0.39979458496586723, "learning_rate": 6.27479461658077e-06, "loss": 0.462, "step": 3433 }, { "epoch": 2.381414701803051, "grad_norm": 0.3682603187605637, "learning_rate": 6.272453760738686e-06, "loss": 0.3895, "step": 3434 }, { "epoch": 2.382108183079057, "grad_norm": 0.36024963440519453, "learning_rate": 6.270112606642656e-06, "loss": 0.4127, "step": 3435 }, { "epoch": 2.3828016643550622, "grad_norm": 0.3903070950000419, "learning_rate": 6.267771154841429e-06, "loss": 0.4087, "step": 3436 }, { "epoch": 2.383495145631068, "grad_norm": 0.3391321662642895, "learning_rate": 6.265429405883825e-06, "loss": 0.4119, "step": 3437 }, { "epoch": 2.3841886269070733, "grad_norm": 0.3686041953326949, "learning_rate": 6.2630873603187335e-06, "loss": 0.391, "step": 3438 }, { "epoch": 2.384882108183079, "grad_norm": 0.39924365282291413, "learning_rate": 6.260745018695112e-06, "loss": 0.4986, "step": 3439 }, { "epoch": 2.3855755894590844, "grad_norm": 0.31873868385405696, "learning_rate": 6.258402381561989e-06, "loss": 0.3269, "step": 3440 }, { "epoch": 2.38626907073509, "grad_norm": 0.3468427356660674, "learning_rate": 6.256059449468462e-06, "loss": 0.4511, "step": 3441 }, { "epoch": 2.3869625520110955, "grad_norm": 0.3476113659455627, "learning_rate": 6.253716222963695e-06, "loss": 0.4157, "step": 3442 }, { "epoch": 2.3876560332871013, "grad_norm": 0.39463376375605047, "learning_rate": 6.251372702596927e-06, "loss": 0.4244, "step": 3443 }, { "epoch": 2.3883495145631066, "grad_norm": 0.3323736685099904, "learning_rate": 6.24902888891746e-06, "loss": 0.4304, "step": 3444 }, { "epoch": 2.3890429958391124, "grad_norm": 0.385009719696181, "learning_rate": 6.246684782474665e-06, "loss": 0.4809, "step": 3445 }, { "epoch": 2.3897364771151177, "grad_norm": 0.3946421860197158, "learning_rate": 6.244340383817989e-06, "loss": 0.4577, "step": 3446 }, { "epoch": 2.3904299583911235, 
"grad_norm": 0.39202685932477294, "learning_rate": 6.241995693496939e-06, "loss": 0.3802, "step": 3447 }, { "epoch": 2.391123439667129, "grad_norm": 0.3638305833865608, "learning_rate": 6.239650712061093e-06, "loss": 0.4427, "step": 3448 }, { "epoch": 2.3918169209431346, "grad_norm": 0.39198565540149627, "learning_rate": 6.237305440060096e-06, "loss": 0.4174, "step": 3449 }, { "epoch": 2.39251040221914, "grad_norm": 0.37992019651591874, "learning_rate": 6.234959878043667e-06, "loss": 0.4051, "step": 3450 }, { "epoch": 2.3932038834951457, "grad_norm": 0.38178358197512036, "learning_rate": 6.232614026561586e-06, "loss": 0.4343, "step": 3451 }, { "epoch": 2.393897364771151, "grad_norm": 0.35388101649029896, "learning_rate": 6.2302678861637044e-06, "loss": 0.3979, "step": 3452 }, { "epoch": 2.3945908460471568, "grad_norm": 0.3296789306417218, "learning_rate": 6.2279214573999405e-06, "loss": 0.4479, "step": 3453 }, { "epoch": 2.395284327323162, "grad_norm": 0.35184432654620773, "learning_rate": 6.225574740820278e-06, "loss": 0.4292, "step": 3454 }, { "epoch": 2.395977808599168, "grad_norm": 0.36142021844379674, "learning_rate": 6.2232277369747755e-06, "loss": 0.4408, "step": 3455 }, { "epoch": 2.396671289875173, "grad_norm": 0.33277075131702893, "learning_rate": 6.220880446413548e-06, "loss": 0.4184, "step": 3456 }, { "epoch": 2.397364771151179, "grad_norm": 0.3574119895278261, "learning_rate": 6.2185328696867866e-06, "loss": 0.4641, "step": 3457 }, { "epoch": 2.3980582524271843, "grad_norm": 0.3700609396267541, "learning_rate": 6.216185007344745e-06, "loss": 0.4284, "step": 3458 }, { "epoch": 2.39875173370319, "grad_norm": 0.3549329881454592, "learning_rate": 6.2138368599377465e-06, "loss": 0.3992, "step": 3459 }, { "epoch": 2.3994452149791954, "grad_norm": 0.3568537084222683, "learning_rate": 6.211488428016179e-06, "loss": 0.4096, "step": 3460 }, { "epoch": 2.400138696255201, "grad_norm": 0.3430531484905292, "learning_rate": 6.209139712130499e-06, "loss": 0.4464, "step": 3461 }, { "epoch": 2.4008321775312065, "grad_norm": 0.38566858567711926, "learning_rate": 6.206790712831225e-06, "loss": 0.4135, "step": 3462 }, { "epoch": 2.4015256588072122, "grad_norm": 0.4019088519638128, "learning_rate": 6.204441430668949e-06, "loss": 0.4297, "step": 3463 }, { "epoch": 2.4022191400832176, "grad_norm": 0.35686918217779046, "learning_rate": 6.2020918661943265e-06, "loss": 0.3962, "step": 3464 }, { "epoch": 2.4029126213592233, "grad_norm": 0.34747328665351357, "learning_rate": 6.199742019958074e-06, "loss": 0.4198, "step": 3465 }, { "epoch": 2.4036061026352287, "grad_norm": 0.3719162458419108, "learning_rate": 6.197391892510982e-06, "loss": 0.4299, "step": 3466 }, { "epoch": 2.4042995839112344, "grad_norm": 0.3310709790808494, "learning_rate": 6.195041484403902e-06, "loss": 0.4139, "step": 3467 }, { "epoch": 2.4049930651872398, "grad_norm": 0.3798672827503458, "learning_rate": 6.192690796187753e-06, "loss": 0.426, "step": 3468 }, { "epoch": 2.4056865464632455, "grad_norm": 0.3819894485018657, "learning_rate": 6.19033982841352e-06, "loss": 0.3987, "step": 3469 }, { "epoch": 2.406380027739251, "grad_norm": 0.39491406632810266, "learning_rate": 6.1879885816322515e-06, "loss": 0.4372, "step": 3470 }, { "epoch": 2.4070735090152566, "grad_norm": 0.35578187182088616, "learning_rate": 6.1856370563950615e-06, "loss": 0.4168, "step": 3471 }, { "epoch": 2.407766990291262, "grad_norm": 0.41040564938520374, "learning_rate": 6.183285253253135e-06, "loss": 0.4544, "step": 3472 }, { "epoch": 2.4084604715672677, 
"grad_norm": 0.3829588038488335, "learning_rate": 6.180933172757715e-06, "loss": 0.4788, "step": 3473 }, { "epoch": 2.409153952843273, "grad_norm": 0.36089509034582007, "learning_rate": 6.17858081546011e-06, "loss": 0.4531, "step": 3474 }, { "epoch": 2.409847434119279, "grad_norm": 0.3335533790964443, "learning_rate": 6.176228181911699e-06, "loss": 0.4461, "step": 3475 }, { "epoch": 2.410540915395284, "grad_norm": 0.3577332596693892, "learning_rate": 6.173875272663919e-06, "loss": 0.4372, "step": 3476 }, { "epoch": 2.41123439667129, "grad_norm": 0.35065257131004135, "learning_rate": 6.171522088268279e-06, "loss": 0.4337, "step": 3477 }, { "epoch": 2.4119278779472952, "grad_norm": 0.3656127154704781, "learning_rate": 6.169168629276344e-06, "loss": 0.3947, "step": 3478 }, { "epoch": 2.412621359223301, "grad_norm": 0.35555102413957235, "learning_rate": 6.1668148962397525e-06, "loss": 0.429, "step": 3479 }, { "epoch": 2.4133148404993063, "grad_norm": 0.39728286825182824, "learning_rate": 6.164460889710196e-06, "loss": 0.4534, "step": 3480 }, { "epoch": 2.414008321775312, "grad_norm": 0.3775536266401251, "learning_rate": 6.162106610239444e-06, "loss": 0.4532, "step": 3481 }, { "epoch": 2.4147018030513174, "grad_norm": 0.3848447949989429, "learning_rate": 6.159752058379317e-06, "loss": 0.4142, "step": 3482 }, { "epoch": 2.415395284327323, "grad_norm": 0.3969949138569901, "learning_rate": 6.157397234681708e-06, "loss": 0.4592, "step": 3483 }, { "epoch": 2.4160887656033285, "grad_norm": 0.3798449635887271, "learning_rate": 6.155042139698568e-06, "loss": 0.4945, "step": 3484 }, { "epoch": 2.4167822468793343, "grad_norm": 0.4143446886855249, "learning_rate": 6.152686773981916e-06, "loss": 0.4362, "step": 3485 }, { "epoch": 2.4174757281553396, "grad_norm": 0.38027909160910556, "learning_rate": 6.150331138083833e-06, "loss": 0.4299, "step": 3486 }, { "epoch": 2.4181692094313454, "grad_norm": 0.3584462250679504, "learning_rate": 6.147975232556463e-06, "loss": 0.4575, "step": 3487 }, { "epoch": 2.4188626907073507, "grad_norm": 0.36196191936942634, "learning_rate": 6.145619057952012e-06, "loss": 0.4479, "step": 3488 }, { "epoch": 2.4195561719833565, "grad_norm": 0.3739003319311505, "learning_rate": 6.14326261482275e-06, "loss": 0.4457, "step": 3489 }, { "epoch": 2.420249653259362, "grad_norm": 0.403643357356033, "learning_rate": 6.1409059037210095e-06, "loss": 0.4502, "step": 3490 }, { "epoch": 2.4209431345353676, "grad_norm": 0.3882920801237256, "learning_rate": 6.13854892519919e-06, "loss": 0.4625, "step": 3491 }, { "epoch": 2.421636615811373, "grad_norm": 0.3433730370421034, "learning_rate": 6.136191679809749e-06, "loss": 0.4126, "step": 3492 }, { "epoch": 2.4223300970873787, "grad_norm": 0.37221985738254343, "learning_rate": 6.133834168105206e-06, "loss": 0.4085, "step": 3493 }, { "epoch": 2.423023578363384, "grad_norm": 0.370193556439351, "learning_rate": 6.131476390638145e-06, "loss": 0.4347, "step": 3494 }, { "epoch": 2.4237170596393898, "grad_norm": 0.4214519545995449, "learning_rate": 6.129118347961214e-06, "loss": 0.4533, "step": 3495 }, { "epoch": 2.424410540915395, "grad_norm": 0.37541750233962984, "learning_rate": 6.126760040627119e-06, "loss": 0.4329, "step": 3496 }, { "epoch": 2.425104022191401, "grad_norm": 0.3888424675714201, "learning_rate": 6.124401469188631e-06, "loss": 0.4654, "step": 3497 }, { "epoch": 2.425797503467406, "grad_norm": 0.35927907410069454, "learning_rate": 6.12204263419858e-06, "loss": 0.4069, "step": 3498 }, { "epoch": 2.426490984743412, "grad_norm": 
0.37705068260350294, "learning_rate": 6.119683536209864e-06, "loss": 0.4952, "step": 3499 }, { "epoch": 2.4271844660194173, "grad_norm": 0.47835491713331174, "learning_rate": 6.117324175775435e-06, "loss": 0.4289, "step": 3500 }, { "epoch": 2.427877947295423, "grad_norm": 0.3462358718952392, "learning_rate": 6.114964553448313e-06, "loss": 0.3877, "step": 3501 }, { "epoch": 2.4285714285714284, "grad_norm": 0.4277926935878686, "learning_rate": 6.112604669781572e-06, "loss": 0.4543, "step": 3502 }, { "epoch": 2.429264909847434, "grad_norm": 0.4495055019010333, "learning_rate": 6.110244525328356e-06, "loss": 0.4604, "step": 3503 }, { "epoch": 2.4299583911234395, "grad_norm": 0.3625289822831497, "learning_rate": 6.107884120641863e-06, "loss": 0.4029, "step": 3504 }, { "epoch": 2.4306518723994452, "grad_norm": 0.3693588124354737, "learning_rate": 6.105523456275358e-06, "loss": 0.4685, "step": 3505 }, { "epoch": 2.4313453536754506, "grad_norm": 0.3685339665885883, "learning_rate": 6.10316253278216e-06, "loss": 0.427, "step": 3506 }, { "epoch": 2.4320388349514563, "grad_norm": 0.3697019807561721, "learning_rate": 6.100801350715652e-06, "loss": 0.3954, "step": 3507 }, { "epoch": 2.4327323162274617, "grad_norm": 0.363318103111065, "learning_rate": 6.098439910629282e-06, "loss": 0.4114, "step": 3508 }, { "epoch": 2.4334257975034674, "grad_norm": 0.3266315487180377, "learning_rate": 6.096078213076553e-06, "loss": 0.419, "step": 3509 }, { "epoch": 2.4341192787794728, "grad_norm": 0.4050230145362845, "learning_rate": 6.093716258611028e-06, "loss": 0.4872, "step": 3510 }, { "epoch": 2.4348127600554785, "grad_norm": 0.38873262828825567, "learning_rate": 6.091354047786333e-06, "loss": 0.3918, "step": 3511 }, { "epoch": 2.435506241331484, "grad_norm": 0.36588040289423995, "learning_rate": 6.088991581156152e-06, "loss": 0.41, "step": 3512 }, { "epoch": 2.4361997226074896, "grad_norm": 0.3774239461687723, "learning_rate": 6.086628859274233e-06, "loss": 0.4458, "step": 3513 }, { "epoch": 2.436893203883495, "grad_norm": 0.35692562807169503, "learning_rate": 6.084265882694378e-06, "loss": 0.3932, "step": 3514 }, { "epoch": 2.4375866851595007, "grad_norm": 0.37598880536831636, "learning_rate": 6.081902651970453e-06, "loss": 0.429, "step": 3515 }, { "epoch": 2.438280166435506, "grad_norm": 0.37778554976321177, "learning_rate": 6.079539167656382e-06, "loss": 0.4405, "step": 3516 }, { "epoch": 2.438973647711512, "grad_norm": 0.41604532500207625, "learning_rate": 6.077175430306148e-06, "loss": 0.4417, "step": 3517 }, { "epoch": 2.4396671289875176, "grad_norm": 0.37668432889024267, "learning_rate": 6.074811440473795e-06, "loss": 0.4255, "step": 3518 }, { "epoch": 2.440360610263523, "grad_norm": 0.34923433939653253, "learning_rate": 6.0724471987134245e-06, "loss": 0.3966, "step": 3519 }, { "epoch": 2.4410540915395282, "grad_norm": 0.37713864245797907, "learning_rate": 6.070082705579198e-06, "loss": 0.4256, "step": 3520 }, { "epoch": 2.441747572815534, "grad_norm": 0.47284460768485814, "learning_rate": 6.0677179616253345e-06, "loss": 0.4094, "step": 3521 }, { "epoch": 2.4424410540915398, "grad_norm": 0.3865985984685952, "learning_rate": 6.065352967406114e-06, "loss": 0.4521, "step": 3522 }, { "epoch": 2.443134535367545, "grad_norm": 0.3400910164485317, "learning_rate": 6.062987723475873e-06, "loss": 0.4126, "step": 3523 }, { "epoch": 2.4438280166435504, "grad_norm": 0.36605467331989594, "learning_rate": 6.060622230389008e-06, "loss": 0.4153, "step": 3524 }, { "epoch": 2.444521497919556, "grad_norm": 0.34121558679763603, 
"learning_rate": 6.058256488699974e-06, "loss": 0.4017, "step": 3525 }, { "epoch": 2.445214979195562, "grad_norm": 0.39246465973234645, "learning_rate": 6.055890498963284e-06, "loss": 0.4629, "step": 3526 }, { "epoch": 2.4459084604715673, "grad_norm": 0.3637550735351632, "learning_rate": 6.053524261733508e-06, "loss": 0.4641, "step": 3527 }, { "epoch": 2.4466019417475726, "grad_norm": 0.3841111199229264, "learning_rate": 6.0511577775652744e-06, "loss": 0.4158, "step": 3528 }, { "epoch": 2.4472954230235784, "grad_norm": 0.36406369231437513, "learning_rate": 6.048791047013272e-06, "loss": 0.4249, "step": 3529 }, { "epoch": 2.447988904299584, "grad_norm": 0.37439954664911407, "learning_rate": 6.046424070632241e-06, "loss": 0.4294, "step": 3530 }, { "epoch": 2.4486823855755895, "grad_norm": 0.33899110882039546, "learning_rate": 6.044056848976988e-06, "loss": 0.384, "step": 3531 }, { "epoch": 2.449375866851595, "grad_norm": 0.4084797866489152, "learning_rate": 6.041689382602372e-06, "loss": 0.4447, "step": 3532 }, { "epoch": 2.4500693481276006, "grad_norm": 0.4280099837633069, "learning_rate": 6.039321672063308e-06, "loss": 0.4501, "step": 3533 }, { "epoch": 2.4507628294036063, "grad_norm": 0.3928584550836353, "learning_rate": 6.036953717914771e-06, "loss": 0.4167, "step": 3534 }, { "epoch": 2.4514563106796117, "grad_norm": 0.34285367268244193, "learning_rate": 6.034585520711792e-06, "loss": 0.3974, "step": 3535 }, { "epoch": 2.452149791955617, "grad_norm": 0.3484333096468712, "learning_rate": 6.0322170810094606e-06, "loss": 0.4106, "step": 3536 }, { "epoch": 2.4528432732316228, "grad_norm": 0.36921100923026773, "learning_rate": 6.029848399362921e-06, "loss": 0.3723, "step": 3537 }, { "epoch": 2.4535367545076285, "grad_norm": 0.3725268524285565, "learning_rate": 6.027479476327376e-06, "loss": 0.3896, "step": 3538 }, { "epoch": 2.454230235783634, "grad_norm": 0.3655224259408961, "learning_rate": 6.02511031245808e-06, "loss": 0.3995, "step": 3539 }, { "epoch": 2.454923717059639, "grad_norm": 0.3598975146850662, "learning_rate": 6.022740908310354e-06, "loss": 0.4762, "step": 3540 }, { "epoch": 2.455617198335645, "grad_norm": 0.38659460067365026, "learning_rate": 6.020371264439566e-06, "loss": 0.4543, "step": 3541 }, { "epoch": 2.4563106796116507, "grad_norm": 0.37124608830701855, "learning_rate": 6.018001381401143e-06, "loss": 0.4279, "step": 3542 }, { "epoch": 2.457004160887656, "grad_norm": 0.4545050738953536, "learning_rate": 6.015631259750568e-06, "loss": 0.4661, "step": 3543 }, { "epoch": 2.4576976421636614, "grad_norm": 0.4153803989283625, "learning_rate": 6.013260900043381e-06, "loss": 0.4452, "step": 3544 }, { "epoch": 2.458391123439667, "grad_norm": 0.4653826703965434, "learning_rate": 6.01089030283518e-06, "loss": 0.4573, "step": 3545 }, { "epoch": 2.459084604715673, "grad_norm": 0.3557503352742577, "learning_rate": 6.008519468681612e-06, "loss": 0.4483, "step": 3546 }, { "epoch": 2.4597780859916782, "grad_norm": 0.3781927730213355, "learning_rate": 6.006148398138383e-06, "loss": 0.4116, "step": 3547 }, { "epoch": 2.4604715672676836, "grad_norm": 0.34923264673005194, "learning_rate": 6.003777091761257e-06, "loss": 0.4043, "step": 3548 }, { "epoch": 2.4611650485436893, "grad_norm": 0.36707061518528994, "learning_rate": 6.001405550106052e-06, "loss": 0.4249, "step": 3549 }, { "epoch": 2.461858529819695, "grad_norm": 0.3506262194345391, "learning_rate": 5.999033773728637e-06, "loss": 0.4103, "step": 3550 }, { "epoch": 2.4625520110957004, "grad_norm": 0.35118606152034737, "learning_rate": 
5.996661763184941e-06, "loss": 0.4691, "step": 3551 }, { "epoch": 2.4632454923717058, "grad_norm": 0.3696119369979769, "learning_rate": 5.994289519030946e-06, "loss": 0.4536, "step": 3552 }, { "epoch": 2.4639389736477115, "grad_norm": 0.3368421069519799, "learning_rate": 5.991917041822689e-06, "loss": 0.3999, "step": 3553 }, { "epoch": 2.4646324549237173, "grad_norm": 0.38561838631313217, "learning_rate": 5.9895443321162615e-06, "loss": 0.4684, "step": 3554 }, { "epoch": 2.4653259361997226, "grad_norm": 0.3903469011689506, "learning_rate": 5.987171390467808e-06, "loss": 0.4104, "step": 3555 }, { "epoch": 2.466019417475728, "grad_norm": 0.36896222945766727, "learning_rate": 5.9847982174335314e-06, "loss": 0.4161, "step": 3556 }, { "epoch": 2.4667128987517337, "grad_norm": 0.38581153329654083, "learning_rate": 5.982424813569684e-06, "loss": 0.4483, "step": 3557 }, { "epoch": 2.4674063800277395, "grad_norm": 0.3822456506248116, "learning_rate": 5.980051179432575e-06, "loss": 0.4406, "step": 3558 }, { "epoch": 2.468099861303745, "grad_norm": 0.35559358171512856, "learning_rate": 5.97767731557857e-06, "loss": 0.4235, "step": 3559 }, { "epoch": 2.46879334257975, "grad_norm": 0.34699049854280395, "learning_rate": 5.975303222564079e-06, "loss": 0.3956, "step": 3560 }, { "epoch": 2.469486823855756, "grad_norm": 0.3590407573688313, "learning_rate": 5.972928900945578e-06, "loss": 0.4391, "step": 3561 }, { "epoch": 2.4701803051317617, "grad_norm": 0.3690075310976927, "learning_rate": 5.97055435127959e-06, "loss": 0.4586, "step": 3562 }, { "epoch": 2.470873786407767, "grad_norm": 0.37842562549053155, "learning_rate": 5.96817957412269e-06, "loss": 0.4242, "step": 3563 }, { "epoch": 2.4715672676837723, "grad_norm": 0.32643519567856305, "learning_rate": 5.965804570031508e-06, "loss": 0.4156, "step": 3564 }, { "epoch": 2.472260748959778, "grad_norm": 0.36356557827169317, "learning_rate": 5.963429339562731e-06, "loss": 0.4422, "step": 3565 }, { "epoch": 2.472954230235784, "grad_norm": 0.32838023738047295, "learning_rate": 5.961053883273095e-06, "loss": 0.4091, "step": 3566 }, { "epoch": 2.473647711511789, "grad_norm": 0.4418368573176722, "learning_rate": 5.958678201719389e-06, "loss": 0.3887, "step": 3567 }, { "epoch": 2.4743411927877945, "grad_norm": 0.3456252120828457, "learning_rate": 5.9563022954584545e-06, "loss": 0.4132, "step": 3568 }, { "epoch": 2.4750346740638003, "grad_norm": 0.39403659649425854, "learning_rate": 5.953926165047189e-06, "loss": 0.4126, "step": 3569 }, { "epoch": 2.475728155339806, "grad_norm": 0.3497396008806446, "learning_rate": 5.951549811042539e-06, "loss": 0.4301, "step": 3570 }, { "epoch": 2.4764216366158114, "grad_norm": 0.36668609408187486, "learning_rate": 5.949173234001504e-06, "loss": 0.4193, "step": 3571 }, { "epoch": 2.4771151178918167, "grad_norm": 0.38766503952118897, "learning_rate": 5.946796434481137e-06, "loss": 0.4382, "step": 3572 }, { "epoch": 2.4778085991678225, "grad_norm": 0.3745107517147705, "learning_rate": 5.944419413038544e-06, "loss": 0.3907, "step": 3573 }, { "epoch": 2.4785020804438282, "grad_norm": 0.35816419270557776, "learning_rate": 5.942042170230879e-06, "loss": 0.415, "step": 3574 }, { "epoch": 2.4791955617198336, "grad_norm": 0.36245015715227935, "learning_rate": 5.939664706615352e-06, "loss": 0.4235, "step": 3575 }, { "epoch": 2.479889042995839, "grad_norm": 0.42959334504062235, "learning_rate": 5.937287022749223e-06, "loss": 0.4475, "step": 3576 }, { "epoch": 2.4805825242718447, "grad_norm": 0.3883778674941454, "learning_rate": 
5.934909119189806e-06, "loss": 0.3753, "step": 3577 }, { "epoch": 2.4812760055478504, "grad_norm": 0.34801657442294465, "learning_rate": 5.932530996494461e-06, "loss": 0.407, "step": 3578 }, { "epoch": 2.4819694868238558, "grad_norm": 0.3699175010091076, "learning_rate": 5.930152655220603e-06, "loss": 0.389, "step": 3579 }, { "epoch": 2.482662968099861, "grad_norm": 0.43406862954324715, "learning_rate": 5.9277740959257e-06, "loss": 0.4265, "step": 3580 }, { "epoch": 2.483356449375867, "grad_norm": 0.3753728386909298, "learning_rate": 5.925395319167268e-06, "loss": 0.4493, "step": 3581 }, { "epoch": 2.4840499306518726, "grad_norm": 0.35952262852375005, "learning_rate": 5.923016325502877e-06, "loss": 0.3805, "step": 3582 }, { "epoch": 2.484743411927878, "grad_norm": 0.37871090159875964, "learning_rate": 5.920637115490142e-06, "loss": 0.4933, "step": 3583 }, { "epoch": 2.4854368932038833, "grad_norm": 0.34136792331318594, "learning_rate": 5.918257689686736e-06, "loss": 0.374, "step": 3584 }, { "epoch": 2.486130374479889, "grad_norm": 0.3621451250815319, "learning_rate": 5.915878048650376e-06, "loss": 0.4056, "step": 3585 }, { "epoch": 2.486823855755895, "grad_norm": 0.37449972640169954, "learning_rate": 5.9134981929388365e-06, "loss": 0.4089, "step": 3586 }, { "epoch": 2.4875173370319, "grad_norm": 0.3590037832570276, "learning_rate": 5.911118123109937e-06, "loss": 0.4493, "step": 3587 }, { "epoch": 2.4882108183079055, "grad_norm": 0.3463147956300701, "learning_rate": 5.9087378397215454e-06, "loss": 0.4313, "step": 3588 }, { "epoch": 2.4889042995839112, "grad_norm": 0.3676590254111761, "learning_rate": 5.906357343331587e-06, "loss": 0.3864, "step": 3589 }, { "epoch": 2.489597780859917, "grad_norm": 0.33792631019747793, "learning_rate": 5.903976634498032e-06, "loss": 0.3962, "step": 3590 }, { "epoch": 2.4902912621359223, "grad_norm": 0.3908004843816045, "learning_rate": 5.9015957137789006e-06, "loss": 0.4741, "step": 3591 }, { "epoch": 2.4909847434119277, "grad_norm": 0.39982125493541454, "learning_rate": 5.899214581732262e-06, "loss": 0.4227, "step": 3592 }, { "epoch": 2.4916782246879334, "grad_norm": 0.3714182417065424, "learning_rate": 5.8968332389162395e-06, "loss": 0.4359, "step": 3593 }, { "epoch": 2.492371705963939, "grad_norm": 0.3659110301562501, "learning_rate": 5.894451685889001e-06, "loss": 0.478, "step": 3594 }, { "epoch": 2.4930651872399445, "grad_norm": 0.3578060360196927, "learning_rate": 5.892069923208765e-06, "loss": 0.4271, "step": 3595 }, { "epoch": 2.49375866851595, "grad_norm": 0.46400941665591694, "learning_rate": 5.889687951433799e-06, "loss": 0.4859, "step": 3596 }, { "epoch": 2.4944521497919556, "grad_norm": 0.34094717603291347, "learning_rate": 5.88730577112242e-06, "loss": 0.4468, "step": 3597 }, { "epoch": 2.4951456310679614, "grad_norm": 0.33934210232918094, "learning_rate": 5.8849233828329964e-06, "loss": 0.4303, "step": 3598 }, { "epoch": 2.4958391123439667, "grad_norm": 0.3756564374773508, "learning_rate": 5.88254078712394e-06, "loss": 0.4781, "step": 3599 }, { "epoch": 2.496532593619972, "grad_norm": 0.32980797659104166, "learning_rate": 5.880157984553714e-06, "loss": 0.3996, "step": 3600 }, { "epoch": 2.497226074895978, "grad_norm": 0.3400914728972975, "learning_rate": 5.877774975680831e-06, "loss": 0.3972, "step": 3601 }, { "epoch": 2.4979195561719836, "grad_norm": 0.3357533296775801, "learning_rate": 5.875391761063851e-06, "loss": 0.4651, "step": 3602 }, { "epoch": 2.498613037447989, "grad_norm": 0.32063844730744445, "learning_rate": 5.873008341261383e-06, 
"loss": 0.4244, "step": 3603 }, { "epoch": 2.4993065187239942, "grad_norm": 0.375169256208774, "learning_rate": 5.870624716832083e-06, "loss": 0.4301, "step": 3604 }, { "epoch": 2.5, "grad_norm": 0.3475080822390931, "learning_rate": 5.8682408883346535e-06, "loss": 0.4085, "step": 3605 }, { "epoch": 2.5006934812760058, "grad_norm": 0.36772826908546163, "learning_rate": 5.865856856327846e-06, "loss": 0.4092, "step": 3606 }, { "epoch": 2.501386962552011, "grad_norm": 0.3627256418545802, "learning_rate": 5.8634726213704655e-06, "loss": 0.4287, "step": 3607 }, { "epoch": 2.5020804438280164, "grad_norm": 0.4019754729879606, "learning_rate": 5.861088184021355e-06, "loss": 0.4462, "step": 3608 }, { "epoch": 2.502773925104022, "grad_norm": 0.4945247373056535, "learning_rate": 5.858703544839409e-06, "loss": 0.4067, "step": 3609 }, { "epoch": 2.503467406380028, "grad_norm": 0.3735384090853162, "learning_rate": 5.856318704383572e-06, "loss": 0.4371, "step": 3610 }, { "epoch": 2.5041608876560333, "grad_norm": 0.3758964298783903, "learning_rate": 5.853933663212833e-06, "loss": 0.384, "step": 3611 }, { "epoch": 2.5048543689320386, "grad_norm": 0.3642334855044776, "learning_rate": 5.8515484218862286e-06, "loss": 0.3896, "step": 3612 }, { "epoch": 2.5055478502080444, "grad_norm": 0.38987795882478504, "learning_rate": 5.849162980962839e-06, "loss": 0.4152, "step": 3613 }, { "epoch": 2.50624133148405, "grad_norm": 0.35814210764448684, "learning_rate": 5.8467773410017995e-06, "loss": 0.3991, "step": 3614 }, { "epoch": 2.5069348127600555, "grad_norm": 0.4023951436808565, "learning_rate": 5.844391502562281e-06, "loss": 0.4031, "step": 3615 }, { "epoch": 2.507628294036061, "grad_norm": 0.6038430228135157, "learning_rate": 5.842005466203511e-06, "loss": 0.4175, "step": 3616 }, { "epoch": 2.5083217753120666, "grad_norm": 0.36399073534256104, "learning_rate": 5.839619232484758e-06, "loss": 0.4646, "step": 3617 }, { "epoch": 2.5090152565880723, "grad_norm": 0.37808237247968673, "learning_rate": 5.837232801965338e-06, "loss": 0.45, "step": 3618 }, { "epoch": 2.5097087378640777, "grad_norm": 0.4126439871763773, "learning_rate": 5.834846175204612e-06, "loss": 0.4647, "step": 3619 }, { "epoch": 2.510402219140083, "grad_norm": 0.3525077089200721, "learning_rate": 5.832459352761989e-06, "loss": 0.3847, "step": 3620 }, { "epoch": 2.5110957004160888, "grad_norm": 0.3593513739651244, "learning_rate": 5.830072335196921e-06, "loss": 0.4512, "step": 3621 }, { "epoch": 2.5117891816920945, "grad_norm": 1.5823414805917269, "learning_rate": 5.827685123068912e-06, "loss": 0.3924, "step": 3622 }, { "epoch": 2.5124826629681, "grad_norm": 0.389686569362526, "learning_rate": 5.825297716937503e-06, "loss": 0.4216, "step": 3623 }, { "epoch": 2.513176144244105, "grad_norm": 0.35500195083809455, "learning_rate": 5.822910117362287e-06, "loss": 0.4264, "step": 3624 }, { "epoch": 2.513869625520111, "grad_norm": 0.3666874572909834, "learning_rate": 5.820522324902899e-06, "loss": 0.4945, "step": 3625 }, { "epoch": 2.5145631067961167, "grad_norm": 0.36266210708471175, "learning_rate": 5.818134340119021e-06, "loss": 0.4217, "step": 3626 }, { "epoch": 2.515256588072122, "grad_norm": 0.37129892582074814, "learning_rate": 5.815746163570378e-06, "loss": 0.4046, "step": 3627 }, { "epoch": 2.5159500693481274, "grad_norm": 0.3676494083903266, "learning_rate": 5.813357795816742e-06, "loss": 0.3646, "step": 3628 }, { "epoch": 2.516643550624133, "grad_norm": 0.40734484978334934, "learning_rate": 5.81096923741793e-06, "loss": 0.4124, "step": 3629 }, { 
"epoch": 2.517337031900139, "grad_norm": 0.3497720871251518, "learning_rate": 5.8085804889338014e-06, "loss": 0.4193, "step": 3630 }, { "epoch": 2.5180305131761442, "grad_norm": 0.3842406086139197, "learning_rate": 5.806191550924264e-06, "loss": 0.4444, "step": 3631 }, { "epoch": 2.5187239944521496, "grad_norm": 0.4316227415806176, "learning_rate": 5.803802423949265e-06, "loss": 0.4265, "step": 3632 }, { "epoch": 2.5194174757281553, "grad_norm": 0.3453880931807856, "learning_rate": 5.801413108568798e-06, "loss": 0.4264, "step": 3633 }, { "epoch": 2.520110957004161, "grad_norm": 0.3736426631488774, "learning_rate": 5.7990236053429025e-06, "loss": 0.4394, "step": 3634 }, { "epoch": 2.5208044382801664, "grad_norm": 0.4198448009764268, "learning_rate": 5.7966339148316615e-06, "loss": 0.4521, "step": 3635 }, { "epoch": 2.5214979195561718, "grad_norm": 0.3961006874056316, "learning_rate": 5.7942440375952015e-06, "loss": 0.468, "step": 3636 }, { "epoch": 2.5221914008321775, "grad_norm": 0.36862208361195653, "learning_rate": 5.791853974193688e-06, "loss": 0.3924, "step": 3637 }, { "epoch": 2.5228848821081833, "grad_norm": 0.3970154434075573, "learning_rate": 5.789463725187341e-06, "loss": 0.4583, "step": 3638 }, { "epoch": 2.5235783633841886, "grad_norm": 0.4032495629966687, "learning_rate": 5.787073291136414e-06, "loss": 0.5179, "step": 3639 }, { "epoch": 2.524271844660194, "grad_norm": 0.465356869874365, "learning_rate": 5.7846826726012076e-06, "loss": 0.428, "step": 3640 }, { "epoch": 2.5249653259361997, "grad_norm": 0.3575642988970886, "learning_rate": 5.7822918701420636e-06, "loss": 0.4404, "step": 3641 }, { "epoch": 2.5256588072122055, "grad_norm": 0.41001725877069756, "learning_rate": 5.779900884319372e-06, "loss": 0.3935, "step": 3642 }, { "epoch": 2.526352288488211, "grad_norm": 0.3740055435921111, "learning_rate": 5.777509715693562e-06, "loss": 0.4215, "step": 3643 }, { "epoch": 2.527045769764216, "grad_norm": 0.3964942778362928, "learning_rate": 5.775118364825107e-06, "loss": 0.4949, "step": 3644 }, { "epoch": 2.527739251040222, "grad_norm": 0.4343354336726223, "learning_rate": 5.772726832274519e-06, "loss": 0.4173, "step": 3645 }, { "epoch": 2.5284327323162277, "grad_norm": 0.3953240330347288, "learning_rate": 5.7703351186023575e-06, "loss": 0.4559, "step": 3646 }, { "epoch": 2.529126213592233, "grad_norm": 0.3437620302551727, "learning_rate": 5.767943224369224e-06, "loss": 0.4283, "step": 3647 }, { "epoch": 2.5298196948682383, "grad_norm": 0.3600388539742395, "learning_rate": 5.765551150135761e-06, "loss": 0.4244, "step": 3648 }, { "epoch": 2.530513176144244, "grad_norm": 0.378088768154225, "learning_rate": 5.763158896462653e-06, "loss": 0.4117, "step": 3649 }, { "epoch": 2.53120665742025, "grad_norm": 0.3871220823092367, "learning_rate": 5.760766463910624e-06, "loss": 0.4257, "step": 3650 }, { "epoch": 2.531900138696255, "grad_norm": 0.3975216481332579, "learning_rate": 5.758373853040447e-06, "loss": 0.4102, "step": 3651 }, { "epoch": 2.5325936199722605, "grad_norm": 0.3545417809303507, "learning_rate": 5.755981064412933e-06, "loss": 0.3845, "step": 3652 }, { "epoch": 2.5332871012482663, "grad_norm": 0.3963274566480634, "learning_rate": 5.753588098588931e-06, "loss": 0.4906, "step": 3653 }, { "epoch": 2.533980582524272, "grad_norm": 0.3966180419143601, "learning_rate": 5.751194956129337e-06, "loss": 0.3977, "step": 3654 }, { "epoch": 2.5346740638002774, "grad_norm": 0.33594603471503137, "learning_rate": 5.748801637595085e-06, "loss": 0.4099, "step": 3655 }, { "epoch": 
2.5353675450762827, "grad_norm": 0.3748737960899183, "learning_rate": 5.746408143547153e-06, "loss": 0.4, "step": 3656 }, { "epoch": 2.5360610263522885, "grad_norm": 0.3624917302380124, "learning_rate": 5.7440144745465575e-06, "loss": 0.4139, "step": 3657 }, { "epoch": 2.5367545076282942, "grad_norm": 0.35178626204784935, "learning_rate": 5.7416206311543576e-06, "loss": 0.3942, "step": 3658 }, { "epoch": 2.5374479889042996, "grad_norm": 0.34888369461008567, "learning_rate": 5.739226613931652e-06, "loss": 0.4444, "step": 3659 }, { "epoch": 2.538141470180305, "grad_norm": 0.37680720361164716, "learning_rate": 5.736832423439583e-06, "loss": 0.4489, "step": 3660 }, { "epoch": 2.5388349514563107, "grad_norm": 0.3374700867181296, "learning_rate": 5.734438060239331e-06, "loss": 0.3839, "step": 3661 }, { "epoch": 2.5395284327323164, "grad_norm": 0.37486206247935777, "learning_rate": 5.732043524892115e-06, "loss": 0.4187, "step": 3662 }, { "epoch": 2.5402219140083218, "grad_norm": 0.3528700373676645, "learning_rate": 5.7296488179592e-06, "loss": 0.3598, "step": 3663 }, { "epoch": 2.540915395284327, "grad_norm": 0.3405786207476967, "learning_rate": 5.727253940001884e-06, "loss": 0.4334, "step": 3664 }, { "epoch": 2.541608876560333, "grad_norm": 0.37800274693889274, "learning_rate": 5.724858891581515e-06, "loss": 0.4946, "step": 3665 }, { "epoch": 2.5423023578363386, "grad_norm": 0.6256768377791588, "learning_rate": 5.722463673259469e-06, "loss": 0.4233, "step": 3666 }, { "epoch": 2.542995839112344, "grad_norm": 0.3709353796892423, "learning_rate": 5.7200682855971715e-06, "loss": 0.4505, "step": 3667 }, { "epoch": 2.5436893203883493, "grad_norm": 0.3869510608020292, "learning_rate": 5.717672729156082e-06, "loss": 0.4752, "step": 3668 }, { "epoch": 2.544382801664355, "grad_norm": 0.37218033040148146, "learning_rate": 5.715277004497702e-06, "loss": 0.4544, "step": 3669 }, { "epoch": 2.545076282940361, "grad_norm": 0.3533052851495621, "learning_rate": 5.712881112183575e-06, "loss": 0.4383, "step": 3670 }, { "epoch": 2.545769764216366, "grad_norm": 0.36007293429082987, "learning_rate": 5.710485052775275e-06, "loss": 0.42, "step": 3671 }, { "epoch": 2.5464632454923715, "grad_norm": 0.34545982114615587, "learning_rate": 5.708088826834426e-06, "loss": 0.407, "step": 3672 }, { "epoch": 2.5471567267683772, "grad_norm": 0.401933665753541, "learning_rate": 5.705692434922684e-06, "loss": 0.5003, "step": 3673 }, { "epoch": 2.547850208044383, "grad_norm": 0.3793201709540235, "learning_rate": 5.703295877601745e-06, "loss": 0.4904, "step": 3674 }, { "epoch": 2.5485436893203883, "grad_norm": 0.3471811469217267, "learning_rate": 5.700899155433347e-06, "loss": 0.4075, "step": 3675 }, { "epoch": 2.5492371705963937, "grad_norm": 0.3632752848689134, "learning_rate": 5.698502268979263e-06, "loss": 0.4053, "step": 3676 }, { "epoch": 2.5499306518723994, "grad_norm": 0.3611471748014215, "learning_rate": 5.6961052188013055e-06, "loss": 0.4524, "step": 3677 }, { "epoch": 2.550624133148405, "grad_norm": 0.34716061061819636, "learning_rate": 5.693708005461327e-06, "loss": 0.4568, "step": 3678 }, { "epoch": 2.5513176144244105, "grad_norm": 0.3651992123544262, "learning_rate": 5.691310629521215e-06, "loss": 0.4323, "step": 3679 }, { "epoch": 2.552011095700416, "grad_norm": 0.3700645595991681, "learning_rate": 5.688913091542899e-06, "loss": 0.4512, "step": 3680 }, { "epoch": 2.5527045769764216, "grad_norm": 0.40132072059824997, "learning_rate": 5.686515392088344e-06, "loss": 0.4173, "step": 3681 }, { "epoch": 2.5533980582524274, 
"grad_norm": 0.416106150662046, "learning_rate": 5.684117531719552e-06, "loss": 0.4643, "step": 3682 }, { "epoch": 2.5540915395284327, "grad_norm": 0.33317813353684955, "learning_rate": 5.681719510998565e-06, "loss": 0.3805, "step": 3683 }, { "epoch": 2.554785020804438, "grad_norm": 0.35243769236231487, "learning_rate": 5.6793213304874624e-06, "loss": 0.368, "step": 3684 }, { "epoch": 2.555478502080444, "grad_norm": 0.3344743557379927, "learning_rate": 5.67692299074836e-06, "loss": 0.3887, "step": 3685 }, { "epoch": 2.5561719833564496, "grad_norm": 0.3365237283325583, "learning_rate": 5.674524492343411e-06, "loss": 0.4553, "step": 3686 }, { "epoch": 2.556865464632455, "grad_norm": 0.3610517869545695, "learning_rate": 5.672125835834805e-06, "loss": 0.4153, "step": 3687 }, { "epoch": 2.5575589459084602, "grad_norm": 0.39418427644436477, "learning_rate": 5.669727021784772e-06, "loss": 0.4177, "step": 3688 }, { "epoch": 2.558252427184466, "grad_norm": 0.38356523634243245, "learning_rate": 5.667328050755576e-06, "loss": 0.4408, "step": 3689 }, { "epoch": 2.5589459084604718, "grad_norm": 0.37485358936035373, "learning_rate": 5.664928923309518e-06, "loss": 0.4429, "step": 3690 }, { "epoch": 2.559639389736477, "grad_norm": 0.36326370801839963, "learning_rate": 5.662529640008933e-06, "loss": 0.4442, "step": 3691 }, { "epoch": 2.5603328710124824, "grad_norm": 0.3588050611935666, "learning_rate": 5.660130201416203e-06, "loss": 0.4018, "step": 3692 }, { "epoch": 2.561026352288488, "grad_norm": 0.3808139714513288, "learning_rate": 5.657730608093732e-06, "loss": 0.4372, "step": 3693 }, { "epoch": 2.561719833564494, "grad_norm": 0.4158124410644996, "learning_rate": 5.655330860603971e-06, "loss": 0.4114, "step": 3694 }, { "epoch": 2.5624133148404993, "grad_norm": 0.3726410234351283, "learning_rate": 5.652930959509402e-06, "loss": 0.4471, "step": 3695 }, { "epoch": 2.5631067961165046, "grad_norm": 0.4089846857911151, "learning_rate": 5.650530905372545e-06, "loss": 0.4298, "step": 3696 }, { "epoch": 2.5638002773925104, "grad_norm": 0.38888347350128055, "learning_rate": 5.648130698755954e-06, "loss": 0.4317, "step": 3697 }, { "epoch": 2.564493758668516, "grad_norm": 0.4227994034076653, "learning_rate": 5.645730340222224e-06, "loss": 0.4903, "step": 3698 }, { "epoch": 2.5651872399445215, "grad_norm": 0.3711593143868576, "learning_rate": 5.6433298303339764e-06, "loss": 0.4298, "step": 3699 }, { "epoch": 2.565880721220527, "grad_norm": 0.37733931107737134, "learning_rate": 5.640929169653876e-06, "loss": 0.4166, "step": 3700 }, { "epoch": 2.5665742024965326, "grad_norm": 0.35997706656355766, "learning_rate": 5.638528358744621e-06, "loss": 0.3856, "step": 3701 }, { "epoch": 2.5672676837725383, "grad_norm": 0.37300285393631594, "learning_rate": 5.636127398168942e-06, "loss": 0.4392, "step": 3702 }, { "epoch": 2.5679611650485437, "grad_norm": 0.36269776826786154, "learning_rate": 5.633726288489609e-06, "loss": 0.4053, "step": 3703 }, { "epoch": 2.568654646324549, "grad_norm": 0.43047705361189964, "learning_rate": 5.631325030269422e-06, "loss": 0.4553, "step": 3704 }, { "epoch": 2.5693481276005548, "grad_norm": 0.38034380170016574, "learning_rate": 5.628923624071222e-06, "loss": 0.4124, "step": 3705 }, { "epoch": 2.5700416088765605, "grad_norm": 0.3866072263745971, "learning_rate": 5.626522070457879e-06, "loss": 0.3747, "step": 3706 }, { "epoch": 2.570735090152566, "grad_norm": 0.3873848514008934, "learning_rate": 5.6241203699923e-06, "loss": 0.4257, "step": 3707 }, { "epoch": 2.571428571428571, "grad_norm": 
0.41228222972864187, "learning_rate": 5.621718523237427e-06, "loss": 0.4273, "step": 3708 }, { "epoch": 2.572122052704577, "grad_norm": 0.3971077971750073, "learning_rate": 5.619316530756234e-06, "loss": 0.3981, "step": 3709 }, { "epoch": 2.5728155339805827, "grad_norm": 0.381797077329742, "learning_rate": 5.616914393111732e-06, "loss": 0.4329, "step": 3710 }, { "epoch": 2.573509015256588, "grad_norm": 0.3660546560833639, "learning_rate": 5.614512110866963e-06, "loss": 0.3804, "step": 3711 }, { "epoch": 2.5742024965325934, "grad_norm": 0.39255317148981794, "learning_rate": 5.612109684585007e-06, "loss": 0.4257, "step": 3712 }, { "epoch": 2.574895977808599, "grad_norm": 0.3773999077983148, "learning_rate": 5.6097071148289725e-06, "loss": 0.4213, "step": 3713 }, { "epoch": 2.575589459084605, "grad_norm": 0.3768246769612683, "learning_rate": 5.607304402162008e-06, "loss": 0.4416, "step": 3714 }, { "epoch": 2.5762829403606102, "grad_norm": 0.36375337821157094, "learning_rate": 5.604901547147289e-06, "loss": 0.41, "step": 3715 }, { "epoch": 2.5769764216366156, "grad_norm": 0.360967608892819, "learning_rate": 5.602498550348028e-06, "loss": 0.4571, "step": 3716 }, { "epoch": 2.5776699029126213, "grad_norm": 0.3411486721155386, "learning_rate": 5.600095412327471e-06, "loss": 0.3802, "step": 3717 }, { "epoch": 2.578363384188627, "grad_norm": 0.39022463140534003, "learning_rate": 5.597692133648894e-06, "loss": 0.4289, "step": 3718 }, { "epoch": 2.5790568654646324, "grad_norm": 0.37037153721869426, "learning_rate": 5.595288714875612e-06, "loss": 0.3758, "step": 3719 }, { "epoch": 2.5797503467406377, "grad_norm": 0.35030185660699426, "learning_rate": 5.592885156570964e-06, "loss": 0.458, "step": 3720 }, { "epoch": 2.5804438280166435, "grad_norm": 0.35810883640075986, "learning_rate": 5.590481459298332e-06, "loss": 0.454, "step": 3721 }, { "epoch": 2.5811373092926493, "grad_norm": 0.3449845728933635, "learning_rate": 5.588077623621119e-06, "loss": 0.4534, "step": 3722 }, { "epoch": 2.5818307905686546, "grad_norm": 0.33558412575658775, "learning_rate": 5.585673650102772e-06, "loss": 0.406, "step": 3723 }, { "epoch": 2.58252427184466, "grad_norm": 0.35776352115379173, "learning_rate": 5.583269539306762e-06, "loss": 0.4254, "step": 3724 }, { "epoch": 2.5832177531206657, "grad_norm": 0.3671552044271029, "learning_rate": 5.580865291796598e-06, "loss": 0.4048, "step": 3725 }, { "epoch": 2.5839112343966715, "grad_norm": 0.3441193519045531, "learning_rate": 5.578460908135815e-06, "loss": 0.4394, "step": 3726 }, { "epoch": 2.584604715672677, "grad_norm": 0.42984264415314943, "learning_rate": 5.576056388887985e-06, "loss": 0.4469, "step": 3727 }, { "epoch": 2.585298196948682, "grad_norm": 0.3770052828728155, "learning_rate": 5.57365173461671e-06, "loss": 0.4379, "step": 3728 }, { "epoch": 2.585991678224688, "grad_norm": 0.35548290003551913, "learning_rate": 5.5712469458856226e-06, "loss": 0.4313, "step": 3729 }, { "epoch": 2.5866851595006937, "grad_norm": 0.38674569287024546, "learning_rate": 5.568842023258389e-06, "loss": 0.439, "step": 3730 }, { "epoch": 2.587378640776699, "grad_norm": 0.40608562893752964, "learning_rate": 5.5664369672987025e-06, "loss": 0.4199, "step": 3731 }, { "epoch": 2.5880721220527043, "grad_norm": 0.3967768646832322, "learning_rate": 5.564031778570293e-06, "loss": 0.4563, "step": 3732 }, { "epoch": 2.58876560332871, "grad_norm": 0.6433857519437939, "learning_rate": 5.561626457636923e-06, "loss": 0.419, "step": 3733 }, { "epoch": 2.589459084604716, "grad_norm": 0.3345315937284391, 
"learning_rate": 5.559221005062377e-06, "loss": 0.4125, "step": 3734 }, { "epoch": 2.590152565880721, "grad_norm": 0.36153949866576096, "learning_rate": 5.556815421410479e-06, "loss": 0.3792, "step": 3735 }, { "epoch": 2.5908460471567265, "grad_norm": 0.3656600591312674, "learning_rate": 5.554409707245076e-06, "loss": 0.3912, "step": 3736 }, { "epoch": 2.5915395284327323, "grad_norm": 0.34484330304303534, "learning_rate": 5.552003863130053e-06, "loss": 0.4442, "step": 3737 }, { "epoch": 2.592233009708738, "grad_norm": 0.3951634673635669, "learning_rate": 5.549597889629325e-06, "loss": 0.4141, "step": 3738 }, { "epoch": 2.5929264909847434, "grad_norm": 0.34791603379098446, "learning_rate": 5.54719178730683e-06, "loss": 0.4432, "step": 3739 }, { "epoch": 2.5936199722607487, "grad_norm": 0.4019635224982042, "learning_rate": 5.544785556726544e-06, "loss": 0.4255, "step": 3740 }, { "epoch": 2.5943134535367545, "grad_norm": 0.3527970462135893, "learning_rate": 5.542379198452468e-06, "loss": 0.4089, "step": 3741 }, { "epoch": 2.5950069348127602, "grad_norm": 0.37691743428061764, "learning_rate": 5.5399727130486365e-06, "loss": 0.432, "step": 3742 }, { "epoch": 2.5957004160887656, "grad_norm": 0.3596247988540151, "learning_rate": 5.537566101079113e-06, "loss": 0.4501, "step": 3743 }, { "epoch": 2.596393897364771, "grad_norm": 0.3729150713327292, "learning_rate": 5.535159363107986e-06, "loss": 0.4464, "step": 3744 }, { "epoch": 2.5970873786407767, "grad_norm": 0.3860713401631495, "learning_rate": 5.532752499699381e-06, "loss": 0.4373, "step": 3745 }, { "epoch": 2.5977808599167824, "grad_norm": 0.3716020769185054, "learning_rate": 5.53034551141745e-06, "loss": 0.3894, "step": 3746 }, { "epoch": 2.5984743411927878, "grad_norm": 0.45183884790407536, "learning_rate": 5.527938398826371e-06, "loss": 0.4719, "step": 3747 }, { "epoch": 2.599167822468793, "grad_norm": 0.33219206342278024, "learning_rate": 5.525531162490354e-06, "loss": 0.3978, "step": 3748 }, { "epoch": 2.599861303744799, "grad_norm": 0.3504710087195076, "learning_rate": 5.523123802973639e-06, "loss": 0.4453, "step": 3749 }, { "epoch": 2.6005547850208046, "grad_norm": 0.37587629823776186, "learning_rate": 5.520716320840495e-06, "loss": 0.3941, "step": 3750 }, { "epoch": 2.60124826629681, "grad_norm": 0.6691127730153275, "learning_rate": 5.518308716655216e-06, "loss": 0.3909, "step": 3751 }, { "epoch": 2.6019417475728153, "grad_norm": 0.366401235771044, "learning_rate": 5.515900990982125e-06, "loss": 0.4781, "step": 3752 }, { "epoch": 2.602635228848821, "grad_norm": 0.3688956077443146, "learning_rate": 5.51349314438558e-06, "loss": 0.4083, "step": 3753 }, { "epoch": 2.603328710124827, "grad_norm": 0.35856846365208345, "learning_rate": 5.511085177429961e-06, "loss": 0.4293, "step": 3754 }, { "epoch": 2.604022191400832, "grad_norm": 0.3751803405536985, "learning_rate": 5.508677090679678e-06, "loss": 0.4318, "step": 3755 }, { "epoch": 2.6047156726768375, "grad_norm": 0.36795464504698244, "learning_rate": 5.5062688846991684e-06, "loss": 0.4388, "step": 3756 }, { "epoch": 2.6054091539528432, "grad_norm": 0.3297011393946642, "learning_rate": 5.503860560052898e-06, "loss": 0.4121, "step": 3757 }, { "epoch": 2.606102635228849, "grad_norm": 0.3498062100571866, "learning_rate": 5.501452117305363e-06, "loss": 0.4351, "step": 3758 }, { "epoch": 2.6067961165048543, "grad_norm": 0.3759610636124447, "learning_rate": 5.499043557021083e-06, "loss": 0.4001, "step": 3759 }, { "epoch": 2.6074895977808596, "grad_norm": 0.361913735062871, "learning_rate": 
5.496634879764607e-06, "loss": 0.4162, "step": 3760 }, { "epoch": 2.6081830790568654, "grad_norm": 0.60258597968192, "learning_rate": 5.494226086100513e-06, "loss": 0.4537, "step": 3761 }, { "epoch": 2.608876560332871, "grad_norm": 0.37847753758606156, "learning_rate": 5.491817176593402e-06, "loss": 0.4678, "step": 3762 }, { "epoch": 2.6095700416088765, "grad_norm": 0.3625587244406342, "learning_rate": 5.489408151807908e-06, "loss": 0.4089, "step": 3763 }, { "epoch": 2.610263522884882, "grad_norm": 0.4700096526748301, "learning_rate": 5.486999012308688e-06, "loss": 0.4449, "step": 3764 }, { "epoch": 2.6109570041608876, "grad_norm": 0.40812257849765043, "learning_rate": 5.484589758660426e-06, "loss": 0.4594, "step": 3765 }, { "epoch": 2.6116504854368934, "grad_norm": 0.34209366412052367, "learning_rate": 5.482180391427834e-06, "loss": 0.3939, "step": 3766 }, { "epoch": 2.6123439667128987, "grad_norm": 0.3800270302095452, "learning_rate": 5.479770911175649e-06, "loss": 0.4419, "step": 3767 }, { "epoch": 2.613037447988904, "grad_norm": 0.35056882827820857, "learning_rate": 5.4773613184686395e-06, "loss": 0.4048, "step": 3768 }, { "epoch": 2.61373092926491, "grad_norm": 0.34360538237402993, "learning_rate": 5.474951613871593e-06, "loss": 0.3936, "step": 3769 }, { "epoch": 2.6144244105409156, "grad_norm": 0.3568445867350128, "learning_rate": 5.472541797949329e-06, "loss": 0.4234, "step": 3770 }, { "epoch": 2.615117891816921, "grad_norm": 0.3870140610785522, "learning_rate": 5.470131871266687e-06, "loss": 0.4686, "step": 3771 }, { "epoch": 2.615811373092926, "grad_norm": 0.3850573844208301, "learning_rate": 5.467721834388543e-06, "loss": 0.4577, "step": 3772 }, { "epoch": 2.616504854368932, "grad_norm": 0.361784609782144, "learning_rate": 5.465311687879785e-06, "loss": 0.44, "step": 3773 }, { "epoch": 2.6171983356449378, "grad_norm": 0.3682537385488214, "learning_rate": 5.46290143230534e-06, "loss": 0.4247, "step": 3774 }, { "epoch": 2.617891816920943, "grad_norm": 0.37963767194274, "learning_rate": 5.460491068230151e-06, "loss": 0.4396, "step": 3775 }, { "epoch": 2.6185852981969484, "grad_norm": 0.3635303131208463, "learning_rate": 5.45808059621919e-06, "loss": 0.3716, "step": 3776 }, { "epoch": 2.619278779472954, "grad_norm": 0.3518733468155541, "learning_rate": 5.4556700168374545e-06, "loss": 0.4338, "step": 3777 }, { "epoch": 2.61997226074896, "grad_norm": 0.3618489564081225, "learning_rate": 5.453259330649968e-06, "loss": 0.4228, "step": 3778 }, { "epoch": 2.6206657420249653, "grad_norm": 0.3787554840616143, "learning_rate": 5.450848538221778e-06, "loss": 0.4473, "step": 3779 }, { "epoch": 2.6213592233009706, "grad_norm": 0.4644117103205756, "learning_rate": 5.448437640117954e-06, "loss": 0.4532, "step": 3780 }, { "epoch": 2.6220527045769764, "grad_norm": 0.3886094686504835, "learning_rate": 5.446026636903597e-06, "loss": 0.4161, "step": 3781 }, { "epoch": 2.622746185852982, "grad_norm": 0.3534053504637415, "learning_rate": 5.443615529143824e-06, "loss": 0.4568, "step": 3782 }, { "epoch": 2.6234396671289875, "grad_norm": 0.42493276447793155, "learning_rate": 5.441204317403786e-06, "loss": 0.4519, "step": 3783 }, { "epoch": 2.624133148404993, "grad_norm": 0.36455551842590017, "learning_rate": 5.43879300224865e-06, "loss": 0.4692, "step": 3784 }, { "epoch": 2.6248266296809986, "grad_norm": 0.36011237410953445, "learning_rate": 5.436381584243612e-06, "loss": 0.4225, "step": 3785 }, { "epoch": 2.6255201109570043, "grad_norm": 0.3334683283562822, "learning_rate": 5.4339700639538916e-06, "loss": 
0.4399, "step": 3786 }, { "epoch": 2.6262135922330097, "grad_norm": 0.4146399380578723, "learning_rate": 5.431558441944731e-06, "loss": 0.447, "step": 3787 }, { "epoch": 2.6269070735090154, "grad_norm": 0.4636544598493252, "learning_rate": 5.429146718781399e-06, "loss": 0.4003, "step": 3788 }, { "epoch": 2.6276005547850207, "grad_norm": 0.37412138567330283, "learning_rate": 5.426734895029181e-06, "loss": 0.4417, "step": 3789 }, { "epoch": 2.6282940360610265, "grad_norm": 0.3722010929181449, "learning_rate": 5.424322971253395e-06, "loss": 0.4565, "step": 3790 }, { "epoch": 2.628987517337032, "grad_norm": 0.4112008967241748, "learning_rate": 5.4219109480193785e-06, "loss": 0.4685, "step": 3791 }, { "epoch": 2.6296809986130376, "grad_norm": 0.37991973696376485, "learning_rate": 5.419498825892492e-06, "loss": 0.4443, "step": 3792 }, { "epoch": 2.630374479889043, "grad_norm": 0.3991715151825618, "learning_rate": 5.417086605438117e-06, "loss": 0.4896, "step": 3793 }, { "epoch": 2.6310679611650487, "grad_norm": 0.3647426870348548, "learning_rate": 5.414674287221663e-06, "loss": 0.4505, "step": 3794 }, { "epoch": 2.631761442441054, "grad_norm": 0.3845405016986538, "learning_rate": 5.412261871808559e-06, "loss": 0.4155, "step": 3795 }, { "epoch": 2.63245492371706, "grad_norm": 0.3631268322169773, "learning_rate": 5.4098493597642595e-06, "loss": 0.4128, "step": 3796 }, { "epoch": 2.633148404993065, "grad_norm": 0.7856996704831801, "learning_rate": 5.407436751654238e-06, "loss": 0.4495, "step": 3797 }, { "epoch": 2.633841886269071, "grad_norm": 0.4233322253719357, "learning_rate": 5.4050240480439906e-06, "loss": 0.4176, "step": 3798 }, { "epoch": 2.6345353675450762, "grad_norm": 0.38586780141610955, "learning_rate": 5.402611249499042e-06, "loss": 0.4427, "step": 3799 }, { "epoch": 2.635228848821082, "grad_norm": 0.3778446149227224, "learning_rate": 5.400198356584932e-06, "loss": 0.4388, "step": 3800 }, { "epoch": 2.6359223300970873, "grad_norm": 0.36889278078010745, "learning_rate": 5.397785369867227e-06, "loss": 0.4256, "step": 3801 }, { "epoch": 2.636615811373093, "grad_norm": 0.3465106412138196, "learning_rate": 5.395372289911509e-06, "loss": 0.4158, "step": 3802 }, { "epoch": 2.6373092926490984, "grad_norm": 0.376075046648369, "learning_rate": 5.392959117283391e-06, "loss": 0.4359, "step": 3803 }, { "epoch": 2.638002773925104, "grad_norm": 0.3903036427794065, "learning_rate": 5.390545852548502e-06, "loss": 0.4346, "step": 3804 }, { "epoch": 2.6386962552011095, "grad_norm": 0.3293396104525081, "learning_rate": 5.388132496272493e-06, "loss": 0.3703, "step": 3805 }, { "epoch": 2.6393897364771153, "grad_norm": 0.42174816417885386, "learning_rate": 5.3857190490210385e-06, "loss": 0.375, "step": 3806 }, { "epoch": 2.6400832177531206, "grad_norm": 0.4618785646157206, "learning_rate": 5.383305511359832e-06, "loss": 0.4177, "step": 3807 }, { "epoch": 2.6407766990291264, "grad_norm": 0.3694977352082934, "learning_rate": 5.380891883854591e-06, "loss": 0.4109, "step": 3808 }, { "epoch": 2.6414701803051317, "grad_norm": 0.3592035568808262, "learning_rate": 5.3784781670710495e-06, "loss": 0.4085, "step": 3809 }, { "epoch": 2.6421636615811375, "grad_norm": 0.43947745945284844, "learning_rate": 5.3760643615749675e-06, "loss": 0.4321, "step": 3810 }, { "epoch": 2.642857142857143, "grad_norm": 0.32092024034246674, "learning_rate": 5.373650467932122e-06, "loss": 0.3822, "step": 3811 }, { "epoch": 2.6435506241331486, "grad_norm": 0.35861530565497973, "learning_rate": 5.3712364867083134e-06, "loss": 0.394, "step": 
3812 }, { "epoch": 2.644244105409154, "grad_norm": 0.378026435324307, "learning_rate": 5.368822418469361e-06, "loss": 0.4177, "step": 3813 }, { "epoch": 2.6449375866851597, "grad_norm": 0.3584373070708219, "learning_rate": 5.366408263781104e-06, "loss": 0.375, "step": 3814 }, { "epoch": 2.645631067961165, "grad_norm": 0.41986316255367523, "learning_rate": 5.363994023209404e-06, "loss": 0.4616, "step": 3815 }, { "epoch": 2.6463245492371708, "grad_norm": 0.3451241161415589, "learning_rate": 5.361579697320142e-06, "loss": 0.4281, "step": 3816 }, { "epoch": 2.647018030513176, "grad_norm": 1.4369867788114814, "learning_rate": 5.359165286679218e-06, "loss": 0.3929, "step": 3817 }, { "epoch": 2.647711511789182, "grad_norm": 0.3895089837798235, "learning_rate": 5.35675079185255e-06, "loss": 0.4415, "step": 3818 }, { "epoch": 2.648404993065187, "grad_norm": 0.3626656054242166, "learning_rate": 5.354336213406082e-06, "loss": 0.4074, "step": 3819 }, { "epoch": 2.649098474341193, "grad_norm": 0.37542529805220165, "learning_rate": 5.351921551905771e-06, "loss": 0.4433, "step": 3820 }, { "epoch": 2.6497919556171983, "grad_norm": 0.36070413357304115, "learning_rate": 5.349506807917596e-06, "loss": 0.4098, "step": 3821 }, { "epoch": 2.650485436893204, "grad_norm": 0.3708055315327396, "learning_rate": 5.347091982007557e-06, "loss": 0.4061, "step": 3822 }, { "epoch": 2.6511789181692094, "grad_norm": 0.35116147704308043, "learning_rate": 5.344677074741672e-06, "loss": 0.4528, "step": 3823 }, { "epoch": 2.651872399445215, "grad_norm": 0.36822519637537554, "learning_rate": 5.342262086685978e-06, "loss": 0.4577, "step": 3824 }, { "epoch": 2.6525658807212205, "grad_norm": 0.33138172961978474, "learning_rate": 5.339847018406528e-06, "loss": 0.4056, "step": 3825 }, { "epoch": 2.6532593619972262, "grad_norm": 0.3952774135535713, "learning_rate": 5.337431870469398e-06, "loss": 0.4416, "step": 3826 }, { "epoch": 2.6539528432732316, "grad_norm": 0.37808939390975305, "learning_rate": 5.335016643440682e-06, "loss": 0.4733, "step": 3827 }, { "epoch": 2.6546463245492373, "grad_norm": 0.3883742534975216, "learning_rate": 5.332601337886491e-06, "loss": 0.4753, "step": 3828 }, { "epoch": 2.6553398058252426, "grad_norm": 0.3658034489543328, "learning_rate": 5.330185954372955e-06, "loss": 0.4091, "step": 3829 }, { "epoch": 2.6560332871012484, "grad_norm": 0.352242607905625, "learning_rate": 5.327770493466222e-06, "loss": 0.4351, "step": 3830 }, { "epoch": 2.6567267683772537, "grad_norm": 0.38755288958224077, "learning_rate": 5.325354955732459e-06, "loss": 0.4242, "step": 3831 }, { "epoch": 2.6574202496532595, "grad_norm": 0.4128883676447061, "learning_rate": 5.322939341737853e-06, "loss": 0.4599, "step": 3832 }, { "epoch": 2.658113730929265, "grad_norm": 0.3446035011512247, "learning_rate": 5.320523652048603e-06, "loss": 0.3813, "step": 3833 }, { "epoch": 2.6588072122052706, "grad_norm": 0.44146869732628513, "learning_rate": 5.318107887230929e-06, "loss": 0.4327, "step": 3834 }, { "epoch": 2.659500693481276, "grad_norm": 0.382938696230218, "learning_rate": 5.31569204785107e-06, "loss": 0.4394, "step": 3835 }, { "epoch": 2.6601941747572817, "grad_norm": 0.30243261300851093, "learning_rate": 5.3132761344752825e-06, "loss": 0.3379, "step": 3836 }, { "epoch": 2.660887656033287, "grad_norm": 0.35189179104860097, "learning_rate": 5.3108601476698385e-06, "loss": 0.4075, "step": 3837 }, { "epoch": 2.661581137309293, "grad_norm": 0.35557402150217593, "learning_rate": 5.308444088001027e-06, "loss": 0.4013, "step": 3838 }, { "epoch": 
2.662274618585298, "grad_norm": 0.3751145373263566, "learning_rate": 5.3060279560351534e-06, "loss": 0.3767, "step": 3839 }, { "epoch": 2.662968099861304, "grad_norm": 0.40124517323670383, "learning_rate": 5.303611752338545e-06, "loss": 0.4441, "step": 3840 }, { "epoch": 2.663661581137309, "grad_norm": 0.37029711674283333, "learning_rate": 5.301195477477541e-06, "loss": 0.4523, "step": 3841 }, { "epoch": 2.664355062413315, "grad_norm": 0.3457799414696006, "learning_rate": 5.298779132018498e-06, "loss": 0.3814, "step": 3842 }, { "epoch": 2.6650485436893203, "grad_norm": 0.4467982075906699, "learning_rate": 5.2963627165277884e-06, "loss": 0.5013, "step": 3843 }, { "epoch": 2.665742024965326, "grad_norm": 0.3612903049099139, "learning_rate": 5.293946231571806e-06, "loss": 0.4286, "step": 3844 }, { "epoch": 2.6664355062413314, "grad_norm": 0.42096380664232824, "learning_rate": 5.291529677716957e-06, "loss": 0.4524, "step": 3845 }, { "epoch": 2.667128987517337, "grad_norm": 0.37842426275360985, "learning_rate": 5.289113055529662e-06, "loss": 0.4953, "step": 3846 }, { "epoch": 2.6678224687933425, "grad_norm": 0.4527863340602083, "learning_rate": 5.2866963655763585e-06, "loss": 0.4964, "step": 3847 }, { "epoch": 2.6685159500693483, "grad_norm": 0.41038815945225465, "learning_rate": 5.2842796084235056e-06, "loss": 0.4955, "step": 3848 }, { "epoch": 2.6692094313453536, "grad_norm": 0.6243938111017512, "learning_rate": 5.281862784637572e-06, "loss": 0.4459, "step": 3849 }, { "epoch": 2.6699029126213594, "grad_norm": 0.48529753597167835, "learning_rate": 5.279445894785042e-06, "loss": 0.4218, "step": 3850 }, { "epoch": 2.6705963938973647, "grad_norm": 0.39357500392050937, "learning_rate": 5.277028939432417e-06, "loss": 0.4677, "step": 3851 }, { "epoch": 2.6712898751733705, "grad_norm": 0.4158515313337612, "learning_rate": 5.274611919146216e-06, "loss": 0.4871, "step": 3852 }, { "epoch": 2.671983356449376, "grad_norm": 0.3648318014578259, "learning_rate": 5.27219483449297e-06, "loss": 0.4444, "step": 3853 }, { "epoch": 2.6726768377253816, "grad_norm": 0.39245061080790583, "learning_rate": 5.269777686039226e-06, "loss": 0.4346, "step": 3854 }, { "epoch": 2.673370319001387, "grad_norm": 0.37761298304059504, "learning_rate": 5.267360474351546e-06, "loss": 0.51, "step": 3855 }, { "epoch": 2.6740638002773927, "grad_norm": 0.3276778860246595, "learning_rate": 5.264943199996506e-06, "loss": 0.3985, "step": 3856 }, { "epoch": 2.674757281553398, "grad_norm": 0.400475805899134, "learning_rate": 5.2625258635407004e-06, "loss": 0.4936, "step": 3857 }, { "epoch": 2.6754507628294038, "grad_norm": 0.38231088685546327, "learning_rate": 5.2601084655507336e-06, "loss": 0.4338, "step": 3858 }, { "epoch": 2.676144244105409, "grad_norm": 0.3565492326228381, "learning_rate": 5.2576910065932266e-06, "loss": 0.4004, "step": 3859 }, { "epoch": 2.676837725381415, "grad_norm": 0.3823889598181103, "learning_rate": 5.255273487234813e-06, "loss": 0.4451, "step": 3860 }, { "epoch": 2.67753120665742, "grad_norm": 0.353468165004134, "learning_rate": 5.252855908042142e-06, "loss": 0.4164, "step": 3861 }, { "epoch": 2.678224687933426, "grad_norm": 0.3372804452217276, "learning_rate": 5.25043826958188e-06, "loss": 0.4089, "step": 3862 }, { "epoch": 2.6789181692094313, "grad_norm": 0.5830089257063927, "learning_rate": 5.248020572420699e-06, "loss": 0.4095, "step": 3863 }, { "epoch": 2.679611650485437, "grad_norm": 0.4387228300901809, "learning_rate": 5.245602817125294e-06, "loss": 0.4846, "step": 3864 }, { "epoch": 2.6803051317614424, 
"grad_norm": 0.444289226140612, "learning_rate": 5.243185004262365e-06, "loss": 0.3794, "step": 3865 }, { "epoch": 2.680998613037448, "grad_norm": 0.33049029406589236, "learning_rate": 5.240767134398634e-06, "loss": 0.3988, "step": 3866 }, { "epoch": 2.6816920943134535, "grad_norm": 0.41979607630867255, "learning_rate": 5.238349208100832e-06, "loss": 0.4828, "step": 3867 }, { "epoch": 2.6823855755894592, "grad_norm": 0.3847292273392992, "learning_rate": 5.235931225935699e-06, "loss": 0.4333, "step": 3868 }, { "epoch": 2.6830790568654646, "grad_norm": 0.36875801078235587, "learning_rate": 5.2335131884699965e-06, "loss": 0.4368, "step": 3869 }, { "epoch": 2.6837725381414703, "grad_norm": 0.3735920475953635, "learning_rate": 5.231095096270493e-06, "loss": 0.3928, "step": 3870 }, { "epoch": 2.6844660194174756, "grad_norm": 0.4401761489296933, "learning_rate": 5.228676949903974e-06, "loss": 0.4658, "step": 3871 }, { "epoch": 2.6851595006934814, "grad_norm": 0.40798354428187616, "learning_rate": 5.226258749937232e-06, "loss": 0.4463, "step": 3872 }, { "epoch": 2.6858529819694867, "grad_norm": 0.36124373823914524, "learning_rate": 5.2238404969370795e-06, "loss": 0.3917, "step": 3873 }, { "epoch": 2.6865464632454925, "grad_norm": 0.39546396662256333, "learning_rate": 5.221422191470335e-06, "loss": 0.4414, "step": 3874 }, { "epoch": 2.687239944521498, "grad_norm": 0.35744152792064293, "learning_rate": 5.2190038341038315e-06, "loss": 0.3899, "step": 3875 }, { "epoch": 2.6879334257975036, "grad_norm": 0.3781062902299069, "learning_rate": 5.216585425404417e-06, "loss": 0.4787, "step": 3876 }, { "epoch": 2.688626907073509, "grad_norm": 0.4247252277253531, "learning_rate": 5.214166965938947e-06, "loss": 0.402, "step": 3877 }, { "epoch": 2.6893203883495147, "grad_norm": 0.35503088527907695, "learning_rate": 5.211748456274291e-06, "loss": 0.356, "step": 3878 }, { "epoch": 2.69001386962552, "grad_norm": 0.3785257962903678, "learning_rate": 5.20932989697733e-06, "loss": 0.434, "step": 3879 }, { "epoch": 2.690707350901526, "grad_norm": 0.39241219128103705, "learning_rate": 5.2069112886149564e-06, "loss": 0.478, "step": 3880 }, { "epoch": 2.691400832177531, "grad_norm": 0.33931724990554285, "learning_rate": 5.204492631754078e-06, "loss": 0.4215, "step": 3881 }, { "epoch": 2.692094313453537, "grad_norm": 1.4503540627682872, "learning_rate": 5.202073926961606e-06, "loss": 0.4134, "step": 3882 }, { "epoch": 2.692787794729542, "grad_norm": 0.37210848766355686, "learning_rate": 5.1996551748044685e-06, "loss": 0.4377, "step": 3883 }, { "epoch": 2.693481276005548, "grad_norm": 0.37487556806381966, "learning_rate": 5.197236375849604e-06, "loss": 0.3642, "step": 3884 }, { "epoch": 2.6941747572815533, "grad_norm": 0.3511259028253951, "learning_rate": 5.1948175306639625e-06, "loss": 0.4125, "step": 3885 }, { "epoch": 2.694868238557559, "grad_norm": 0.3728669647776683, "learning_rate": 5.192398639814503e-06, "loss": 0.4129, "step": 3886 }, { "epoch": 2.6955617198335644, "grad_norm": 0.37291981358264475, "learning_rate": 5.189979703868195e-06, "loss": 0.4068, "step": 3887 }, { "epoch": 2.69625520110957, "grad_norm": 0.3829356302509595, "learning_rate": 5.187560723392019e-06, "loss": 0.4089, "step": 3888 }, { "epoch": 2.6969486823855755, "grad_norm": 0.3747899019215926, "learning_rate": 5.1851416989529705e-06, "loss": 0.4109, "step": 3889 }, { "epoch": 2.6976421636615813, "grad_norm": 0.36963536144636167, "learning_rate": 5.182722631118048e-06, "loss": 0.429, "step": 3890 }, { "epoch": 2.6983356449375866, "grad_norm": 
0.35026408797808406, "learning_rate": 5.180303520454263e-06, "loss": 0.3941, "step": 3891 }, { "epoch": 2.6990291262135924, "grad_norm": 0.37951603060911604, "learning_rate": 5.177884367528637e-06, "loss": 0.4464, "step": 3892 }, { "epoch": 2.6997226074895977, "grad_norm": 0.36419724131904624, "learning_rate": 5.1754651729082075e-06, "loss": 0.432, "step": 3893 }, { "epoch": 2.7004160887656035, "grad_norm": 0.33490403441036426, "learning_rate": 5.173045937160011e-06, "loss": 0.4361, "step": 3894 }, { "epoch": 2.701109570041609, "grad_norm": 0.38031713587647936, "learning_rate": 5.170626660851099e-06, "loss": 0.3977, "step": 3895 }, { "epoch": 2.7018030513176146, "grad_norm": 0.40244117903258303, "learning_rate": 5.168207344548534e-06, "loss": 0.4319, "step": 3896 }, { "epoch": 2.70249653259362, "grad_norm": 0.3762204461756013, "learning_rate": 5.165787988819384e-06, "loss": 0.4057, "step": 3897 }, { "epoch": 2.7031900138696257, "grad_norm": 0.3931808032627882, "learning_rate": 5.163368594230732e-06, "loss": 0.4209, "step": 3898 }, { "epoch": 2.703883495145631, "grad_norm": 0.3350320336599286, "learning_rate": 5.160949161349665e-06, "loss": 0.3606, "step": 3899 }, { "epoch": 2.7045769764216367, "grad_norm": 0.36981453923450963, "learning_rate": 5.158529690743279e-06, "loss": 0.4232, "step": 3900 }, { "epoch": 2.705270457697642, "grad_norm": 0.40037405326480896, "learning_rate": 5.156110182978682e-06, "loss": 0.4781, "step": 3901 }, { "epoch": 2.705963938973648, "grad_norm": 0.35507189122059263, "learning_rate": 5.153690638622989e-06, "loss": 0.39, "step": 3902 }, { "epoch": 2.706657420249653, "grad_norm": 0.3679326862552085, "learning_rate": 5.1512710582433246e-06, "loss": 0.434, "step": 3903 }, { "epoch": 2.707350901525659, "grad_norm": 0.3617900153838919, "learning_rate": 5.148851442406817e-06, "loss": 0.4125, "step": 3904 }, { "epoch": 2.7080443828016643, "grad_norm": 0.4024626544605178, "learning_rate": 5.1464317916806115e-06, "loss": 0.4068, "step": 3905 }, { "epoch": 2.70873786407767, "grad_norm": 0.3758695499541493, "learning_rate": 5.1440121066318526e-06, "loss": 0.4396, "step": 3906 }, { "epoch": 2.7094313453536754, "grad_norm": 0.3724970119974219, "learning_rate": 5.141592387827701e-06, "loss": 0.4185, "step": 3907 }, { "epoch": 2.710124826629681, "grad_norm": 0.4051626400875616, "learning_rate": 5.1391726358353174e-06, "loss": 0.3857, "step": 3908 }, { "epoch": 2.7108183079056865, "grad_norm": 0.38310603850455355, "learning_rate": 5.136752851221878e-06, "loss": 0.4384, "step": 3909 }, { "epoch": 2.7115117891816922, "grad_norm": 0.3708684290872625, "learning_rate": 5.134333034554559e-06, "loss": 0.4449, "step": 3910 }, { "epoch": 2.7122052704576975, "grad_norm": 0.3853142008963356, "learning_rate": 5.13191318640055e-06, "loss": 0.4846, "step": 3911 }, { "epoch": 2.7128987517337033, "grad_norm": 0.35251127257771286, "learning_rate": 5.1294933073270455e-06, "loss": 0.4171, "step": 3912 }, { "epoch": 2.7135922330097086, "grad_norm": 0.39257004227116815, "learning_rate": 5.127073397901248e-06, "loss": 0.494, "step": 3913 }, { "epoch": 2.7142857142857144, "grad_norm": 0.365390730712355, "learning_rate": 5.1246534586903655e-06, "loss": 0.4257, "step": 3914 }, { "epoch": 2.7149791955617197, "grad_norm": 0.34865437840966107, "learning_rate": 5.122233490261615e-06, "loss": 0.4062, "step": 3915 }, { "epoch": 2.7156726768377255, "grad_norm": 0.4707393739876894, "learning_rate": 5.119813493182221e-06, "loss": 0.4109, "step": 3916 }, { "epoch": 2.716366158113731, "grad_norm": 
0.4518684083272908, "learning_rate": 5.1173934680194105e-06, "loss": 0.4118, "step": 3917 }, { "epoch": 2.7170596393897366, "grad_norm": 0.37911556292159143, "learning_rate": 5.114973415340422e-06, "loss": 0.4346, "step": 3918 }, { "epoch": 2.717753120665742, "grad_norm": 0.3737425317971622, "learning_rate": 5.112553335712497e-06, "loss": 0.4407, "step": 3919 }, { "epoch": 2.7184466019417477, "grad_norm": 0.39800718953842057, "learning_rate": 5.110133229702886e-06, "loss": 0.5039, "step": 3920 }, { "epoch": 2.719140083217753, "grad_norm": 0.35230826607601595, "learning_rate": 5.107713097878842e-06, "loss": 0.43, "step": 3921 }, { "epoch": 2.719833564493759, "grad_norm": 0.37002909636834563, "learning_rate": 5.10529294080763e-06, "loss": 0.4116, "step": 3922 }, { "epoch": 2.720527045769764, "grad_norm": 0.3915145136163304, "learning_rate": 5.102872759056514e-06, "loss": 0.4329, "step": 3923 }, { "epoch": 2.72122052704577, "grad_norm": 0.3735224585269615, "learning_rate": 5.100452553192769e-06, "loss": 0.4364, "step": 3924 }, { "epoch": 2.721914008321775, "grad_norm": 0.34175263458947175, "learning_rate": 5.098032323783673e-06, "loss": 0.4449, "step": 3925 }, { "epoch": 2.722607489597781, "grad_norm": 0.35448996796947957, "learning_rate": 5.09561207139651e-06, "loss": 0.4009, "step": 3926 }, { "epoch": 2.7233009708737863, "grad_norm": 0.3794775715329324, "learning_rate": 5.093191796598571e-06, "loss": 0.3595, "step": 3927 }, { "epoch": 2.723994452149792, "grad_norm": 0.4761010583957489, "learning_rate": 5.090771499957148e-06, "loss": 0.4123, "step": 3928 }, { "epoch": 2.7246879334257974, "grad_norm": 0.344154015482923, "learning_rate": 5.0883511820395425e-06, "loss": 0.4533, "step": 3929 }, { "epoch": 2.725381414701803, "grad_norm": 0.3862713584563973, "learning_rate": 5.085930843413062e-06, "loss": 0.3992, "step": 3930 }, { "epoch": 2.7260748959778085, "grad_norm": 0.34012845184880824, "learning_rate": 5.083510484645013e-06, "loss": 0.3915, "step": 3931 }, { "epoch": 2.7267683772538143, "grad_norm": 0.39418239618454676, "learning_rate": 5.081090106302711e-06, "loss": 0.4575, "step": 3932 }, { "epoch": 2.7274618585298196, "grad_norm": 0.7232255231081439, "learning_rate": 5.078669708953475e-06, "loss": 0.4453, "step": 3933 }, { "epoch": 2.7281553398058254, "grad_norm": 0.34048259015435967, "learning_rate": 5.07624929316463e-06, "loss": 0.4231, "step": 3934 }, { "epoch": 2.7288488210818307, "grad_norm": 0.3794902372204936, "learning_rate": 5.073828859503504e-06, "loss": 0.4642, "step": 3935 }, { "epoch": 2.7295423023578365, "grad_norm": 0.43782599599154987, "learning_rate": 5.071408408537426e-06, "loss": 0.4397, "step": 3936 }, { "epoch": 2.730235783633842, "grad_norm": 0.39049158873409967, "learning_rate": 5.068987940833735e-06, "loss": 0.405, "step": 3937 }, { "epoch": 2.7309292649098476, "grad_norm": 0.36966788682157375, "learning_rate": 5.066567456959769e-06, "loss": 0.4246, "step": 3938 }, { "epoch": 2.731622746185853, "grad_norm": 0.3573657053426213, "learning_rate": 5.064146957482875e-06, "loss": 0.4071, "step": 3939 }, { "epoch": 2.7323162274618586, "grad_norm": 0.3575676650714386, "learning_rate": 5.061726442970398e-06, "loss": 0.4171, "step": 3940 }, { "epoch": 2.733009708737864, "grad_norm": 0.37770593071879655, "learning_rate": 5.059305913989689e-06, "loss": 0.4123, "step": 3941 }, { "epoch": 2.7337031900138697, "grad_norm": 0.38681474442716546, "learning_rate": 5.0568853711081045e-06, "loss": 0.4547, "step": 3942 }, { "epoch": 2.734396671289875, "grad_norm": 0.37894669466361824, 
"learning_rate": 5.054464814893001e-06, "loss": 0.4359, "step": 3943 }, { "epoch": 2.735090152565881, "grad_norm": 0.370032643094225, "learning_rate": 5.052044245911739e-06, "loss": 0.4227, "step": 3944 }, { "epoch": 2.735783633841886, "grad_norm": 0.3608744852846485, "learning_rate": 5.0496236647316825e-06, "loss": 0.4647, "step": 3945 }, { "epoch": 2.736477115117892, "grad_norm": 0.35751879544982534, "learning_rate": 5.047203071920197e-06, "loss": 0.4436, "step": 3946 }, { "epoch": 2.7371705963938973, "grad_norm": 0.35840300538859254, "learning_rate": 5.0447824680446555e-06, "loss": 0.4499, "step": 3947 }, { "epoch": 2.737864077669903, "grad_norm": 0.57273099874388, "learning_rate": 5.042361853672429e-06, "loss": 0.4364, "step": 3948 }, { "epoch": 2.7385575589459084, "grad_norm": 0.7607428146567762, "learning_rate": 5.039941229370887e-06, "loss": 0.4293, "step": 3949 }, { "epoch": 2.739251040221914, "grad_norm": 0.3815644095081445, "learning_rate": 5.037520595707411e-06, "loss": 0.4259, "step": 3950 }, { "epoch": 2.7399445214979194, "grad_norm": 0.3348934643038374, "learning_rate": 5.035099953249381e-06, "loss": 0.4308, "step": 3951 }, { "epoch": 2.740638002773925, "grad_norm": 0.34241204079012694, "learning_rate": 5.032679302564176e-06, "loss": 0.4109, "step": 3952 }, { "epoch": 2.7413314840499305, "grad_norm": 0.4320558159408776, "learning_rate": 5.030258644219179e-06, "loss": 0.4792, "step": 3953 }, { "epoch": 2.7420249653259363, "grad_norm": 0.43582167757349133, "learning_rate": 5.027837978781773e-06, "loss": 0.4145, "step": 3954 }, { "epoch": 2.7427184466019416, "grad_norm": 0.39972661212372834, "learning_rate": 5.025417306819348e-06, "loss": 0.4051, "step": 3955 }, { "epoch": 2.7434119278779474, "grad_norm": 0.39764227036484173, "learning_rate": 5.022996628899291e-06, "loss": 0.5107, "step": 3956 }, { "epoch": 2.7441054091539527, "grad_norm": 0.3795203690315098, "learning_rate": 5.0205759455889904e-06, "loss": 0.3943, "step": 3957 }, { "epoch": 2.7447988904299585, "grad_norm": 0.3675255309526919, "learning_rate": 5.018155257455835e-06, "loss": 0.4032, "step": 3958 }, { "epoch": 2.745492371705964, "grad_norm": 0.4722710304115148, "learning_rate": 5.0157345650672206e-06, "loss": 0.4446, "step": 3959 }, { "epoch": 2.7461858529819696, "grad_norm": 0.3529200418459598, "learning_rate": 5.013313868990538e-06, "loss": 0.4418, "step": 3960 }, { "epoch": 2.746879334257975, "grad_norm": 0.39542475467593047, "learning_rate": 5.010893169793182e-06, "loss": 0.4869, "step": 3961 }, { "epoch": 2.7475728155339807, "grad_norm": 0.4150561060784059, "learning_rate": 5.008472468042543e-06, "loss": 0.3769, "step": 3962 }, { "epoch": 2.748266296809986, "grad_norm": 0.34132192843229336, "learning_rate": 5.006051764306021e-06, "loss": 0.4068, "step": 3963 }, { "epoch": 2.748959778085992, "grad_norm": 0.3441180384829794, "learning_rate": 5.003631059151008e-06, "loss": 0.3929, "step": 3964 }, { "epoch": 2.749653259361997, "grad_norm": 0.34506553264553497, "learning_rate": 5.001210353144903e-06, "loss": 0.4202, "step": 3965 }, { "epoch": 2.750346740638003, "grad_norm": 0.39071531681282184, "learning_rate": 4.998789646855099e-06, "loss": 0.4251, "step": 3966 }, { "epoch": 2.751040221914008, "grad_norm": 0.3904383793654294, "learning_rate": 4.996368940848992e-06, "loss": 0.4427, "step": 3967 }, { "epoch": 2.751733703190014, "grad_norm": 0.3378095906022995, "learning_rate": 4.99394823569398e-06, "loss": 0.3693, "step": 3968 }, { "epoch": 2.7524271844660193, "grad_norm": 0.3985577545920935, "learning_rate": 
4.991527531957458e-06, "loss": 0.4413, "step": 3969 }, { "epoch": 2.753120665742025, "grad_norm": 0.3900665096057594, "learning_rate": 4.98910683020682e-06, "loss": 0.4239, "step": 3970 }, { "epoch": 2.7538141470180304, "grad_norm": 0.3701757399908618, "learning_rate": 4.986686131009464e-06, "loss": 0.4546, "step": 3971 }, { "epoch": 2.754507628294036, "grad_norm": 0.3714286039272502, "learning_rate": 4.984265434932781e-06, "loss": 0.406, "step": 3972 }, { "epoch": 2.7552011095700415, "grad_norm": 0.3892384977360426, "learning_rate": 4.981844742544167e-06, "loss": 0.4415, "step": 3973 }, { "epoch": 2.7558945908460473, "grad_norm": 0.35244289816170765, "learning_rate": 4.979424054411013e-06, "loss": 0.3944, "step": 3974 }, { "epoch": 2.7565880721220526, "grad_norm": 0.38433471246836537, "learning_rate": 4.97700337110071e-06, "loss": 0.4087, "step": 3975 }, { "epoch": 2.7572815533980584, "grad_norm": 0.3714735430473568, "learning_rate": 4.974582693180652e-06, "loss": 0.427, "step": 3976 }, { "epoch": 2.7579750346740637, "grad_norm": 0.37266625958308525, "learning_rate": 4.972162021218228e-06, "loss": 0.436, "step": 3977 }, { "epoch": 2.7586685159500695, "grad_norm": 0.36155232312722235, "learning_rate": 4.969741355780822e-06, "loss": 0.4083, "step": 3978 }, { "epoch": 2.759361997226075, "grad_norm": 0.3955245521459925, "learning_rate": 4.9673206974358254e-06, "loss": 0.3966, "step": 3979 }, { "epoch": 2.7600554785020806, "grad_norm": 0.37149560730018827, "learning_rate": 4.96490004675062e-06, "loss": 0.4679, "step": 3980 }, { "epoch": 2.760748959778086, "grad_norm": 0.6071953926121999, "learning_rate": 4.96247940429259e-06, "loss": 0.4214, "step": 3981 }, { "epoch": 2.7614424410540916, "grad_norm": 0.3900352334769149, "learning_rate": 4.9600587706291146e-06, "loss": 0.4726, "step": 3982 }, { "epoch": 2.762135922330097, "grad_norm": 0.34245855701039796, "learning_rate": 4.957638146327575e-06, "loss": 0.402, "step": 3983 }, { "epoch": 2.7628294036061027, "grad_norm": 0.3572282010192532, "learning_rate": 4.9552175319553445e-06, "loss": 0.4258, "step": 3984 }, { "epoch": 2.763522884882108, "grad_norm": 0.3825599970725927, "learning_rate": 4.9527969280798025e-06, "loss": 0.4302, "step": 3985 }, { "epoch": 2.764216366158114, "grad_norm": 0.37865087687600074, "learning_rate": 4.950376335268319e-06, "loss": 0.3837, "step": 3986 }, { "epoch": 2.764909847434119, "grad_norm": 0.41425995862562354, "learning_rate": 4.947955754088263e-06, "loss": 0.4751, "step": 3987 }, { "epoch": 2.765603328710125, "grad_norm": 0.41886747355440135, "learning_rate": 4.945535185107e-06, "loss": 0.4312, "step": 3988 }, { "epoch": 2.7662968099861303, "grad_norm": 0.3599878162392445, "learning_rate": 4.943114628891897e-06, "loss": 0.4714, "step": 3989 }, { "epoch": 2.766990291262136, "grad_norm": 0.4301473507839401, "learning_rate": 4.940694086010312e-06, "loss": 0.463, "step": 3990 }, { "epoch": 2.7676837725381414, "grad_norm": 0.3545771469108082, "learning_rate": 4.938273557029604e-06, "loss": 0.4024, "step": 3991 }, { "epoch": 2.768377253814147, "grad_norm": 0.3973740084240133, "learning_rate": 4.935853042517127e-06, "loss": 0.455, "step": 3992 }, { "epoch": 2.7690707350901524, "grad_norm": 0.3534259407642916, "learning_rate": 4.933432543040232e-06, "loss": 0.4356, "step": 3993 }, { "epoch": 2.769764216366158, "grad_norm": 0.39654648286811706, "learning_rate": 4.931012059166267e-06, "loss": 0.394, "step": 3994 }, { "epoch": 2.7704576976421635, "grad_norm": 0.37925078010425023, "learning_rate": 4.928591591462575e-06, 
"loss": 0.427, "step": 3995 }, { "epoch": 2.7711511789181693, "grad_norm": 0.4167478976360642, "learning_rate": 4.926171140496498e-06, "loss": 0.4495, "step": 3996 }, { "epoch": 2.7718446601941746, "grad_norm": 0.3506857658306157, "learning_rate": 4.923750706835371e-06, "loss": 0.4168, "step": 3997 }, { "epoch": 2.7725381414701804, "grad_norm": 0.3622577748822191, "learning_rate": 4.921330291046526e-06, "loss": 0.3713, "step": 3998 }, { "epoch": 2.7732316227461857, "grad_norm": 0.37554857340453845, "learning_rate": 4.91890989369729e-06, "loss": 0.3883, "step": 3999 }, { "epoch": 2.7739251040221915, "grad_norm": 0.4537593666625834, "learning_rate": 4.9164895153549894e-06, "loss": 0.4532, "step": 4000 }, { "epoch": 2.774618585298197, "grad_norm": 0.4054612480562233, "learning_rate": 4.914069156586941e-06, "loss": 0.4023, "step": 4001 }, { "epoch": 2.7753120665742026, "grad_norm": 0.3805519937829709, "learning_rate": 4.9116488179604575e-06, "loss": 0.4106, "step": 4002 }, { "epoch": 2.776005547850208, "grad_norm": 0.38545256445163834, "learning_rate": 4.909228500042852e-06, "loss": 0.4103, "step": 4003 }, { "epoch": 2.7766990291262137, "grad_norm": 0.9463666155927506, "learning_rate": 4.9068082034014305e-06, "loss": 0.465, "step": 4004 }, { "epoch": 2.777392510402219, "grad_norm": 0.4007506620101125, "learning_rate": 4.904387928603491e-06, "loss": 0.4682, "step": 4005 }, { "epoch": 2.778085991678225, "grad_norm": 0.38684697926971456, "learning_rate": 4.901967676216329e-06, "loss": 0.4115, "step": 4006 }, { "epoch": 2.77877947295423, "grad_norm": 0.34583373114051424, "learning_rate": 4.899547446807232e-06, "loss": 0.4459, "step": 4007 }, { "epoch": 2.779472954230236, "grad_norm": 0.38627086393795584, "learning_rate": 4.897127240943487e-06, "loss": 0.4446, "step": 4008 }, { "epoch": 2.780166435506241, "grad_norm": 0.3883560058879765, "learning_rate": 4.894707059192372e-06, "loss": 0.388, "step": 4009 }, { "epoch": 2.780859916782247, "grad_norm": 0.3503374636503669, "learning_rate": 4.892286902121159e-06, "loss": 0.4266, "step": 4010 }, { "epoch": 2.7815533980582523, "grad_norm": 0.350352145826333, "learning_rate": 4.889866770297116e-06, "loss": 0.4395, "step": 4011 }, { "epoch": 2.782246879334258, "grad_norm": 0.3642627761438329, "learning_rate": 4.887446664287504e-06, "loss": 0.4272, "step": 4012 }, { "epoch": 2.7829403606102634, "grad_norm": 0.3434874097966225, "learning_rate": 4.885026584659579e-06, "loss": 0.4149, "step": 4013 }, { "epoch": 2.783633841886269, "grad_norm": 0.3621844235954397, "learning_rate": 4.882606531980591e-06, "loss": 0.3828, "step": 4014 }, { "epoch": 2.7843273231622745, "grad_norm": 0.3427016163640971, "learning_rate": 4.880186506817781e-06, "loss": 0.4345, "step": 4015 }, { "epoch": 2.7850208044382803, "grad_norm": 0.36054048320306425, "learning_rate": 4.877766509738386e-06, "loss": 0.4492, "step": 4016 }, { "epoch": 2.7857142857142856, "grad_norm": 0.32912577313513003, "learning_rate": 4.875346541309637e-06, "loss": 0.3833, "step": 4017 }, { "epoch": 2.7864077669902914, "grad_norm": 0.35524338466273975, "learning_rate": 4.872926602098756e-06, "loss": 0.381, "step": 4018 }, { "epoch": 2.7871012482662967, "grad_norm": 0.40455749188681955, "learning_rate": 4.870506692672957e-06, "loss": 0.4709, "step": 4019 }, { "epoch": 2.7877947295423025, "grad_norm": 0.40182790959614184, "learning_rate": 4.86808681359945e-06, "loss": 0.4469, "step": 4020 }, { "epoch": 2.7884882108183078, "grad_norm": 0.3965258452569223, "learning_rate": 4.865666965445442e-06, "loss": 0.4475, "step": 
4021 }, { "epoch": 2.7891816920943135, "grad_norm": 0.3877523551800925, "learning_rate": 4.863247148778124e-06, "loss": 0.4664, "step": 4022 }, { "epoch": 2.789875173370319, "grad_norm": 0.3586594465868982, "learning_rate": 4.860827364164683e-06, "loss": 0.4111, "step": 4023 }, { "epoch": 2.7905686546463246, "grad_norm": 0.37368248000961435, "learning_rate": 4.8584076121723e-06, "loss": 0.414, "step": 4024 }, { "epoch": 2.79126213592233, "grad_norm": 0.386644055137649, "learning_rate": 4.855987893368148e-06, "loss": 0.4293, "step": 4025 }, { "epoch": 2.7919556171983357, "grad_norm": 0.36677929808420884, "learning_rate": 4.853568208319391e-06, "loss": 0.4321, "step": 4026 }, { "epoch": 2.792649098474341, "grad_norm": 0.3789945636294229, "learning_rate": 4.851148557593185e-06, "loss": 0.3937, "step": 4027 }, { "epoch": 2.793342579750347, "grad_norm": 0.3480247387809359, "learning_rate": 4.848728941756679e-06, "loss": 0.439, "step": 4028 }, { "epoch": 2.794036061026352, "grad_norm": 0.36352830102050243, "learning_rate": 4.846309361377011e-06, "loss": 0.4447, "step": 4029 }, { "epoch": 2.794729542302358, "grad_norm": 0.366325211500067, "learning_rate": 4.843889817021318e-06, "loss": 0.3997, "step": 4030 }, { "epoch": 2.7954230235783633, "grad_norm": 0.3545734829458388, "learning_rate": 4.841470309256722e-06, "loss": 0.4299, "step": 4031 }, { "epoch": 2.796116504854369, "grad_norm": 0.5004600048170659, "learning_rate": 4.839050838650336e-06, "loss": 0.4791, "step": 4032 }, { "epoch": 2.7968099861303743, "grad_norm": 0.4024612900778736, "learning_rate": 4.8366314057692684e-06, "loss": 0.4252, "step": 4033 }, { "epoch": 2.79750346740638, "grad_norm": 0.35560712170623915, "learning_rate": 4.834212011180617e-06, "loss": 0.4092, "step": 4034 }, { "epoch": 2.7981969486823854, "grad_norm": 0.3856876940046428, "learning_rate": 4.831792655451468e-06, "loss": 0.4575, "step": 4035 }, { "epoch": 2.798890429958391, "grad_norm": 0.3591191578303279, "learning_rate": 4.829373339148903e-06, "loss": 0.4049, "step": 4036 }, { "epoch": 2.7995839112343965, "grad_norm": 0.37283837251655866, "learning_rate": 4.8269540628399925e-06, "loss": 0.4596, "step": 4037 }, { "epoch": 2.8002773925104023, "grad_norm": 0.3845580357058165, "learning_rate": 4.824534827091793e-06, "loss": 0.3955, "step": 4038 }, { "epoch": 2.8009708737864076, "grad_norm": 0.3552336919736331, "learning_rate": 4.822115632471363e-06, "loss": 0.4439, "step": 4039 }, { "epoch": 2.8016643550624134, "grad_norm": 0.3508682336015801, "learning_rate": 4.819696479545738e-06, "loss": 0.3975, "step": 4040 }, { "epoch": 2.8023578363384187, "grad_norm": 0.4022142157664882, "learning_rate": 4.817277368881954e-06, "loss": 0.4128, "step": 4041 }, { "epoch": 2.8030513176144245, "grad_norm": 0.4143632286344754, "learning_rate": 4.814858301047031e-06, "loss": 0.3935, "step": 4042 }, { "epoch": 2.80374479889043, "grad_norm": 0.3959569165255466, "learning_rate": 4.812439276607982e-06, "loss": 0.4562, "step": 4043 }, { "epoch": 2.8044382801664356, "grad_norm": 0.3551192583821301, "learning_rate": 4.810020296131807e-06, "loss": 0.4497, "step": 4044 }, { "epoch": 2.805131761442441, "grad_norm": 0.42301204900694334, "learning_rate": 4.8076013601854996e-06, "loss": 0.4257, "step": 4045 }, { "epoch": 2.8058252427184467, "grad_norm": 0.533627383552703, "learning_rate": 4.80518246933604e-06, "loss": 0.3952, "step": 4046 }, { "epoch": 2.806518723994452, "grad_norm": 0.32971782919941844, "learning_rate": 4.802763624150396e-06, "loss": 0.4018, "step": 4047 }, { "epoch": 
2.807212205270458, "grad_norm": 0.3236523786576215, "learning_rate": 4.800344825195533e-06, "loss": 0.364, "step": 4048 }, { "epoch": 2.807905686546463, "grad_norm": 0.3884875215326674, "learning_rate": 4.7979260730383954e-06, "loss": 0.427, "step": 4049 }, { "epoch": 2.808599167822469, "grad_norm": 0.3687340718711838, "learning_rate": 4.795507368245924e-06, "loss": 0.4238, "step": 4050 }, { "epoch": 2.809292649098474, "grad_norm": 0.3945178218662022, "learning_rate": 4.793088711385044e-06, "loss": 0.4485, "step": 4051 }, { "epoch": 2.80998613037448, "grad_norm": 0.37202183009946754, "learning_rate": 4.790670103022672e-06, "loss": 0.4337, "step": 4052 }, { "epoch": 2.8106796116504853, "grad_norm": 0.38427164244341955, "learning_rate": 4.788251543725711e-06, "loss": 0.4183, "step": 4053 }, { "epoch": 2.811373092926491, "grad_norm": 0.33786128218826866, "learning_rate": 4.785833034061056e-06, "loss": 0.4126, "step": 4054 }, { "epoch": 2.8120665742024964, "grad_norm": 0.3856524875438802, "learning_rate": 4.783414574595585e-06, "loss": 0.4988, "step": 4055 }, { "epoch": 2.812760055478502, "grad_norm": 0.3707925529065902, "learning_rate": 4.780996165896169e-06, "loss": 0.3866, "step": 4056 }, { "epoch": 2.8134535367545075, "grad_norm": 0.3987336385136071, "learning_rate": 4.778577808529666e-06, "loss": 0.4327, "step": 4057 }, { "epoch": 2.8141470180305133, "grad_norm": 0.3696563322135868, "learning_rate": 4.776159503062922e-06, "loss": 0.4161, "step": 4058 }, { "epoch": 2.8148404993065186, "grad_norm": 0.3829064231711528, "learning_rate": 4.7737412500627694e-06, "loss": 0.4305, "step": 4059 }, { "epoch": 2.8155339805825244, "grad_norm": 0.42212570468835886, "learning_rate": 4.771323050096028e-06, "loss": 0.402, "step": 4060 }, { "epoch": 2.8162274618585297, "grad_norm": 0.3508844344112231, "learning_rate": 4.768904903729509e-06, "loss": 0.3635, "step": 4061 }, { "epoch": 2.8169209431345354, "grad_norm": 0.38245947414316256, "learning_rate": 4.766486811530006e-06, "loss": 0.4268, "step": 4062 }, { "epoch": 2.8176144244105408, "grad_norm": 0.34169649762997983, "learning_rate": 4.764068774064304e-06, "loss": 0.3638, "step": 4063 }, { "epoch": 2.8183079056865465, "grad_norm": 0.40942647151390993, "learning_rate": 4.76165079189917e-06, "loss": 0.4022, "step": 4064 }, { "epoch": 2.819001386962552, "grad_norm": 0.4402489576229297, "learning_rate": 4.759232865601366e-06, "loss": 0.4559, "step": 4065 }, { "epoch": 2.8196948682385576, "grad_norm": 0.348823874503297, "learning_rate": 4.756814995737635e-06, "loss": 0.4472, "step": 4066 }, { "epoch": 2.820388349514563, "grad_norm": 0.37775853749888355, "learning_rate": 4.754397182874708e-06, "loss": 0.4074, "step": 4067 }, { "epoch": 2.8210818307905687, "grad_norm": 0.4257208050903815, "learning_rate": 4.7519794275793015e-06, "loss": 0.4482, "step": 4068 }, { "epoch": 2.821775312066574, "grad_norm": 0.3521327762265513, "learning_rate": 4.749561730418121e-06, "loss": 0.4219, "step": 4069 }, { "epoch": 2.82246879334258, "grad_norm": 0.34980838516587615, "learning_rate": 4.7471440919578585e-06, "loss": 0.428, "step": 4070 }, { "epoch": 2.823162274618585, "grad_norm": 0.4258849417751294, "learning_rate": 4.744726512765189e-06, "loss": 0.4413, "step": 4071 }, { "epoch": 2.823855755894591, "grad_norm": 0.3513473352205952, "learning_rate": 4.742308993406775e-06, "loss": 0.3892, "step": 4072 }, { "epoch": 2.8245492371705962, "grad_norm": 0.3518035036634018, "learning_rate": 4.739891534449267e-06, "loss": 0.468, "step": 4073 }, { "epoch": 2.825242718446602, 
"grad_norm": 0.390328201896898, "learning_rate": 4.7374741364592995e-06, "loss": 0.414, "step": 4074 }, { "epoch": 2.8259361997226073, "grad_norm": 0.38985055994312234, "learning_rate": 4.735056800003494e-06, "loss": 0.418, "step": 4075 }, { "epoch": 2.826629680998613, "grad_norm": 0.4179553528250532, "learning_rate": 4.732639525648456e-06, "loss": 0.4692, "step": 4076 }, { "epoch": 2.8273231622746184, "grad_norm": 0.34855373666352646, "learning_rate": 4.730222313960776e-06, "loss": 0.4048, "step": 4077 }, { "epoch": 2.828016643550624, "grad_norm": 0.4185433830631261, "learning_rate": 4.727805165507032e-06, "loss": 0.4568, "step": 4078 }, { "epoch": 2.8287101248266295, "grad_norm": 0.3827686595456162, "learning_rate": 4.725388080853786e-06, "loss": 0.3943, "step": 4079 }, { "epoch": 2.8294036061026353, "grad_norm": 0.3868924451054122, "learning_rate": 4.722971060567584e-06, "loss": 0.448, "step": 4080 }, { "epoch": 2.8300970873786406, "grad_norm": 0.3440702513702837, "learning_rate": 4.720554105214961e-06, "loss": 0.4354, "step": 4081 }, { "epoch": 2.8307905686546464, "grad_norm": 0.37438743596755825, "learning_rate": 4.718137215362429e-06, "loss": 0.4763, "step": 4082 }, { "epoch": 2.8314840499306517, "grad_norm": 0.41864735318001417, "learning_rate": 4.715720391576495e-06, "loss": 0.3939, "step": 4083 }, { "epoch": 2.8321775312066575, "grad_norm": 0.3734249927386904, "learning_rate": 4.713303634423642e-06, "loss": 0.4359, "step": 4084 }, { "epoch": 2.832871012482663, "grad_norm": 0.3290090027750609, "learning_rate": 4.71088694447034e-06, "loss": 0.3989, "step": 4085 }, { "epoch": 2.8335644937586686, "grad_norm": 0.36094920238835954, "learning_rate": 4.708470322283045e-06, "loss": 0.4524, "step": 4086 }, { "epoch": 2.834257975034674, "grad_norm": 0.4766922360121868, "learning_rate": 4.706053768428195e-06, "loss": 0.4193, "step": 4087 }, { "epoch": 2.8349514563106797, "grad_norm": 0.3698517377046595, "learning_rate": 4.703637283472213e-06, "loss": 0.4247, "step": 4088 }, { "epoch": 2.835644937586685, "grad_norm": 0.3363697038046532, "learning_rate": 4.701220867981505e-06, "loss": 0.3703, "step": 4089 }, { "epoch": 2.836338418862691, "grad_norm": 0.386392636656102, "learning_rate": 4.698804522522462e-06, "loss": 0.4365, "step": 4090 }, { "epoch": 2.837031900138696, "grad_norm": 0.4060050410469409, "learning_rate": 4.6963882476614555e-06, "loss": 0.3954, "step": 4091 }, { "epoch": 2.837725381414702, "grad_norm": 0.3523887208184266, "learning_rate": 4.6939720439648465e-06, "loss": 0.4254, "step": 4092 }, { "epoch": 2.838418862690707, "grad_norm": 0.3579985074450476, "learning_rate": 4.691555911998975e-06, "loss": 0.4395, "step": 4093 }, { "epoch": 2.839112343966713, "grad_norm": 0.4029662350950304, "learning_rate": 4.689139852330162e-06, "loss": 0.4546, "step": 4094 }, { "epoch": 2.8398058252427183, "grad_norm": 0.35050211108750073, "learning_rate": 4.686723865524718e-06, "loss": 0.4299, "step": 4095 }, { "epoch": 2.840499306518724, "grad_norm": 0.3451123976059286, "learning_rate": 4.684307952148931e-06, "loss": 0.4004, "step": 4096 }, { "epoch": 2.8411927877947294, "grad_norm": 0.3796932050622585, "learning_rate": 4.681892112769072e-06, "loss": 0.4321, "step": 4097 }, { "epoch": 2.841886269070735, "grad_norm": 0.38961027642341317, "learning_rate": 4.6794763479514e-06, "loss": 0.4479, "step": 4098 }, { "epoch": 2.8425797503467405, "grad_norm": 0.3620956435495506, "learning_rate": 4.677060658262151e-06, "loss": 0.4502, "step": 4099 }, { "epoch": 2.8432732316227463, "grad_norm": 
0.3694923229023742, "learning_rate": 4.674645044267541e-06, "loss": 0.4181, "step": 4100 }, { "epoch": 2.8439667128987516, "grad_norm": 0.3806111379567479, "learning_rate": 4.672229506533779e-06, "loss": 0.4987, "step": 4101 }, { "epoch": 2.8446601941747574, "grad_norm": 0.3803688644623606, "learning_rate": 4.669814045627046e-06, "loss": 0.4148, "step": 4102 }, { "epoch": 2.8453536754507627, "grad_norm": 0.382423290737069, "learning_rate": 4.667398662113511e-06, "loss": 0.4808, "step": 4103 }, { "epoch": 2.8460471567267684, "grad_norm": 0.3370787002408229, "learning_rate": 4.664983356559321e-06, "loss": 0.3859, "step": 4104 }, { "epoch": 2.8467406380027738, "grad_norm": 0.38263104851654395, "learning_rate": 4.662568129530603e-06, "loss": 0.4449, "step": 4105 }, { "epoch": 2.8474341192787795, "grad_norm": 0.3888058065974024, "learning_rate": 4.660152981593474e-06, "loss": 0.4272, "step": 4106 }, { "epoch": 2.848127600554785, "grad_norm": 0.3455810666774053, "learning_rate": 4.657737913314025e-06, "loss": 0.423, "step": 4107 }, { "epoch": 2.8488210818307906, "grad_norm": 0.375943631321159, "learning_rate": 4.65532292525833e-06, "loss": 0.4528, "step": 4108 }, { "epoch": 2.849514563106796, "grad_norm": 0.3766495348477, "learning_rate": 4.652908017992443e-06, "loss": 0.4733, "step": 4109 }, { "epoch": 2.8502080443828017, "grad_norm": 0.3484261311648054, "learning_rate": 4.650493192082404e-06, "loss": 0.4103, "step": 4110 }, { "epoch": 2.850901525658807, "grad_norm": 0.3383124399669671, "learning_rate": 4.64807844809423e-06, "loss": 0.4131, "step": 4111 }, { "epoch": 2.851595006934813, "grad_norm": 0.41783192949638315, "learning_rate": 4.64566378659392e-06, "loss": 0.4321, "step": 4112 }, { "epoch": 2.852288488210818, "grad_norm": 0.36766480464672224, "learning_rate": 4.643249208147452e-06, "loss": 0.4065, "step": 4113 }, { "epoch": 2.852981969486824, "grad_norm": 0.4019405627514558, "learning_rate": 4.640834713320785e-06, "loss": 0.4298, "step": 4114 }, { "epoch": 2.8536754507628292, "grad_norm": 0.40944835516505707, "learning_rate": 4.63842030267986e-06, "loss": 0.4431, "step": 4115 }, { "epoch": 2.854368932038835, "grad_norm": 0.3809806904706159, "learning_rate": 4.6360059767905975e-06, "loss": 0.4633, "step": 4116 }, { "epoch": 2.8550624133148403, "grad_norm": 0.36243598662449916, "learning_rate": 4.6335917362188975e-06, "loss": 0.4365, "step": 4117 }, { "epoch": 2.855755894590846, "grad_norm": 0.34285130789114643, "learning_rate": 4.63117758153064e-06, "loss": 0.3951, "step": 4118 }, { "epoch": 2.8564493758668514, "grad_norm": 0.3470943676193009, "learning_rate": 4.628763513291687e-06, "loss": 0.4373, "step": 4119 }, { "epoch": 2.857142857142857, "grad_norm": 0.3833804947544678, "learning_rate": 4.626349532067879e-06, "loss": 0.3982, "step": 4120 }, { "epoch": 2.8578363384188625, "grad_norm": 0.3550169695072367, "learning_rate": 4.623935638425034e-06, "loss": 0.3878, "step": 4121 }, { "epoch": 2.8585298196948683, "grad_norm": 0.37416667163524014, "learning_rate": 4.621521832928951e-06, "loss": 0.4041, "step": 4122 }, { "epoch": 2.8592233009708736, "grad_norm": 0.3630927983036487, "learning_rate": 4.619108116145411e-06, "loss": 0.4001, "step": 4123 }, { "epoch": 2.8599167822468794, "grad_norm": 0.38279124194809155, "learning_rate": 4.616694488640169e-06, "loss": 0.418, "step": 4124 }, { "epoch": 2.8606102635228847, "grad_norm": 0.36234016663909513, "learning_rate": 4.614280950978964e-06, "loss": 0.4704, "step": 4125 }, { "epoch": 2.8613037447988905, "grad_norm": 0.42495213346921995, 
"learning_rate": 4.611867503727508e-06, "loss": 0.4214, "step": 4126 }, { "epoch": 2.861997226074896, "grad_norm": 0.36159187678580645, "learning_rate": 4.6094541474514985e-06, "loss": 0.4273, "step": 4127 }, { "epoch": 2.8626907073509016, "grad_norm": 0.3408973458656512, "learning_rate": 4.607040882716609e-06, "loss": 0.3798, "step": 4128 }, { "epoch": 2.863384188626907, "grad_norm": 0.40945625197433955, "learning_rate": 4.604627710088492e-06, "loss": 0.4845, "step": 4129 }, { "epoch": 2.8640776699029127, "grad_norm": 0.4645050313987877, "learning_rate": 4.6022146301327755e-06, "loss": 0.4545, "step": 4130 }, { "epoch": 2.864771151178918, "grad_norm": 0.4146496248402175, "learning_rate": 4.599801643415069e-06, "loss": 0.4404, "step": 4131 }, { "epoch": 2.8654646324549238, "grad_norm": 0.3298645925276056, "learning_rate": 4.597388750500959e-06, "loss": 0.3853, "step": 4132 }, { "epoch": 2.866158113730929, "grad_norm": 0.3754955120945029, "learning_rate": 4.59497595195601e-06, "loss": 0.4285, "step": 4133 }, { "epoch": 2.866851595006935, "grad_norm": 0.3999373734210744, "learning_rate": 4.5925632483457635e-06, "loss": 0.4232, "step": 4134 }, { "epoch": 2.86754507628294, "grad_norm": 0.3331439598554828, "learning_rate": 4.590150640235742e-06, "loss": 0.4236, "step": 4135 }, { "epoch": 2.868238557558946, "grad_norm": 0.36819641326853636, "learning_rate": 4.58773812819144e-06, "loss": 0.4369, "step": 4136 }, { "epoch": 2.8689320388349513, "grad_norm": 0.37453329326162216, "learning_rate": 4.585325712778338e-06, "loss": 0.433, "step": 4137 }, { "epoch": 2.869625520110957, "grad_norm": 0.35624968189002554, "learning_rate": 4.582913394561884e-06, "loss": 0.4452, "step": 4138 }, { "epoch": 2.8703190013869624, "grad_norm": 0.37477086333955983, "learning_rate": 4.5805011741075095e-06, "loss": 0.4541, "step": 4139 }, { "epoch": 2.871012482662968, "grad_norm": 0.3876290327732691, "learning_rate": 4.578089051980622e-06, "loss": 0.4416, "step": 4140 }, { "epoch": 2.8717059639389735, "grad_norm": 0.3953283849497786, "learning_rate": 4.575677028746606e-06, "loss": 0.4902, "step": 4141 }, { "epoch": 2.8723994452149793, "grad_norm": 0.3561636683250079, "learning_rate": 4.57326510497082e-06, "loss": 0.4452, "step": 4142 }, { "epoch": 2.8730929264909846, "grad_norm": 0.3766848888549487, "learning_rate": 4.570853281218605e-06, "loss": 0.4119, "step": 4143 }, { "epoch": 2.8737864077669903, "grad_norm": 0.3740385918636498, "learning_rate": 4.568441558055271e-06, "loss": 0.4069, "step": 4144 }, { "epoch": 2.8744798890429957, "grad_norm": 0.36726727356780703, "learning_rate": 4.566029936046109e-06, "loss": 0.4816, "step": 4145 }, { "epoch": 2.8751733703190014, "grad_norm": 0.36866803601355214, "learning_rate": 4.563618415756389e-06, "loss": 0.3683, "step": 4146 }, { "epoch": 2.875866851595007, "grad_norm": 0.39472160529342576, "learning_rate": 4.561206997751352e-06, "loss": 0.4203, "step": 4147 }, { "epoch": 2.8765603328710125, "grad_norm": 0.3442874064224455, "learning_rate": 4.558795682596216e-06, "loss": 0.4165, "step": 4148 }, { "epoch": 2.877253814147018, "grad_norm": 0.38644588415967007, "learning_rate": 4.556384470856177e-06, "loss": 0.4127, "step": 4149 }, { "epoch": 2.8779472954230236, "grad_norm": 0.3619612092465305, "learning_rate": 4.553973363096405e-06, "loss": 0.4054, "step": 4150 }, { "epoch": 2.8786407766990294, "grad_norm": 0.3448337637953116, "learning_rate": 4.551562359882048e-06, "loss": 0.4177, "step": 4151 }, { "epoch": 2.8793342579750347, "grad_norm": 0.38364920269799246, "learning_rate": 
4.549151461778225e-06, "loss": 0.4563, "step": 4152 }, { "epoch": 2.88002773925104, "grad_norm": 0.3723559507453039, "learning_rate": 4.546740669350034e-06, "loss": 0.4109, "step": 4153 }, { "epoch": 2.880721220527046, "grad_norm": 0.39681929155284845, "learning_rate": 4.5443299831625455e-06, "loss": 0.4478, "step": 4154 }, { "epoch": 2.8814147018030516, "grad_norm": 0.3586297582322779, "learning_rate": 4.54191940378081e-06, "loss": 0.4132, "step": 4155 }, { "epoch": 2.882108183079057, "grad_norm": 0.5746156961669232, "learning_rate": 4.53950893176985e-06, "loss": 0.3897, "step": 4156 }, { "epoch": 2.8828016643550622, "grad_norm": 0.38865913616338227, "learning_rate": 4.537098567694661e-06, "loss": 0.4411, "step": 4157 }, { "epoch": 2.883495145631068, "grad_norm": 0.39901877163538707, "learning_rate": 4.534688312120216e-06, "loss": 0.4294, "step": 4158 }, { "epoch": 2.884188626907074, "grad_norm": 0.37308151176484683, "learning_rate": 4.532278165611459e-06, "loss": 0.4531, "step": 4159 }, { "epoch": 2.884882108183079, "grad_norm": 0.3784354701011851, "learning_rate": 4.529868128733314e-06, "loss": 0.447, "step": 4160 }, { "epoch": 2.8855755894590844, "grad_norm": 0.38127303713728083, "learning_rate": 4.527458202050674e-06, "loss": 0.4612, "step": 4161 }, { "epoch": 2.88626907073509, "grad_norm": 0.377705214411384, "learning_rate": 4.525048386128409e-06, "loss": 0.5111, "step": 4162 }, { "epoch": 2.886962552011096, "grad_norm": 0.3841428778251947, "learning_rate": 4.522638681531361e-06, "loss": 0.4412, "step": 4163 }, { "epoch": 2.8876560332871013, "grad_norm": 0.3954687219562789, "learning_rate": 4.52022908882435e-06, "loss": 0.442, "step": 4164 }, { "epoch": 2.8883495145631066, "grad_norm": 0.35690896440635167, "learning_rate": 4.5178196085721675e-06, "loss": 0.4204, "step": 4165 }, { "epoch": 2.8890429958391124, "grad_norm": 0.36102101537630227, "learning_rate": 4.5154102413395766e-06, "loss": 0.4556, "step": 4166 }, { "epoch": 2.889736477115118, "grad_norm": 0.36307046359821055, "learning_rate": 4.513000987691314e-06, "loss": 0.4857, "step": 4167 }, { "epoch": 2.8904299583911235, "grad_norm": 0.3551096280195344, "learning_rate": 4.510591848192093e-06, "loss": 0.4005, "step": 4168 }, { "epoch": 2.891123439667129, "grad_norm": 0.3608771211593707, "learning_rate": 4.508182823406599e-06, "loss": 0.4605, "step": 4169 }, { "epoch": 2.8918169209431346, "grad_norm": 0.3809405439867942, "learning_rate": 4.50577391389949e-06, "loss": 0.4829, "step": 4170 }, { "epoch": 2.8925104022191404, "grad_norm": 0.3869152966360628, "learning_rate": 4.503365120235395e-06, "loss": 0.4496, "step": 4171 }, { "epoch": 2.8932038834951457, "grad_norm": 0.36587027722390336, "learning_rate": 4.500956442978918e-06, "loss": 0.459, "step": 4172 }, { "epoch": 2.893897364771151, "grad_norm": 0.33863471910286297, "learning_rate": 4.498547882694637e-06, "loss": 0.4278, "step": 4173 }, { "epoch": 2.8945908460471568, "grad_norm": 0.35048346242787526, "learning_rate": 4.496139439947103e-06, "loss": 0.4588, "step": 4174 }, { "epoch": 2.8952843273231625, "grad_norm": 0.3645785826197352, "learning_rate": 4.493731115300832e-06, "loss": 0.4172, "step": 4175 }, { "epoch": 2.895977808599168, "grad_norm": 0.40875720726240106, "learning_rate": 4.491322909320324e-06, "loss": 0.4195, "step": 4176 }, { "epoch": 2.896671289875173, "grad_norm": 0.31418901606969907, "learning_rate": 4.4889148225700406e-06, "loss": 0.3926, "step": 4177 }, { "epoch": 2.897364771151179, "grad_norm": 0.3740156771524573, "learning_rate": 4.486506855614422e-06, 
"loss": 0.4057, "step": 4178 }, { "epoch": 2.8980582524271847, "grad_norm": 0.38590233527498613, "learning_rate": 4.484099009017876e-06, "loss": 0.4252, "step": 4179 }, { "epoch": 2.89875173370319, "grad_norm": 0.37572662377068333, "learning_rate": 4.481691283344787e-06, "loss": 0.4099, "step": 4180 }, { "epoch": 2.8994452149791954, "grad_norm": 0.3816752535776974, "learning_rate": 4.479283679159506e-06, "loss": 0.4263, "step": 4181 }, { "epoch": 2.900138696255201, "grad_norm": 0.359232261823802, "learning_rate": 4.476876197026362e-06, "loss": 0.4189, "step": 4182 }, { "epoch": 2.900832177531207, "grad_norm": 0.4311357319828042, "learning_rate": 4.4744688375096475e-06, "loss": 0.4477, "step": 4183 }, { "epoch": 2.9015256588072122, "grad_norm": 0.44353577407545236, "learning_rate": 4.472061601173631e-06, "loss": 0.4739, "step": 4184 }, { "epoch": 2.9022191400832176, "grad_norm": 0.3840918030935441, "learning_rate": 4.469654488582552e-06, "loss": 0.41, "step": 4185 }, { "epoch": 2.9029126213592233, "grad_norm": 0.3410392120300619, "learning_rate": 4.467247500300621e-06, "loss": 0.4033, "step": 4186 }, { "epoch": 2.903606102635229, "grad_norm": 0.3885996076354032, "learning_rate": 4.464840636892015e-06, "loss": 0.4402, "step": 4187 }, { "epoch": 2.9042995839112344, "grad_norm": 0.34003891337217107, "learning_rate": 4.462433898920891e-06, "loss": 0.4049, "step": 4188 }, { "epoch": 2.9049930651872398, "grad_norm": 0.35214490516802605, "learning_rate": 4.460027286951366e-06, "loss": 0.395, "step": 4189 }, { "epoch": 2.9056865464632455, "grad_norm": 0.3784738130498375, "learning_rate": 4.457620801547533e-06, "loss": 0.4299, "step": 4190 }, { "epoch": 2.9063800277392513, "grad_norm": 0.37681793237234207, "learning_rate": 4.455214443273458e-06, "loss": 0.4373, "step": 4191 }, { "epoch": 2.9070735090152566, "grad_norm": 0.3883313713399433, "learning_rate": 4.452808212693171e-06, "loss": 0.3772, "step": 4192 }, { "epoch": 2.907766990291262, "grad_norm": 0.37135226167438945, "learning_rate": 4.450402110370677e-06, "loss": 0.4374, "step": 4193 }, { "epoch": 2.9084604715672677, "grad_norm": 0.3732731984268749, "learning_rate": 4.447996136869948e-06, "loss": 0.4764, "step": 4194 }, { "epoch": 2.9091539528432735, "grad_norm": 0.36666998974634846, "learning_rate": 4.445590292754927e-06, "loss": 0.3919, "step": 4195 }, { "epoch": 2.909847434119279, "grad_norm": 0.34035184473469293, "learning_rate": 4.443184578589525e-06, "loss": 0.4259, "step": 4196 }, { "epoch": 2.910540915395284, "grad_norm": 0.37071498147061005, "learning_rate": 4.440778994937625e-06, "loss": 0.4699, "step": 4197 }, { "epoch": 2.91123439667129, "grad_norm": 0.3685621554169873, "learning_rate": 4.4383735423630795e-06, "loss": 0.4372, "step": 4198 }, { "epoch": 2.9119278779472957, "grad_norm": 0.3493022308184043, "learning_rate": 4.435968221429706e-06, "loss": 0.4262, "step": 4199 }, { "epoch": 2.912621359223301, "grad_norm": 0.40893099483569534, "learning_rate": 4.433563032701298e-06, "loss": 0.4014, "step": 4200 }, { "epoch": 2.9133148404993063, "grad_norm": 0.3900126872343627, "learning_rate": 4.431157976741614e-06, "loss": 0.4277, "step": 4201 }, { "epoch": 2.914008321775312, "grad_norm": 0.35138144824315687, "learning_rate": 4.428753054114379e-06, "loss": 0.4358, "step": 4202 }, { "epoch": 2.914701803051318, "grad_norm": 0.39868077051943723, "learning_rate": 4.426348265383292e-06, "loss": 0.4106, "step": 4203 }, { "epoch": 2.915395284327323, "grad_norm": 0.4172496149807763, "learning_rate": 4.423943611112016e-06, "loss": 0.4277, 
"step": 4204 }, { "epoch": 2.9160887656033285, "grad_norm": 0.3761372428670614, "learning_rate": 4.421539091864187e-06, "loss": 0.4808, "step": 4205 }, { "epoch": 2.9167822468793343, "grad_norm": 0.5150935787749773, "learning_rate": 4.419134708203405e-06, "loss": 0.4495, "step": 4206 }, { "epoch": 2.91747572815534, "grad_norm": 0.3554940766434832, "learning_rate": 4.416730460693239e-06, "loss": 0.4398, "step": 4207 }, { "epoch": 2.9181692094313454, "grad_norm": 0.40458991987118614, "learning_rate": 4.41432634989723e-06, "loss": 0.4376, "step": 4208 }, { "epoch": 2.9188626907073507, "grad_norm": 0.952312026438392, "learning_rate": 4.411922376378881e-06, "loss": 0.4473, "step": 4209 }, { "epoch": 2.9195561719833565, "grad_norm": 0.37818496012554476, "learning_rate": 4.409518540701671e-06, "loss": 0.4414, "step": 4210 }, { "epoch": 2.9202496532593623, "grad_norm": 0.4574207706914251, "learning_rate": 4.407114843429037e-06, "loss": 0.4541, "step": 4211 }, { "epoch": 2.9209431345353676, "grad_norm": 0.41131225230761653, "learning_rate": 4.40471128512439e-06, "loss": 0.4166, "step": 4212 }, { "epoch": 2.921636615811373, "grad_norm": 0.39276294735712264, "learning_rate": 4.402307866351107e-06, "loss": 0.4372, "step": 4213 }, { "epoch": 2.9223300970873787, "grad_norm": 0.5007212598795793, "learning_rate": 4.399904587672531e-06, "loss": 0.4033, "step": 4214 }, { "epoch": 2.9230235783633844, "grad_norm": 0.3601425249249112, "learning_rate": 4.397501449651974e-06, "loss": 0.4436, "step": 4215 }, { "epoch": 2.9237170596393898, "grad_norm": 0.353205294677814, "learning_rate": 4.395098452852713e-06, "loss": 0.3889, "step": 4216 }, { "epoch": 2.924410540915395, "grad_norm": 0.3554608716705643, "learning_rate": 4.392695597837993e-06, "loss": 0.4176, "step": 4217 }, { "epoch": 2.925104022191401, "grad_norm": 0.3571090668692505, "learning_rate": 4.3902928851710274e-06, "loss": 0.4235, "step": 4218 }, { "epoch": 2.9257975034674066, "grad_norm": 0.3768309658853595, "learning_rate": 4.387890315414994e-06, "loss": 0.444, "step": 4219 }, { "epoch": 2.926490984743412, "grad_norm": 0.39588873298644633, "learning_rate": 4.385487889133039e-06, "loss": 0.4637, "step": 4220 }, { "epoch": 2.9271844660194173, "grad_norm": 0.4117060381307611, "learning_rate": 4.38308560688827e-06, "loss": 0.4356, "step": 4221 }, { "epoch": 2.927877947295423, "grad_norm": 0.3898423934610993, "learning_rate": 4.380683469243768e-06, "loss": 0.4008, "step": 4222 }, { "epoch": 2.928571428571429, "grad_norm": 0.3754459880185379, "learning_rate": 4.3782814767625755e-06, "loss": 0.4147, "step": 4223 }, { "epoch": 2.929264909847434, "grad_norm": 0.3544460821057703, "learning_rate": 4.375879630007701e-06, "loss": 0.4809, "step": 4224 }, { "epoch": 2.9299583911234395, "grad_norm": 0.3406276961905475, "learning_rate": 4.373477929542123e-06, "loss": 0.4396, "step": 4225 }, { "epoch": 2.9306518723994452, "grad_norm": 0.3400972695368736, "learning_rate": 4.3710763759287775e-06, "loss": 0.3578, "step": 4226 }, { "epoch": 2.931345353675451, "grad_norm": 0.3887518661034646, "learning_rate": 4.368674969730578e-06, "loss": 0.4811, "step": 4227 }, { "epoch": 2.9320388349514563, "grad_norm": 0.3882612395308509, "learning_rate": 4.3662737115103925e-06, "loss": 0.412, "step": 4228 }, { "epoch": 2.9327323162274617, "grad_norm": 0.37511246445358004, "learning_rate": 4.363872601831059e-06, "loss": 0.4366, "step": 4229 }, { "epoch": 2.9334257975034674, "grad_norm": 0.391641421354005, "learning_rate": 4.36147164125538e-06, "loss": 0.3931, "step": 4230 }, { "epoch": 
2.934119278779473, "grad_norm": 0.4146628149959727, "learning_rate": 4.359070830346126e-06, "loss": 0.4177, "step": 4231 }, { "epoch": 2.9348127600554785, "grad_norm": 0.44252220270216525, "learning_rate": 4.356670169666025e-06, "loss": 0.4813, "step": 4232 }, { "epoch": 2.935506241331484, "grad_norm": 0.41331992251527333, "learning_rate": 4.354269659777779e-06, "loss": 0.4675, "step": 4233 }, { "epoch": 2.9361997226074896, "grad_norm": 0.37396035072667244, "learning_rate": 4.351869301244047e-06, "loss": 0.4077, "step": 4234 }, { "epoch": 2.9368932038834954, "grad_norm": 0.4240076191070561, "learning_rate": 4.349469094627456e-06, "loss": 0.3912, "step": 4235 }, { "epoch": 2.9375866851595007, "grad_norm": 0.36096051600234114, "learning_rate": 4.347069040490599e-06, "loss": 0.4727, "step": 4236 }, { "epoch": 2.938280166435506, "grad_norm": 0.36050935676652, "learning_rate": 4.3446691393960295e-06, "loss": 0.4088, "step": 4237 }, { "epoch": 2.938973647711512, "grad_norm": 0.44378486837617315, "learning_rate": 4.342269391906269e-06, "loss": 0.4186, "step": 4238 }, { "epoch": 2.9396671289875176, "grad_norm": 0.4080193062495759, "learning_rate": 4.339869798583799e-06, "loss": 0.4389, "step": 4239 }, { "epoch": 2.940360610263523, "grad_norm": 0.36739074697199814, "learning_rate": 4.337470359991068e-06, "loss": 0.461, "step": 4240 }, { "epoch": 2.9410540915395282, "grad_norm": 0.35488358826297134, "learning_rate": 4.335071076690484e-06, "loss": 0.4178, "step": 4241 }, { "epoch": 2.941747572815534, "grad_norm": 0.3505797448971799, "learning_rate": 4.332671949244426e-06, "loss": 0.427, "step": 4242 }, { "epoch": 2.9424410540915398, "grad_norm": 0.3995053966433075, "learning_rate": 4.3302729782152276e-06, "loss": 0.4599, "step": 4243 }, { "epoch": 2.943134535367545, "grad_norm": 0.36577894271365613, "learning_rate": 4.327874164165195e-06, "loss": 0.4449, "step": 4244 }, { "epoch": 2.9438280166435504, "grad_norm": 0.3558919168561584, "learning_rate": 4.325475507656591e-06, "loss": 0.4086, "step": 4245 }, { "epoch": 2.944521497919556, "grad_norm": 0.413725186535279, "learning_rate": 4.323077009251641e-06, "loss": 0.4415, "step": 4246 }, { "epoch": 2.945214979195562, "grad_norm": 0.3808476733000526, "learning_rate": 4.320678669512539e-06, "loss": 0.4474, "step": 4247 }, { "epoch": 2.9459084604715673, "grad_norm": 0.36358032243985366, "learning_rate": 4.318280489001437e-06, "loss": 0.3982, "step": 4248 }, { "epoch": 2.9466019417475726, "grad_norm": 0.34758205590939695, "learning_rate": 4.31588246828045e-06, "loss": 0.3799, "step": 4249 }, { "epoch": 2.9472954230235784, "grad_norm": 0.36320108140650526, "learning_rate": 4.313484607911659e-06, "loss": 0.3944, "step": 4250 }, { "epoch": 2.947988904299584, "grad_norm": 0.3685465916452843, "learning_rate": 4.3110869084571035e-06, "loss": 0.4189, "step": 4251 }, { "epoch": 2.9486823855755895, "grad_norm": 0.38986392855565566, "learning_rate": 4.3086893704787855e-06, "loss": 0.3935, "step": 4252 }, { "epoch": 2.949375866851595, "grad_norm": 0.3909559608756658, "learning_rate": 4.306291994538674e-06, "loss": 0.4492, "step": 4253 }, { "epoch": 2.9500693481276006, "grad_norm": 0.3616514085155487, "learning_rate": 4.3038947811986945e-06, "loss": 0.3869, "step": 4254 }, { "epoch": 2.9507628294036063, "grad_norm": 0.35377823474419373, "learning_rate": 4.3014977310207385e-06, "loss": 0.4333, "step": 4255 }, { "epoch": 2.9514563106796117, "grad_norm": 0.36875846519045613, "learning_rate": 4.299100844566654e-06, "loss": 0.4088, "step": 4256 }, { "epoch": 
2.952149791955617, "grad_norm": 0.3822074991593694, "learning_rate": 4.296704122398256e-06, "loss": 0.4219, "step": 4257 }, { "epoch": 2.9528432732316228, "grad_norm": 0.3558894690283381, "learning_rate": 4.294307565077318e-06, "loss": 0.43, "step": 4258 }, { "epoch": 2.9535367545076285, "grad_norm": 0.3758378590592379, "learning_rate": 4.2919111731655764e-06, "loss": 0.4515, "step": 4259 }, { "epoch": 2.954230235783634, "grad_norm": 0.3651968594830969, "learning_rate": 4.2895149472247275e-06, "loss": 0.4407, "step": 4260 }, { "epoch": 2.954923717059639, "grad_norm": 0.37824989311907037, "learning_rate": 4.2871188878164275e-06, "loss": 0.408, "step": 4261 }, { "epoch": 2.955617198335645, "grad_norm": 0.3616979418948012, "learning_rate": 4.284722995502298e-06, "loss": 0.3892, "step": 4262 }, { "epoch": 2.9563106796116507, "grad_norm": 0.37057440961879723, "learning_rate": 4.282327270843919e-06, "loss": 0.3993, "step": 4263 }, { "epoch": 2.957004160887656, "grad_norm": 0.393727943353917, "learning_rate": 4.27993171440283e-06, "loss": 0.3983, "step": 4264 }, { "epoch": 2.9576976421636614, "grad_norm": 0.33098126965279817, "learning_rate": 4.277536326740532e-06, "loss": 0.4278, "step": 4265 }, { "epoch": 2.958391123439667, "grad_norm": 0.35456096910736296, "learning_rate": 4.275141108418487e-06, "loss": 0.406, "step": 4266 }, { "epoch": 2.959084604715673, "grad_norm": 0.3857375998666261, "learning_rate": 4.272746059998117e-06, "loss": 0.4548, "step": 4267 }, { "epoch": 2.9597780859916782, "grad_norm": 0.3443289102946537, "learning_rate": 4.270351182040802e-06, "loss": 0.414, "step": 4268 }, { "epoch": 2.9604715672676836, "grad_norm": 0.402873190215866, "learning_rate": 4.267956475107886e-06, "loss": 0.4114, "step": 4269 }, { "epoch": 2.9611650485436893, "grad_norm": 0.48112136404907163, "learning_rate": 4.265561939760671e-06, "loss": 0.4445, "step": 4270 }, { "epoch": 2.961858529819695, "grad_norm": 0.5064996632471026, "learning_rate": 4.263167576560417e-06, "loss": 0.4009, "step": 4271 }, { "epoch": 2.9625520110957004, "grad_norm": 0.368407837281134, "learning_rate": 4.2607733860683485e-06, "loss": 0.4447, "step": 4272 }, { "epoch": 2.9632454923717058, "grad_norm": 0.3543295548768013, "learning_rate": 4.258379368845644e-06, "loss": 0.4133, "step": 4273 }, { "epoch": 2.9639389736477115, "grad_norm": 0.3528573723218785, "learning_rate": 4.255985525453443e-06, "loss": 0.394, "step": 4274 }, { "epoch": 2.9646324549237173, "grad_norm": 0.389851665848564, "learning_rate": 4.253591856452849e-06, "loss": 0.4113, "step": 4275 }, { "epoch": 2.9653259361997226, "grad_norm": 0.49549636672036323, "learning_rate": 4.251198362404917e-06, "loss": 0.4479, "step": 4276 }, { "epoch": 2.966019417475728, "grad_norm": 0.508548139600559, "learning_rate": 4.248805043870665e-06, "loss": 0.5098, "step": 4277 }, { "epoch": 2.9667128987517337, "grad_norm": 0.42703095776247146, "learning_rate": 4.246411901411071e-06, "loss": 0.4378, "step": 4278 }, { "epoch": 2.9674063800277395, "grad_norm": 0.4660796201150934, "learning_rate": 4.244018935587068e-06, "loss": 0.4465, "step": 4279 }, { "epoch": 2.968099861303745, "grad_norm": 0.3667537562587972, "learning_rate": 4.241626146959553e-06, "loss": 0.4175, "step": 4280 }, { "epoch": 2.96879334257975, "grad_norm": 0.41178798121954824, "learning_rate": 4.239233536089377e-06, "loss": 0.4339, "step": 4281 }, { "epoch": 2.969486823855756, "grad_norm": 0.41827651139444094, "learning_rate": 4.236841103537349e-06, "loss": 0.4153, "step": 4282 }, { "epoch": 2.9701803051317617, 
"grad_norm": 0.3524789326648256, "learning_rate": 4.234448849864241e-06, "loss": 0.4012, "step": 4283 }, { "epoch": 2.970873786407767, "grad_norm": 0.3816231220806304, "learning_rate": 4.232056775630778e-06, "loss": 0.4134, "step": 4284 }, { "epoch": 2.9715672676837723, "grad_norm": 0.36447400401810076, "learning_rate": 4.229664881397645e-06, "loss": 0.4423, "step": 4285 }, { "epoch": 2.972260748959778, "grad_norm": 0.3225440930529132, "learning_rate": 4.227273167725484e-06, "loss": 0.4161, "step": 4286 }, { "epoch": 2.972954230235784, "grad_norm": 0.34130846687034655, "learning_rate": 4.224881635174897e-06, "loss": 0.4376, "step": 4287 }, { "epoch": 2.973647711511789, "grad_norm": 0.34847053691640917, "learning_rate": 4.2224902843064384e-06, "loss": 0.3965, "step": 4288 }, { "epoch": 2.9743411927877945, "grad_norm": 0.35125933384185115, "learning_rate": 4.220099115680628e-06, "loss": 0.4568, "step": 4289 }, { "epoch": 2.9750346740638003, "grad_norm": 0.3585432803771428, "learning_rate": 4.217708129857937e-06, "loss": 0.3907, "step": 4290 }, { "epoch": 2.975728155339806, "grad_norm": 0.36571877662928043, "learning_rate": 4.215317327398795e-06, "loss": 0.4425, "step": 4291 }, { "epoch": 2.9764216366158114, "grad_norm": 0.7306906625225886, "learning_rate": 4.212926708863588e-06, "loss": 0.4324, "step": 4292 }, { "epoch": 2.9771151178918167, "grad_norm": 0.36069675711693266, "learning_rate": 4.210536274812661e-06, "loss": 0.3597, "step": 4293 }, { "epoch": 2.9778085991678225, "grad_norm": 0.43992143936926636, "learning_rate": 4.208146025806313e-06, "loss": 0.4402, "step": 4294 }, { "epoch": 2.9785020804438282, "grad_norm": 0.3551426560349192, "learning_rate": 4.205755962404801e-06, "loss": 0.3753, "step": 4295 }, { "epoch": 2.9791955617198336, "grad_norm": 0.33705470699106477, "learning_rate": 4.20336608516834e-06, "loss": 0.3778, "step": 4296 }, { "epoch": 2.979889042995839, "grad_norm": 0.3875134793699313, "learning_rate": 4.200976394657098e-06, "loss": 0.4325, "step": 4297 }, { "epoch": 2.9805825242718447, "grad_norm": 0.3615281724838666, "learning_rate": 4.198586891431203e-06, "loss": 0.432, "step": 4298 }, { "epoch": 2.9812760055478504, "grad_norm": 0.41095565863060013, "learning_rate": 4.196197576050737e-06, "loss": 0.4523, "step": 4299 }, { "epoch": 2.9819694868238558, "grad_norm": 0.4682817793802189, "learning_rate": 4.1938084490757375e-06, "loss": 0.4717, "step": 4300 }, { "epoch": 2.982662968099861, "grad_norm": 0.3563769249355266, "learning_rate": 4.191419511066199e-06, "loss": 0.4351, "step": 4301 }, { "epoch": 2.983356449375867, "grad_norm": 0.4302142439490134, "learning_rate": 4.1890307625820705e-06, "loss": 0.4829, "step": 4302 }, { "epoch": 2.9840499306518726, "grad_norm": 0.37469363036699777, "learning_rate": 4.186642204183259e-06, "loss": 0.4627, "step": 4303 }, { "epoch": 2.984743411927878, "grad_norm": 0.4031889250698094, "learning_rate": 4.184253836429624e-06, "loss": 0.4348, "step": 4304 }, { "epoch": 2.9854368932038833, "grad_norm": 0.35424813351448564, "learning_rate": 4.181865659880982e-06, "loss": 0.4111, "step": 4305 }, { "epoch": 2.986130374479889, "grad_norm": 0.34422476739029473, "learning_rate": 4.179477675097102e-06, "loss": 0.3848, "step": 4306 }, { "epoch": 2.986823855755895, "grad_norm": 0.39324852193346244, "learning_rate": 4.177089882637713e-06, "loss": 0.454, "step": 4307 }, { "epoch": 2.9875173370319, "grad_norm": 0.36175083718314827, "learning_rate": 4.174702283062497e-06, "loss": 0.4316, "step": 4308 }, { "epoch": 2.9882108183079055, "grad_norm": 
0.3511453289481942, "learning_rate": 4.172314876931089e-06, "loss": 0.3857, "step": 4309 }, { "epoch": 2.9889042995839112, "grad_norm": 0.3534191191212724, "learning_rate": 4.1699276648030805e-06, "loss": 0.4012, "step": 4310 }, { "epoch": 2.989597780859917, "grad_norm": 0.38250423048380033, "learning_rate": 4.167540647238013e-06, "loss": 0.4212, "step": 4311 }, { "epoch": 2.9902912621359223, "grad_norm": 0.35440520217564475, "learning_rate": 4.165153824795391e-06, "loss": 0.4413, "step": 4312 }, { "epoch": 2.9909847434119277, "grad_norm": 0.35663598743105585, "learning_rate": 4.162767198034665e-06, "loss": 0.4371, "step": 4313 }, { "epoch": 2.9916782246879334, "grad_norm": 0.38788743180077906, "learning_rate": 4.1603807675152444e-06, "loss": 0.4355, "step": 4314 }, { "epoch": 2.992371705963939, "grad_norm": 0.4110449964082554, "learning_rate": 4.15799453379649e-06, "loss": 0.4587, "step": 4315 }, { "epoch": 2.9930651872399445, "grad_norm": 0.3820571059332362, "learning_rate": 4.15560849743772e-06, "loss": 0.4319, "step": 4316 }, { "epoch": 2.99375866851595, "grad_norm": 0.35652755584557944, "learning_rate": 4.153222658998203e-06, "loss": 0.4386, "step": 4317 }, { "epoch": 2.9944521497919556, "grad_norm": 0.38678005121238507, "learning_rate": 4.1508370190371626e-06, "loss": 0.3998, "step": 4318 }, { "epoch": 2.9951456310679614, "grad_norm": 0.38066917572403497, "learning_rate": 4.148451578113773e-06, "loss": 0.4128, "step": 4319 }, { "epoch": 2.9958391123439667, "grad_norm": 0.38007142513719117, "learning_rate": 4.146066336787169e-06, "loss": 0.4394, "step": 4320 }, { "epoch": 2.996532593619972, "grad_norm": 0.4126330063215766, "learning_rate": 4.143681295616429e-06, "loss": 0.4262, "step": 4321 }, { "epoch": 2.997226074895978, "grad_norm": 0.38827172282440003, "learning_rate": 4.141296455160592e-06, "loss": 0.4048, "step": 4322 }, { "epoch": 2.9979195561719836, "grad_norm": 0.38603404511784284, "learning_rate": 4.138911815978648e-06, "loss": 0.4587, "step": 4323 }, { "epoch": 2.998613037447989, "grad_norm": 0.4406539070299765, "learning_rate": 4.136527378629535e-06, "loss": 0.4192, "step": 4324 }, { "epoch": 2.9993065187239942, "grad_norm": 0.391010000823013, "learning_rate": 4.134143143672154e-06, "loss": 0.4233, "step": 4325 }, { "epoch": 3.0, "grad_norm": 0.4101991000166011, "learning_rate": 4.131759111665349e-06, "loss": 0.4465, "step": 4326 }, { "epoch": 3.0006934812760058, "grad_norm": 0.3719559775138811, "learning_rate": 4.129375283167919e-06, "loss": 0.3685, "step": 4327 }, { "epoch": 3.001386962552011, "grad_norm": 0.3654641212889352, "learning_rate": 4.126991658738618e-06, "loss": 0.3695, "step": 4328 }, { "epoch": 3.002080443828017, "grad_norm": 0.3789960837522673, "learning_rate": 4.12460823893615e-06, "loss": 0.4001, "step": 4329 }, { "epoch": 3.002773925104022, "grad_norm": 0.3691564243155371, "learning_rate": 4.122225024319171e-06, "loss": 0.3851, "step": 4330 }, { "epoch": 3.003467406380028, "grad_norm": 0.4072313138856476, "learning_rate": 4.119842015446288e-06, "loss": 0.4315, "step": 4331 }, { "epoch": 3.0041608876560333, "grad_norm": 0.347853917127619, "learning_rate": 4.117459212876062e-06, "loss": 0.3919, "step": 4332 }, { "epoch": 3.004854368932039, "grad_norm": 0.38506692871083675, "learning_rate": 4.115076617167004e-06, "loss": 0.3465, "step": 4333 }, { "epoch": 3.0055478502080444, "grad_norm": 0.3748928420469291, "learning_rate": 4.11269422887758e-06, "loss": 0.4176, "step": 4334 }, { "epoch": 3.00624133148405, "grad_norm": 0.3675701455427544, "learning_rate": 
4.110312048566203e-06, "loss": 0.3776, "step": 4335 }, { "epoch": 3.0069348127600555, "grad_norm": 0.3881110998526009, "learning_rate": 4.107930076791237e-06, "loss": 0.3807, "step": 4336 }, { "epoch": 3.0076282940360612, "grad_norm": 0.43616785225233196, "learning_rate": 4.105548314111001e-06, "loss": 0.384, "step": 4337 }, { "epoch": 3.0083217753120666, "grad_norm": 0.40718749495585393, "learning_rate": 4.103166761083762e-06, "loss": 0.3954, "step": 4338 }, { "epoch": 3.0090152565880723, "grad_norm": 0.4001664690303196, "learning_rate": 4.100785418267739e-06, "loss": 0.3849, "step": 4339 }, { "epoch": 3.0097087378640777, "grad_norm": 0.39387857196310244, "learning_rate": 4.098404286221102e-06, "loss": 0.3454, "step": 4340 }, { "epoch": 3.0104022191400834, "grad_norm": 0.3825724846315654, "learning_rate": 4.0960233655019706e-06, "loss": 0.3866, "step": 4341 }, { "epoch": 3.0110957004160888, "grad_norm": 0.44220586162762765, "learning_rate": 4.093642656668414e-06, "loss": 0.3636, "step": 4342 }, { "epoch": 3.0117891816920945, "grad_norm": 0.3632444781743847, "learning_rate": 4.091262160278455e-06, "loss": 0.3515, "step": 4343 }, { "epoch": 3.0124826629681, "grad_norm": 0.34085402395558856, "learning_rate": 4.088881876890065e-06, "loss": 0.3301, "step": 4344 }, { "epoch": 3.0131761442441056, "grad_norm": 0.36949316661043635, "learning_rate": 4.086501807061164e-06, "loss": 0.3559, "step": 4345 }, { "epoch": 3.013869625520111, "grad_norm": 0.3529786631931451, "learning_rate": 4.084121951349625e-06, "loss": 0.3885, "step": 4346 }, { "epoch": 3.0145631067961167, "grad_norm": 0.3770742460761115, "learning_rate": 4.081742310313266e-06, "loss": 0.3687, "step": 4347 }, { "epoch": 3.015256588072122, "grad_norm": 0.37380731725930166, "learning_rate": 4.0793628845098595e-06, "loss": 0.3798, "step": 4348 }, { "epoch": 3.015950069348128, "grad_norm": 0.38757118955990333, "learning_rate": 4.076983674497125e-06, "loss": 0.3713, "step": 4349 }, { "epoch": 3.016643550624133, "grad_norm": 0.5098215009078929, "learning_rate": 4.074604680832733e-06, "loss": 0.3833, "step": 4350 }, { "epoch": 3.017337031900139, "grad_norm": 0.36314231069913294, "learning_rate": 4.0722259040743e-06, "loss": 0.3756, "step": 4351 }, { "epoch": 3.0180305131761442, "grad_norm": 0.3538191891435209, "learning_rate": 4.069847344779397e-06, "loss": 0.3172, "step": 4352 }, { "epoch": 3.01872399445215, "grad_norm": 0.5568284681540532, "learning_rate": 4.0674690035055405e-06, "loss": 0.4167, "step": 4353 }, { "epoch": 3.0194174757281553, "grad_norm": 0.42335869469240806, "learning_rate": 4.0650908808101965e-06, "loss": 0.3746, "step": 4354 }, { "epoch": 3.020110957004161, "grad_norm": 0.37612096423362434, "learning_rate": 4.0627129772507785e-06, "loss": 0.3401, "step": 4355 }, { "epoch": 3.0208044382801664, "grad_norm": 0.36977584141068426, "learning_rate": 4.0603352933846494e-06, "loss": 0.342, "step": 4356 }, { "epoch": 3.021497919556172, "grad_norm": 0.3932491615188737, "learning_rate": 4.057957829769123e-06, "loss": 0.3772, "step": 4357 }, { "epoch": 3.0221914008321775, "grad_norm": 0.4121961084134789, "learning_rate": 4.05558058696146e-06, "loss": 0.393, "step": 4358 }, { "epoch": 3.0228848821081833, "grad_norm": 0.48445694047534166, "learning_rate": 4.053203565518865e-06, "loss": 0.4083, "step": 4359 }, { "epoch": 3.0235783633841886, "grad_norm": 0.3656478121904701, "learning_rate": 4.0508267659984975e-06, "loss": 0.3668, "step": 4360 }, { "epoch": 3.0242718446601944, "grad_norm": 0.40094960423949705, "learning_rate": 
4.048450188957462e-06, "loss": 0.3851, "step": 4361 }, { "epoch": 3.0249653259361997, "grad_norm": 0.40240583829137166, "learning_rate": 4.046073834952812e-06, "loss": 0.3697, "step": 4362 }, { "epoch": 3.0256588072122055, "grad_norm": 0.43912272555439524, "learning_rate": 4.043697704541546e-06, "loss": 0.4239, "step": 4363 }, { "epoch": 3.026352288488211, "grad_norm": 0.3720974632416348, "learning_rate": 4.041321798280612e-06, "loss": 0.3764, "step": 4364 }, { "epoch": 3.0270457697642166, "grad_norm": 0.45346617219474145, "learning_rate": 4.038946116726906e-06, "loss": 0.3576, "step": 4365 }, { "epoch": 3.027739251040222, "grad_norm": 0.34260736533784614, "learning_rate": 4.03657066043727e-06, "loss": 0.367, "step": 4366 }, { "epoch": 3.0284327323162277, "grad_norm": 0.39723099833615794, "learning_rate": 4.034195429968494e-06, "loss": 0.4182, "step": 4367 }, { "epoch": 3.029126213592233, "grad_norm": 0.3666378531436607, "learning_rate": 4.031820425877313e-06, "loss": 0.3314, "step": 4368 }, { "epoch": 3.0298196948682388, "grad_norm": 0.3558768237219755, "learning_rate": 4.029445648720411e-06, "loss": 0.3576, "step": 4369 }, { "epoch": 3.030513176144244, "grad_norm": 0.40215255007578005, "learning_rate": 4.027071099054423e-06, "loss": 0.3855, "step": 4370 }, { "epoch": 3.03120665742025, "grad_norm": 0.45793754512393064, "learning_rate": 4.024696777435922e-06, "loss": 0.4424, "step": 4371 }, { "epoch": 3.031900138696255, "grad_norm": 0.37122390252052, "learning_rate": 4.022322684421432e-06, "loss": 0.369, "step": 4372 }, { "epoch": 3.032593619972261, "grad_norm": 0.3784314387255944, "learning_rate": 4.0199488205674256e-06, "loss": 0.377, "step": 4373 }, { "epoch": 3.0332871012482663, "grad_norm": 0.3902191024416504, "learning_rate": 4.017575186430318e-06, "loss": 0.3903, "step": 4374 }, { "epoch": 3.033980582524272, "grad_norm": 0.5406084682555689, "learning_rate": 4.015201782566471e-06, "loss": 0.3748, "step": 4375 }, { "epoch": 3.0346740638002774, "grad_norm": 0.399094481947949, "learning_rate": 4.012828609532193e-06, "loss": 0.3856, "step": 4376 }, { "epoch": 3.035367545076283, "grad_norm": 0.4238528346551883, "learning_rate": 4.010455667883741e-06, "loss": 0.3824, "step": 4377 }, { "epoch": 3.0360610263522885, "grad_norm": 0.8500650484267042, "learning_rate": 4.008082958177311e-06, "loss": 0.3648, "step": 4378 }, { "epoch": 3.0367545076282942, "grad_norm": 0.39045213922371413, "learning_rate": 4.005710480969055e-06, "loss": 0.3606, "step": 4379 }, { "epoch": 3.0374479889042996, "grad_norm": 0.4233018574190896, "learning_rate": 4.0033382368150605e-06, "loss": 0.4164, "step": 4380 }, { "epoch": 3.0381414701803053, "grad_norm": 0.4066581532418864, "learning_rate": 4.0009662262713635e-06, "loss": 0.3974, "step": 4381 }, { "epoch": 3.0388349514563107, "grad_norm": 0.46212936930378745, "learning_rate": 3.99859444989395e-06, "loss": 0.4105, "step": 4382 }, { "epoch": 3.0395284327323164, "grad_norm": 0.4187223555933156, "learning_rate": 3.996222908238744e-06, "loss": 0.3873, "step": 4383 }, { "epoch": 3.0402219140083218, "grad_norm": 0.3788092232501361, "learning_rate": 3.993851601861618e-06, "loss": 0.3761, "step": 4384 }, { "epoch": 3.0409153952843275, "grad_norm": 0.3945015512020017, "learning_rate": 3.991480531318391e-06, "loss": 0.4047, "step": 4385 }, { "epoch": 3.041608876560333, "grad_norm": 0.3762729893257261, "learning_rate": 3.989109697164823e-06, "loss": 0.3458, "step": 4386 }, { "epoch": 3.0423023578363386, "grad_norm": 0.3798084301011424, "learning_rate": 3.986739099956619e-06, 
"loss": 0.3858, "step": 4387 }, { "epoch": 3.042995839112344, "grad_norm": 0.3815554014462042, "learning_rate": 3.984368740249433e-06, "loss": 0.3798, "step": 4388 }, { "epoch": 3.0436893203883497, "grad_norm": 0.4605481225074819, "learning_rate": 3.981998618598858e-06, "loss": 0.4124, "step": 4389 }, { "epoch": 3.044382801664355, "grad_norm": 0.36333817698223786, "learning_rate": 3.979628735560436e-06, "loss": 0.3574, "step": 4390 }, { "epoch": 3.045076282940361, "grad_norm": 0.3967177621347652, "learning_rate": 3.9772590916896466e-06, "loss": 0.3639, "step": 4391 }, { "epoch": 3.045769764216366, "grad_norm": 0.3705992323429345, "learning_rate": 3.974889687541921e-06, "loss": 0.3781, "step": 4392 }, { "epoch": 3.046463245492372, "grad_norm": 0.3519964447329601, "learning_rate": 3.972520523672627e-06, "loss": 0.3878, "step": 4393 }, { "epoch": 3.0471567267683772, "grad_norm": 0.41492051483474657, "learning_rate": 3.970151600637081e-06, "loss": 0.4385, "step": 4394 }, { "epoch": 3.047850208044383, "grad_norm": 0.39278228628621326, "learning_rate": 3.967782918990542e-06, "loss": 0.4078, "step": 4395 }, { "epoch": 3.0485436893203883, "grad_norm": 0.40295121425496927, "learning_rate": 3.965414479288209e-06, "loss": 0.3569, "step": 4396 }, { "epoch": 3.049237170596394, "grad_norm": 0.37943102657769573, "learning_rate": 3.96304628208523e-06, "loss": 0.381, "step": 4397 }, { "epoch": 3.0499306518723994, "grad_norm": 0.3449115428865669, "learning_rate": 3.960678327936693e-06, "loss": 0.3368, "step": 4398 }, { "epoch": 3.050624133148405, "grad_norm": 0.38458165780369025, "learning_rate": 3.95831061739763e-06, "loss": 0.3573, "step": 4399 }, { "epoch": 3.0513176144244105, "grad_norm": 0.42200252453603304, "learning_rate": 3.955943151023014e-06, "loss": 0.3866, "step": 4400 }, { "epoch": 3.0520110957004163, "grad_norm": 0.3748800788726624, "learning_rate": 3.95357592936776e-06, "loss": 0.3662, "step": 4401 }, { "epoch": 3.0527045769764216, "grad_norm": 0.4440741913042754, "learning_rate": 3.951208952986731e-06, "loss": 0.3889, "step": 4402 }, { "epoch": 3.0533980582524274, "grad_norm": 0.7173710109753139, "learning_rate": 3.948842222434728e-06, "loss": 0.3632, "step": 4403 }, { "epoch": 3.0540915395284327, "grad_norm": 0.40818919279684757, "learning_rate": 3.9464757382664945e-06, "loss": 0.4191, "step": 4404 }, { "epoch": 3.0547850208044385, "grad_norm": 0.3904329790032371, "learning_rate": 3.944109501036717e-06, "loss": 0.407, "step": 4405 }, { "epoch": 3.055478502080444, "grad_norm": 0.40606757353672723, "learning_rate": 3.941743511300026e-06, "loss": 0.359, "step": 4406 }, { "epoch": 3.0561719833564496, "grad_norm": 0.6589194010129817, "learning_rate": 3.939377769610993e-06, "loss": 0.4049, "step": 4407 }, { "epoch": 3.056865464632455, "grad_norm": 0.3872819681503515, "learning_rate": 3.9370122765241285e-06, "loss": 0.3978, "step": 4408 }, { "epoch": 3.0575589459084607, "grad_norm": 0.37702985940007444, "learning_rate": 3.934647032593888e-06, "loss": 0.3765, "step": 4409 }, { "epoch": 3.058252427184466, "grad_norm": 0.3687171886954747, "learning_rate": 3.932282038374667e-06, "loss": 0.3437, "step": 4410 }, { "epoch": 3.0589459084604718, "grad_norm": 0.38087865965786877, "learning_rate": 3.929917294420804e-06, "loss": 0.3966, "step": 4411 }, { "epoch": 3.059639389736477, "grad_norm": 0.4568339837713104, "learning_rate": 3.927552801286578e-06, "loss": 0.3811, "step": 4412 }, { "epoch": 3.060332871012483, "grad_norm": 0.4977526444265689, "learning_rate": 3.925188559526207e-06, "loss": 0.3698, "step": 
4413 }, { "epoch": 3.061026352288488, "grad_norm": 0.403868775544694, "learning_rate": 3.922824569693852e-06, "loss": 0.3826, "step": 4414 }, { "epoch": 3.061719833564494, "grad_norm": 0.3637143719338198, "learning_rate": 3.920460832343619e-06, "loss": 0.3403, "step": 4415 }, { "epoch": 3.0624133148404993, "grad_norm": 0.38143344933604256, "learning_rate": 3.918097348029548e-06, "loss": 0.3652, "step": 4416 }, { "epoch": 3.063106796116505, "grad_norm": 0.38097375495634567, "learning_rate": 3.915734117305624e-06, "loss": 0.425, "step": 4417 }, { "epoch": 3.0638002773925104, "grad_norm": 0.3758435975327286, "learning_rate": 3.913371140725769e-06, "loss": 0.402, "step": 4418 }, { "epoch": 3.064493758668516, "grad_norm": 0.43236393960817493, "learning_rate": 3.911008418843849e-06, "loss": 0.4165, "step": 4419 }, { "epoch": 3.0651872399445215, "grad_norm": 0.46155058807197424, "learning_rate": 3.90864595221367e-06, "loss": 0.3354, "step": 4420 }, { "epoch": 3.0658807212205272, "grad_norm": 0.3912992436195119, "learning_rate": 3.906283741388974e-06, "loss": 0.361, "step": 4421 }, { "epoch": 3.0665742024965326, "grad_norm": 0.33556236693293084, "learning_rate": 3.903921786923447e-06, "loss": 0.3226, "step": 4422 }, { "epoch": 3.0672676837725383, "grad_norm": 0.374569138102664, "learning_rate": 3.901560089370717e-06, "loss": 0.3496, "step": 4423 }, { "epoch": 3.0679611650485437, "grad_norm": 0.38838078368995566, "learning_rate": 3.899198649284348e-06, "loss": 0.379, "step": 4424 }, { "epoch": 3.0686546463245494, "grad_norm": 0.44914310878085484, "learning_rate": 3.896837467217842e-06, "loss": 0.3542, "step": 4425 }, { "epoch": 3.0693481276005548, "grad_norm": 0.3807961890905395, "learning_rate": 3.894476543724643e-06, "loss": 0.3576, "step": 4426 }, { "epoch": 3.0700416088765605, "grad_norm": 0.372374503023027, "learning_rate": 3.8921158793581375e-06, "loss": 0.3156, "step": 4427 }, { "epoch": 3.070735090152566, "grad_norm": 0.40164708871920485, "learning_rate": 3.889755474671645e-06, "loss": 0.3544, "step": 4428 }, { "epoch": 3.0714285714285716, "grad_norm": 0.38329865374235583, "learning_rate": 3.887395330218429e-06, "loss": 0.3704, "step": 4429 }, { "epoch": 3.072122052704577, "grad_norm": 0.3604012596897037, "learning_rate": 3.88503544655169e-06, "loss": 0.3635, "step": 4430 }, { "epoch": 3.0728155339805827, "grad_norm": 0.36037728141675845, "learning_rate": 3.882675824224565e-06, "loss": 0.3449, "step": 4431 }, { "epoch": 3.073509015256588, "grad_norm": 0.5244135197845126, "learning_rate": 3.880316463790137e-06, "loss": 0.3926, "step": 4432 }, { "epoch": 3.074202496532594, "grad_norm": 0.4424754702166302, "learning_rate": 3.8779573658014204e-06, "loss": 0.4037, "step": 4433 }, { "epoch": 3.074895977808599, "grad_norm": 0.4151757387406011, "learning_rate": 3.8755985308113705e-06, "loss": 0.3547, "step": 4434 }, { "epoch": 3.075589459084605, "grad_norm": 0.4122879892622112, "learning_rate": 3.873239959372883e-06, "loss": 0.3903, "step": 4435 }, { "epoch": 3.0762829403606102, "grad_norm": 1.2072034955373634, "learning_rate": 3.870881652038788e-06, "loss": 0.362, "step": 4436 }, { "epoch": 3.076976421636616, "grad_norm": 0.4145341816521233, "learning_rate": 3.8685236093618574e-06, "loss": 0.3455, "step": 4437 }, { "epoch": 3.0776699029126213, "grad_norm": 0.4269189459427349, "learning_rate": 3.866165831894796e-06, "loss": 0.4168, "step": 4438 }, { "epoch": 3.078363384188627, "grad_norm": 0.39122307033715725, "learning_rate": 3.863808320190254e-06, "loss": 0.3492, "step": 4439 }, { "epoch": 
3.0790568654646324, "grad_norm": 0.37944658350183813, "learning_rate": 3.861451074800809e-06, "loss": 0.3621, "step": 4440 }, { "epoch": 3.079750346740638, "grad_norm": 0.4260255566730035, "learning_rate": 3.85909409627899e-06, "loss": 0.402, "step": 4441 }, { "epoch": 3.0804438280166435, "grad_norm": 0.40241700719531964, "learning_rate": 3.856737385177252e-06, "loss": 0.3976, "step": 4442 }, { "epoch": 3.0811373092926493, "grad_norm": 0.41342816133891147, "learning_rate": 3.85438094204799e-06, "loss": 0.4092, "step": 4443 }, { "epoch": 3.0818307905686546, "grad_norm": 0.36065332055095284, "learning_rate": 3.852024767443539e-06, "loss": 0.3479, "step": 4444 }, { "epoch": 3.0825242718446604, "grad_norm": 0.4019472444148632, "learning_rate": 3.849668861916169e-06, "loss": 0.4026, "step": 4445 }, { "epoch": 3.0832177531206657, "grad_norm": 0.37167539357588597, "learning_rate": 3.847313226018085e-06, "loss": 0.4287, "step": 4446 }, { "epoch": 3.0839112343966715, "grad_norm": 0.4102882834671503, "learning_rate": 3.844957860301434e-06, "loss": 0.3743, "step": 4447 }, { "epoch": 3.084604715672677, "grad_norm": 0.4743165595528586, "learning_rate": 3.8426027653182955e-06, "loss": 0.3825, "step": 4448 }, { "epoch": 3.0852981969486826, "grad_norm": 0.4946917617137666, "learning_rate": 3.840247941620683e-06, "loss": 0.3436, "step": 4449 }, { "epoch": 3.085991678224688, "grad_norm": 0.42854191436665223, "learning_rate": 3.8378933897605574e-06, "loss": 0.3683, "step": 4450 }, { "epoch": 3.0866851595006937, "grad_norm": 0.4163173289963767, "learning_rate": 3.835539110289804e-06, "loss": 0.3881, "step": 4451 }, { "epoch": 3.087378640776699, "grad_norm": 0.38090885153552223, "learning_rate": 3.83318510376025e-06, "loss": 0.3892, "step": 4452 }, { "epoch": 3.0880721220527048, "grad_norm": 0.7474026311641647, "learning_rate": 3.8308313707236566e-06, "loss": 0.3915, "step": 4453 }, { "epoch": 3.08876560332871, "grad_norm": 0.416515367957737, "learning_rate": 3.828477911731722e-06, "loss": 0.4082, "step": 4454 }, { "epoch": 3.089459084604716, "grad_norm": 0.38225540896970156, "learning_rate": 3.826124727336082e-06, "loss": 0.3708, "step": 4455 }, { "epoch": 3.090152565880721, "grad_norm": 0.5631861252468322, "learning_rate": 3.823771818088303e-06, "loss": 0.3425, "step": 4456 }, { "epoch": 3.090846047156727, "grad_norm": 0.388759511851324, "learning_rate": 3.8214191845398925e-06, "loss": 0.4166, "step": 4457 }, { "epoch": 3.0915395284327323, "grad_norm": 0.3842554999377556, "learning_rate": 3.8190668272422875e-06, "loss": 0.375, "step": 4458 }, { "epoch": 3.092233009708738, "grad_norm": 0.39135523441734305, "learning_rate": 3.8167147467468655e-06, "loss": 0.3473, "step": 4459 }, { "epoch": 3.0929264909847434, "grad_norm": 0.3891529251234928, "learning_rate": 3.814362943604938e-06, "loss": 0.3962, "step": 4460 }, { "epoch": 3.093619972260749, "grad_norm": 0.3915371632914156, "learning_rate": 3.81201141836775e-06, "loss": 0.3664, "step": 4461 }, { "epoch": 3.0943134535367545, "grad_norm": 0.36919388784572393, "learning_rate": 3.8096601715864824e-06, "loss": 0.3342, "step": 4462 }, { "epoch": 3.0950069348127602, "grad_norm": 0.38259619053637206, "learning_rate": 3.8073092038122483e-06, "loss": 0.4063, "step": 4463 }, { "epoch": 3.0957004160887656, "grad_norm": 0.38933793033415215, "learning_rate": 3.8049585155961e-06, "loss": 0.3243, "step": 4464 }, { "epoch": 3.0963938973647713, "grad_norm": 0.4338739880744865, "learning_rate": 3.80260810748902e-06, "loss": 0.3677, "step": 4465 }, { "epoch": 3.0970873786407767, 
"grad_norm": 0.44818598840734036, "learning_rate": 3.8002579800419276e-06, "loss": 0.3578, "step": 4466 }, { "epoch": 3.0977808599167824, "grad_norm": 0.39606357351676086, "learning_rate": 3.7979081338056756e-06, "loss": 0.3842, "step": 4467 }, { "epoch": 3.0984743411927878, "grad_norm": 0.43804550380169477, "learning_rate": 3.795558569331051e-06, "loss": 0.3739, "step": 4468 }, { "epoch": 3.0991678224687935, "grad_norm": 0.38043404835259115, "learning_rate": 3.7932092871687754e-06, "loss": 0.3838, "step": 4469 }, { "epoch": 3.099861303744799, "grad_norm": 0.41754009678952525, "learning_rate": 3.7908602878695035e-06, "loss": 0.4116, "step": 4470 }, { "epoch": 3.1005547850208046, "grad_norm": 0.3848029699126805, "learning_rate": 3.7885115719838215e-06, "loss": 0.3609, "step": 4471 }, { "epoch": 3.10124826629681, "grad_norm": 0.5704725192060731, "learning_rate": 3.7861631400622544e-06, "loss": 0.43, "step": 4472 }, { "epoch": 3.1019417475728157, "grad_norm": 0.3992635890503639, "learning_rate": 3.7838149926552565e-06, "loss": 0.3809, "step": 4473 }, { "epoch": 3.102635228848821, "grad_norm": 0.3834299870363859, "learning_rate": 3.781467130313215e-06, "loss": 0.3954, "step": 4474 }, { "epoch": 3.103328710124827, "grad_norm": 0.38099413579015073, "learning_rate": 3.7791195535864543e-06, "loss": 0.3741, "step": 4475 }, { "epoch": 3.104022191400832, "grad_norm": 0.4581920847878088, "learning_rate": 3.7767722630252258e-06, "loss": 0.376, "step": 4476 }, { "epoch": 3.104715672676838, "grad_norm": 0.3818859346766297, "learning_rate": 3.774425259179722e-06, "loss": 0.3731, "step": 4477 }, { "epoch": 3.1054091539528432, "grad_norm": 0.38971293661134465, "learning_rate": 3.7720785426000616e-06, "loss": 0.3745, "step": 4478 }, { "epoch": 3.106102635228849, "grad_norm": 0.3741115171117569, "learning_rate": 3.7697321138362964e-06, "loss": 0.3521, "step": 4479 }, { "epoch": 3.1067961165048543, "grad_norm": 0.427065926538677, "learning_rate": 3.7673859734384153e-06, "loss": 0.4011, "step": 4480 }, { "epoch": 3.10748959778086, "grad_norm": 0.44365746140787743, "learning_rate": 3.765040121956335e-06, "loss": 0.4323, "step": 4481 }, { "epoch": 3.1081830790568654, "grad_norm": 0.43525203993773753, "learning_rate": 3.7626945599399057e-06, "loss": 0.336, "step": 4482 }, { "epoch": 3.108876560332871, "grad_norm": 0.3777284600854801, "learning_rate": 3.7603492879389093e-06, "loss": 0.4419, "step": 4483 }, { "epoch": 3.1095700416088765, "grad_norm": 0.3762643513558504, "learning_rate": 3.7580043065030635e-06, "loss": 0.4031, "step": 4484 }, { "epoch": 3.1102635228848823, "grad_norm": 0.3776638373639323, "learning_rate": 3.755659616182011e-06, "loss": 0.378, "step": 4485 }, { "epoch": 3.1109570041608876, "grad_norm": 0.375558249870944, "learning_rate": 3.753315217525334e-06, "loss": 0.3407, "step": 4486 }, { "epoch": 3.1116504854368934, "grad_norm": 0.36291641879972486, "learning_rate": 3.750971111082542e-06, "loss": 0.3569, "step": 4487 }, { "epoch": 3.1123439667128987, "grad_norm": 0.4381425116892885, "learning_rate": 3.748627297403074e-06, "loss": 0.3881, "step": 4488 }, { "epoch": 3.1130374479889045, "grad_norm": 0.4206945966092455, "learning_rate": 3.746283777036306e-06, "loss": 0.3238, "step": 4489 }, { "epoch": 3.11373092926491, "grad_norm": 0.36659217003345, "learning_rate": 3.743940550531541e-06, "loss": 0.3441, "step": 4490 }, { "epoch": 3.1144244105409156, "grad_norm": 0.641827426954368, "learning_rate": 3.7415976184380125e-06, "loss": 0.3801, "step": 4491 }, { "epoch": 3.115117891816921, "grad_norm": 
0.39134077585171323, "learning_rate": 3.73925498130489e-06, "loss": 0.387, "step": 4492 }, { "epoch": 3.1158113730929267, "grad_norm": 0.38067619193735663, "learning_rate": 3.7369126396812694e-06, "loss": 0.3642, "step": 4493 }, { "epoch": 3.116504854368932, "grad_norm": 0.38495377604143216, "learning_rate": 3.7345705941161757e-06, "loss": 0.3639, "step": 4494 }, { "epoch": 3.1171983356449378, "grad_norm": 0.45917577105591284, "learning_rate": 3.732228845158572e-06, "loss": 0.4108, "step": 4495 }, { "epoch": 3.117891816920943, "grad_norm": 0.6862560129851659, "learning_rate": 3.729887393357345e-06, "loss": 0.4102, "step": 4496 }, { "epoch": 3.118585298196949, "grad_norm": 0.41394043543534825, "learning_rate": 3.7275462392613148e-06, "loss": 0.3654, "step": 4497 }, { "epoch": 3.119278779472954, "grad_norm": 0.390747317729945, "learning_rate": 3.725205383419231e-06, "loss": 0.362, "step": 4498 }, { "epoch": 3.11997226074896, "grad_norm": 0.43877439333836693, "learning_rate": 3.722864826379772e-06, "loss": 0.4057, "step": 4499 }, { "epoch": 3.1206657420249653, "grad_norm": 0.3773831358177694, "learning_rate": 3.7205245686915486e-06, "loss": 0.3507, "step": 4500 }, { "epoch": 3.121359223300971, "grad_norm": 0.42652590786524336, "learning_rate": 3.7181846109031007e-06, "loss": 0.4007, "step": 4501 }, { "epoch": 3.1220527045769764, "grad_norm": 0.4323533630204603, "learning_rate": 3.715844953562896e-06, "loss": 0.4064, "step": 4502 }, { "epoch": 3.122746185852982, "grad_norm": 0.4509676369289724, "learning_rate": 3.713505597219332e-06, "loss": 0.403, "step": 4503 }, { "epoch": 3.1234396671289875, "grad_norm": 0.3642261228219249, "learning_rate": 3.71116654242074e-06, "loss": 0.3487, "step": 4504 }, { "epoch": 3.1241331484049932, "grad_norm": 0.39544002363504477, "learning_rate": 3.7088277897153768e-06, "loss": 0.3329, "step": 4505 }, { "epoch": 3.1248266296809986, "grad_norm": 0.3979523920049841, "learning_rate": 3.706489339651429e-06, "loss": 0.3853, "step": 4506 }, { "epoch": 3.1255201109570043, "grad_norm": 0.42283589870748417, "learning_rate": 3.7041511927770117e-06, "loss": 0.3716, "step": 4507 }, { "epoch": 3.1262135922330097, "grad_norm": 0.38950004656984266, "learning_rate": 3.7018133496401688e-06, "loss": 0.2901, "step": 4508 }, { "epoch": 3.1269070735090154, "grad_norm": 0.3857349942222745, "learning_rate": 3.699475810788876e-06, "loss": 0.3959, "step": 4509 }, { "epoch": 3.1276005547850207, "grad_norm": 0.38254306786487735, "learning_rate": 3.6971385767710345e-06, "loss": 0.3679, "step": 4510 }, { "epoch": 3.1282940360610265, "grad_norm": 0.3699064806571636, "learning_rate": 3.694801648134474e-06, "loss": 0.3671, "step": 4511 }, { "epoch": 3.128987517337032, "grad_norm": 0.37103069167123504, "learning_rate": 3.6924650254269545e-06, "loss": 0.3857, "step": 4512 }, { "epoch": 3.1296809986130376, "grad_norm": 0.40369084618412426, "learning_rate": 3.6901287091961626e-06, "loss": 0.4098, "step": 4513 }, { "epoch": 3.130374479889043, "grad_norm": 0.4151237828786567, "learning_rate": 3.687792699989716e-06, "loss": 0.4195, "step": 4514 }, { "epoch": 3.1310679611650487, "grad_norm": 0.40708269901780325, "learning_rate": 3.685456998355158e-06, "loss": 0.3905, "step": 4515 }, { "epoch": 3.131761442441054, "grad_norm": 0.4433310014494769, "learning_rate": 3.6831216048399576e-06, "loss": 0.3944, "step": 4516 }, { "epoch": 3.13245492371706, "grad_norm": 0.4360622441782407, "learning_rate": 3.680786519991516e-06, "loss": 0.3275, "step": 4517 }, { "epoch": 3.133148404993065, "grad_norm": 
0.37538970177895226, "learning_rate": 3.678451744357161e-06, "loss": 0.3918, "step": 4518 }, { "epoch": 3.133841886269071, "grad_norm": 0.3703308227912376, "learning_rate": 3.6761172784841446e-06, "loss": 0.371, "step": 4519 }, { "epoch": 3.1345353675450762, "grad_norm": 0.4020934089664458, "learning_rate": 3.6737831229196506e-06, "loss": 0.3864, "step": 4520 }, { "epoch": 3.135228848821082, "grad_norm": 0.4086005509435353, "learning_rate": 3.671449278210787e-06, "loss": 0.4016, "step": 4521 }, { "epoch": 3.1359223300970873, "grad_norm": 0.3673901868730733, "learning_rate": 3.6691157449045915e-06, "loss": 0.382, "step": 4522 }, { "epoch": 3.136615811373093, "grad_norm": 0.4809131935124871, "learning_rate": 3.666782523548027e-06, "loss": 0.3446, "step": 4523 }, { "epoch": 3.1373092926490984, "grad_norm": 0.41684086141260485, "learning_rate": 3.664449614687983e-06, "loss": 0.3499, "step": 4524 }, { "epoch": 3.138002773925104, "grad_norm": 0.4036790077975126, "learning_rate": 3.6621170188712773e-06, "loss": 0.3315, "step": 4525 }, { "epoch": 3.1386962552011095, "grad_norm": 0.41920618795044234, "learning_rate": 3.6597847366446524e-06, "loss": 0.373, "step": 4526 }, { "epoch": 3.1393897364771153, "grad_norm": 0.4496728427869807, "learning_rate": 3.6574527685547802e-06, "loss": 0.3833, "step": 4527 }, { "epoch": 3.1400832177531206, "grad_norm": 0.39037248721873086, "learning_rate": 3.655121115148254e-06, "loss": 0.3908, "step": 4528 }, { "epoch": 3.1407766990291264, "grad_norm": 0.43033088505115946, "learning_rate": 3.6527897769716e-06, "loss": 0.3588, "step": 4529 }, { "epoch": 3.1414701803051317, "grad_norm": 0.3882364138862382, "learning_rate": 3.650458754571262e-06, "loss": 0.3889, "step": 4530 }, { "epoch": 3.1421636615811375, "grad_norm": 0.38693129603089405, "learning_rate": 3.6481280484936215e-06, "loss": 0.3548, "step": 4531 }, { "epoch": 3.142857142857143, "grad_norm": 0.401522254946418, "learning_rate": 3.6457976592849753e-06, "loss": 0.3837, "step": 4532 }, { "epoch": 3.1435506241331486, "grad_norm": 0.4144178746518509, "learning_rate": 3.643467587491549e-06, "loss": 0.3953, "step": 4533 }, { "epoch": 3.144244105409154, "grad_norm": 0.4026627646459669, "learning_rate": 3.6411378336594966e-06, "loss": 0.3769, "step": 4534 }, { "epoch": 3.1449375866851597, "grad_norm": 0.395956594977255, "learning_rate": 3.6388083983348948e-06, "loss": 0.3372, "step": 4535 }, { "epoch": 3.145631067961165, "grad_norm": 0.4663168141131961, "learning_rate": 3.636479282063745e-06, "loss": 0.3863, "step": 4536 }, { "epoch": 3.1463245492371708, "grad_norm": 0.4400011726696039, "learning_rate": 3.6341504853919778e-06, "loss": 0.3758, "step": 4537 }, { "epoch": 3.147018030513176, "grad_norm": 0.3921494050754728, "learning_rate": 3.631822008865445e-06, "loss": 0.3677, "step": 4538 }, { "epoch": 3.147711511789182, "grad_norm": 0.4016681240367152, "learning_rate": 3.6294938530299216e-06, "loss": 0.3851, "step": 4539 }, { "epoch": 3.148404993065187, "grad_norm": 0.35104120813762685, "learning_rate": 3.6271660184311164e-06, "loss": 0.3677, "step": 4540 }, { "epoch": 3.149098474341193, "grad_norm": 0.3988607916651478, "learning_rate": 3.624838505614653e-06, "loss": 0.3922, "step": 4541 }, { "epoch": 3.1497919556171983, "grad_norm": 0.43849015745767567, "learning_rate": 3.6225113151260848e-06, "loss": 0.4291, "step": 4542 }, { "epoch": 3.150485436893204, "grad_norm": 0.39884929207837433, "learning_rate": 3.6201844475108884e-06, "loss": 0.3746, "step": 4543 }, { "epoch": 3.1511789181692094, "grad_norm": 
0.38140417716325825, "learning_rate": 3.6178579033144635e-06, "loss": 0.3868, "step": 4544 }, { "epoch": 3.151872399445215, "grad_norm": 0.4214282092022257, "learning_rate": 3.615531683082137e-06, "loss": 0.3756, "step": 4545 }, { "epoch": 3.1525658807212205, "grad_norm": 0.37454025023252646, "learning_rate": 3.613205787359157e-06, "loss": 0.3443, "step": 4546 }, { "epoch": 3.1532593619972262, "grad_norm": 0.4532746477859641, "learning_rate": 3.610880216690697e-06, "loss": 0.3905, "step": 4547 }, { "epoch": 3.1539528432732316, "grad_norm": 0.4506339576500096, "learning_rate": 3.6085549716218517e-06, "loss": 0.3577, "step": 4548 }, { "epoch": 3.1546463245492373, "grad_norm": 0.4472297946605434, "learning_rate": 3.6062300526976448e-06, "loss": 0.4038, "step": 4549 }, { "epoch": 3.1553398058252426, "grad_norm": 0.36535844144971125, "learning_rate": 3.6039054604630202e-06, "loss": 0.3519, "step": 4550 }, { "epoch": 3.1560332871012484, "grad_norm": 0.39295369856605805, "learning_rate": 3.601581195462845e-06, "loss": 0.3579, "step": 4551 }, { "epoch": 3.1567267683772537, "grad_norm": 0.37912670237497886, "learning_rate": 3.5992572582419094e-06, "loss": 0.3209, "step": 4552 }, { "epoch": 3.1574202496532595, "grad_norm": 0.38743517658601034, "learning_rate": 3.596933649344927e-06, "loss": 0.4002, "step": 4553 }, { "epoch": 3.158113730929265, "grad_norm": 0.3822231342830769, "learning_rate": 3.5946103693165367e-06, "loss": 0.3685, "step": 4554 }, { "epoch": 3.1588072122052706, "grad_norm": 0.39259526918622556, "learning_rate": 3.5922874187012977e-06, "loss": 0.3876, "step": 4555 }, { "epoch": 3.159500693481276, "grad_norm": 0.3523415802257681, "learning_rate": 3.589964798043691e-06, "loss": 0.368, "step": 4556 }, { "epoch": 3.1601941747572817, "grad_norm": 0.45927927993817286, "learning_rate": 3.5876425078881245e-06, "loss": 0.4185, "step": 4557 }, { "epoch": 3.160887656033287, "grad_norm": 0.40915345725167057, "learning_rate": 3.5853205487789245e-06, "loss": 0.3709, "step": 4558 }, { "epoch": 3.161581137309293, "grad_norm": 0.4478400318131345, "learning_rate": 3.5829989212603445e-06, "loss": 0.3992, "step": 4559 }, { "epoch": 3.162274618585298, "grad_norm": 0.43303004832747666, "learning_rate": 3.580677625876554e-06, "loss": 0.3973, "step": 4560 }, { "epoch": 3.162968099861304, "grad_norm": 0.36561616440696576, "learning_rate": 3.578356663171648e-06, "loss": 0.3916, "step": 4561 }, { "epoch": 3.163661581137309, "grad_norm": 0.3671723246093496, "learning_rate": 3.576036033689645e-06, "loss": 0.3623, "step": 4562 }, { "epoch": 3.164355062413315, "grad_norm": 0.4045849606754154, "learning_rate": 3.573715737974483e-06, "loss": 0.4287, "step": 4563 }, { "epoch": 3.1650485436893203, "grad_norm": 0.4009710230310148, "learning_rate": 3.5713957765700224e-06, "loss": 0.3872, "step": 4564 }, { "epoch": 3.165742024965326, "grad_norm": 0.42273243627138596, "learning_rate": 3.5690761500200445e-06, "loss": 0.4277, "step": 4565 }, { "epoch": 3.1664355062413314, "grad_norm": 0.47947504806208163, "learning_rate": 3.5667568588682523e-06, "loss": 0.3627, "step": 4566 }, { "epoch": 3.167128987517337, "grad_norm": 0.43195614980791464, "learning_rate": 3.5644379036582747e-06, "loss": 0.4079, "step": 4567 }, { "epoch": 3.1678224687933425, "grad_norm": 0.4149106884186393, "learning_rate": 3.5621192849336563e-06, "loss": 0.3718, "step": 4568 }, { "epoch": 3.1685159500693483, "grad_norm": 0.40573620487210077, "learning_rate": 3.5598010032378614e-06, "loss": 0.3739, "step": 4569 }, { "epoch": 3.1692094313453536, "grad_norm": 
0.3873702181727803, "learning_rate": 3.557483059114283e-06, "loss": 0.3394, "step": 4570 }, { "epoch": 3.1699029126213594, "grad_norm": 0.38556620066222613, "learning_rate": 3.5551654531062283e-06, "loss": 0.3736, "step": 4571 }, { "epoch": 3.1705963938973647, "grad_norm": 0.4838950478437859, "learning_rate": 3.5528481857569276e-06, "loss": 0.4417, "step": 4572 }, { "epoch": 3.1712898751733705, "grad_norm": 0.4046176789685047, "learning_rate": 3.5505312576095295e-06, "loss": 0.3469, "step": 4573 }, { "epoch": 3.171983356449376, "grad_norm": 0.3881344136985285, "learning_rate": 3.5482146692071084e-06, "loss": 0.3879, "step": 4574 }, { "epoch": 3.1726768377253816, "grad_norm": 0.3861992773489338, "learning_rate": 3.545898421092653e-06, "loss": 0.3769, "step": 4575 }, { "epoch": 3.173370319001387, "grad_norm": 0.39701912747056467, "learning_rate": 3.5435825138090785e-06, "loss": 0.3885, "step": 4576 }, { "epoch": 3.1740638002773927, "grad_norm": 0.5519607197176933, "learning_rate": 3.5412669478992143e-06, "loss": 0.3682, "step": 4577 }, { "epoch": 3.174757281553398, "grad_norm": 0.40347950556372547, "learning_rate": 3.5389517239058126e-06, "loss": 0.3676, "step": 4578 }, { "epoch": 3.1754507628294038, "grad_norm": 0.3898340025145356, "learning_rate": 3.5366368423715457e-06, "loss": 0.3613, "step": 4579 }, { "epoch": 3.176144244105409, "grad_norm": 0.4204797053937529, "learning_rate": 3.534322303839005e-06, "loss": 0.3872, "step": 4580 }, { "epoch": 3.176837725381415, "grad_norm": 0.4243223771050655, "learning_rate": 3.5320081088507006e-06, "loss": 0.358, "step": 4581 }, { "epoch": 3.17753120665742, "grad_norm": 0.3678015990819988, "learning_rate": 3.5296942579490645e-06, "loss": 0.3539, "step": 4582 }, { "epoch": 3.178224687933426, "grad_norm": 0.3904807604473883, "learning_rate": 3.5273807516764456e-06, "loss": 0.3692, "step": 4583 }, { "epoch": 3.1789181692094313, "grad_norm": 0.3869752461794236, "learning_rate": 3.525067590575112e-06, "loss": 0.3662, "step": 4584 }, { "epoch": 3.179611650485437, "grad_norm": 0.39326699730368225, "learning_rate": 3.5227547751872548e-06, "loss": 0.4055, "step": 4585 }, { "epoch": 3.1803051317614424, "grad_norm": 0.4099980159577789, "learning_rate": 3.5204423060549794e-06, "loss": 0.3886, "step": 4586 }, { "epoch": 3.180998613037448, "grad_norm": 0.3549444392541635, "learning_rate": 3.518130183720312e-06, "loss": 0.3569, "step": 4587 }, { "epoch": 3.1816920943134535, "grad_norm": 0.4175436791370744, "learning_rate": 3.515818408725198e-06, "loss": 0.3676, "step": 4588 }, { "epoch": 3.1823855755894592, "grad_norm": 0.3585594232941696, "learning_rate": 3.5135069816115e-06, "loss": 0.3664, "step": 4589 }, { "epoch": 3.1830790568654646, "grad_norm": 0.370043642543432, "learning_rate": 3.511195902920998e-06, "loss": 0.3692, "step": 4590 }, { "epoch": 3.1837725381414703, "grad_norm": 0.38135068968923674, "learning_rate": 3.5088851731953956e-06, "loss": 0.3668, "step": 4591 }, { "epoch": 3.1844660194174756, "grad_norm": 0.3907094132964532, "learning_rate": 3.5065747929763093e-06, "loss": 0.3366, "step": 4592 }, { "epoch": 3.1851595006934814, "grad_norm": 0.4233589705480947, "learning_rate": 3.5042647628052733e-06, "loss": 0.3742, "step": 4593 }, { "epoch": 3.1858529819694867, "grad_norm": 0.4116355800782619, "learning_rate": 3.5019550832237458e-06, "loss": 0.405, "step": 4594 }, { "epoch": 3.1865464632454925, "grad_norm": 0.400903550984469, "learning_rate": 3.4996457547730985e-06, "loss": 0.3766, "step": 4595 }, { "epoch": 3.187239944521498, "grad_norm": 
0.39370603520421876, "learning_rate": 3.49733677799462e-06, "loss": 0.375, "step": 4596 }, { "epoch": 3.1879334257975036, "grad_norm": 0.38725808482091406, "learning_rate": 3.4950281534295176e-06, "loss": 0.3668, "step": 4597 }, { "epoch": 3.188626907073509, "grad_norm": 0.49294027754251885, "learning_rate": 3.4927198816189156e-06, "loss": 0.3109, "step": 4598 }, { "epoch": 3.1893203883495147, "grad_norm": 0.4715187484089429, "learning_rate": 3.4904119631038585e-06, "loss": 0.4018, "step": 4599 }, { "epoch": 3.19001386962552, "grad_norm": 0.4041807094580422, "learning_rate": 3.488104398425304e-06, "loss": 0.3645, "step": 4600 }, { "epoch": 3.190707350901526, "grad_norm": 0.4142833713595007, "learning_rate": 3.485797188124127e-06, "loss": 0.3994, "step": 4601 }, { "epoch": 3.191400832177531, "grad_norm": 0.39521559316436344, "learning_rate": 3.4834903327411253e-06, "loss": 0.4013, "step": 4602 }, { "epoch": 3.192094313453537, "grad_norm": 0.621138806112336, "learning_rate": 3.4811838328170044e-06, "loss": 0.3708, "step": 4603 }, { "epoch": 3.192787794729542, "grad_norm": 0.4221170334782303, "learning_rate": 3.4788776888923947e-06, "loss": 0.3884, "step": 4604 }, { "epoch": 3.193481276005548, "grad_norm": 0.38681617623347475, "learning_rate": 3.4765719015078385e-06, "loss": 0.3368, "step": 4605 }, { "epoch": 3.1941747572815533, "grad_norm": 0.41633729932224134, "learning_rate": 3.4742664712037944e-06, "loss": 0.3231, "step": 4606 }, { "epoch": 3.194868238557559, "grad_norm": 0.3604596676437244, "learning_rate": 3.47196139852064e-06, "loss": 0.3383, "step": 4607 }, { "epoch": 3.1955617198335644, "grad_norm": 0.42882658298637893, "learning_rate": 3.469656683998668e-06, "loss": 0.3845, "step": 4608 }, { "epoch": 3.19625520110957, "grad_norm": 0.3828207706645611, "learning_rate": 3.4673523281780856e-06, "loss": 0.3577, "step": 4609 }, { "epoch": 3.1969486823855755, "grad_norm": 0.35462348082915773, "learning_rate": 3.4650483315990157e-06, "loss": 0.3643, "step": 4610 }, { "epoch": 3.1976421636615813, "grad_norm": 0.36150254945936683, "learning_rate": 3.4627446948015007e-06, "loss": 0.3662, "step": 4611 }, { "epoch": 3.1983356449375866, "grad_norm": 0.3937477429821671, "learning_rate": 3.4604414183254974e-06, "loss": 0.379, "step": 4612 }, { "epoch": 3.1990291262135924, "grad_norm": 0.3694025345050062, "learning_rate": 3.458138502710876e-06, "loss": 0.3601, "step": 4613 }, { "epoch": 3.1997226074895977, "grad_norm": 0.3885387581009498, "learning_rate": 3.4558359484974226e-06, "loss": 0.3689, "step": 4614 }, { "epoch": 3.2004160887656035, "grad_norm": 0.4140533461404799, "learning_rate": 3.4535337562248382e-06, "loss": 0.4066, "step": 4615 }, { "epoch": 3.201109570041609, "grad_norm": 0.6309648393434568, "learning_rate": 3.451231926432742e-06, "loss": 0.3584, "step": 4616 }, { "epoch": 3.2018030513176146, "grad_norm": 0.4944290627120866, "learning_rate": 3.4489304596606664e-06, "loss": 0.3371, "step": 4617 }, { "epoch": 3.20249653259362, "grad_norm": 0.4556730862357426, "learning_rate": 3.4466293564480562e-06, "loss": 0.4076, "step": 4618 }, { "epoch": 3.2031900138696257, "grad_norm": 0.38401356145804477, "learning_rate": 3.4443286173342737e-06, "loss": 0.3562, "step": 4619 }, { "epoch": 3.203883495145631, "grad_norm": 0.3727558156754104, "learning_rate": 3.4420282428585988e-06, "loss": 0.3721, "step": 4620 }, { "epoch": 3.2045769764216367, "grad_norm": 0.42192983817694546, "learning_rate": 3.4397282335602205e-06, "loss": 0.3593, "step": 4621 }, { "epoch": 3.205270457697642, "grad_norm": 
0.4215612665330896, "learning_rate": 3.4374285899782444e-06, "loss": 0.3758, "step": 4622 }, { "epoch": 3.205963938973648, "grad_norm": 0.38203795863831636, "learning_rate": 3.435129312651688e-06, "loss": 0.3806, "step": 4623 }, { "epoch": 3.206657420249653, "grad_norm": 0.512997472099785, "learning_rate": 3.4328304021194905e-06, "loss": 0.3708, "step": 4624 }, { "epoch": 3.207350901525659, "grad_norm": 0.38846156156263434, "learning_rate": 3.430531858920495e-06, "loss": 0.3862, "step": 4625 }, { "epoch": 3.2080443828016643, "grad_norm": 0.3709959064371235, "learning_rate": 3.4282336835934647e-06, "loss": 0.3754, "step": 4626 }, { "epoch": 3.20873786407767, "grad_norm": 0.37436449041591935, "learning_rate": 3.425935876677077e-06, "loss": 0.3519, "step": 4627 }, { "epoch": 3.2094313453536754, "grad_norm": 0.43123123274058006, "learning_rate": 3.4236384387099174e-06, "loss": 0.3806, "step": 4628 }, { "epoch": 3.210124826629681, "grad_norm": 0.38365617875949104, "learning_rate": 3.421341370230493e-06, "loss": 0.39, "step": 4629 }, { "epoch": 3.2108183079056865, "grad_norm": 0.4015275316706064, "learning_rate": 3.4190446717772185e-06, "loss": 0.3946, "step": 4630 }, { "epoch": 3.2115117891816922, "grad_norm": 0.3760587210598077, "learning_rate": 3.4167483438884223e-06, "loss": 0.3467, "step": 4631 }, { "epoch": 3.2122052704576975, "grad_norm": 0.43362930010163175, "learning_rate": 3.4144523871023494e-06, "loss": 0.411, "step": 4632 }, { "epoch": 3.2128987517337033, "grad_norm": 0.38667502702388695, "learning_rate": 3.4121568019571528e-06, "loss": 0.3636, "step": 4633 }, { "epoch": 3.2135922330097086, "grad_norm": 0.4319684190430007, "learning_rate": 3.4098615889909025e-06, "loss": 0.3482, "step": 4634 }, { "epoch": 3.2142857142857144, "grad_norm": 0.43155431710589437, "learning_rate": 3.4075667487415785e-06, "loss": 0.4027, "step": 4635 }, { "epoch": 3.2149791955617197, "grad_norm": 0.45202120508256316, "learning_rate": 3.4052722817470767e-06, "loss": 0.4174, "step": 4636 }, { "epoch": 3.2156726768377255, "grad_norm": 0.3647606637363773, "learning_rate": 3.4029781885452007e-06, "loss": 0.3712, "step": 4637 }, { "epoch": 3.216366158113731, "grad_norm": 0.40252336097090957, "learning_rate": 3.400684469673673e-06, "loss": 0.4076, "step": 4638 }, { "epoch": 3.2170596393897366, "grad_norm": 0.41251079528321616, "learning_rate": 3.398391125670123e-06, "loss": 0.3832, "step": 4639 }, { "epoch": 3.217753120665742, "grad_norm": 0.433025357876695, "learning_rate": 3.3960981570720918e-06, "loss": 0.3585, "step": 4640 }, { "epoch": 3.2184466019417477, "grad_norm": 0.3987377400454232, "learning_rate": 3.3938055644170387e-06, "loss": 0.3563, "step": 4641 }, { "epoch": 3.219140083217753, "grad_norm": 0.3724300335347595, "learning_rate": 3.391513348242328e-06, "loss": 0.3402, "step": 4642 }, { "epoch": 3.219833564493759, "grad_norm": 0.4322215115821083, "learning_rate": 3.3892215090852387e-06, "loss": 0.3724, "step": 4643 }, { "epoch": 3.220527045769764, "grad_norm": 0.3985720750929115, "learning_rate": 3.3869300474829625e-06, "loss": 0.4023, "step": 4644 }, { "epoch": 3.22122052704577, "grad_norm": 0.3978605034531885, "learning_rate": 3.3846389639726007e-06, "loss": 0.398, "step": 4645 }, { "epoch": 3.221914008321775, "grad_norm": 0.4405848916175234, "learning_rate": 3.382348259091165e-06, "loss": 0.3993, "step": 4646 }, { "epoch": 3.222607489597781, "grad_norm": 0.39587299525288566, "learning_rate": 3.380057933375584e-06, "loss": 0.4011, "step": 4647 }, { "epoch": 3.2233009708737863, "grad_norm": 
0.4420675634961044, "learning_rate": 3.37776798736269e-06, "loss": 0.36, "step": 4648 }, { "epoch": 3.223994452149792, "grad_norm": 0.6828239560594047, "learning_rate": 3.375478421589232e-06, "loss": 0.351, "step": 4649 }, { "epoch": 3.2246879334257974, "grad_norm": 0.3933581398346004, "learning_rate": 3.373189236591867e-06, "loss": 0.3705, "step": 4650 }, { "epoch": 3.225381414701803, "grad_norm": 0.40404088842264474, "learning_rate": 3.3709004329071613e-06, "loss": 0.374, "step": 4651 }, { "epoch": 3.2260748959778085, "grad_norm": 0.3742425957208491, "learning_rate": 3.368612011071597e-06, "loss": 0.3744, "step": 4652 }, { "epoch": 3.2267683772538143, "grad_norm": 0.3593800806284897, "learning_rate": 3.366323971621562e-06, "loss": 0.3139, "step": 4653 }, { "epoch": 3.2274618585298196, "grad_norm": 0.37775553551268687, "learning_rate": 3.3640363150933574e-06, "loss": 0.3681, "step": 4654 }, { "epoch": 3.2281553398058254, "grad_norm": 0.4205893592818195, "learning_rate": 3.361749042023189e-06, "loss": 0.3747, "step": 4655 }, { "epoch": 3.2288488210818307, "grad_norm": 0.39380798785953863, "learning_rate": 3.359462152947182e-06, "loss": 0.3979, "step": 4656 }, { "epoch": 3.2295423023578365, "grad_norm": 0.4170226723703579, "learning_rate": 3.357175648401366e-06, "loss": 0.4508, "step": 4657 }, { "epoch": 3.230235783633842, "grad_norm": 0.4188910446555926, "learning_rate": 3.3548895289216802e-06, "loss": 0.3853, "step": 4658 }, { "epoch": 3.2309292649098476, "grad_norm": 0.3963257745135273, "learning_rate": 3.3526037950439748e-06, "loss": 0.3628, "step": 4659 }, { "epoch": 3.231622746185853, "grad_norm": 0.3858475732876974, "learning_rate": 3.3503184473040074e-06, "loss": 0.3611, "step": 4660 }, { "epoch": 3.2323162274618586, "grad_norm": 0.49357399049790474, "learning_rate": 3.3480334862374484e-06, "loss": 0.4091, "step": 4661 }, { "epoch": 3.233009708737864, "grad_norm": 0.3614221256334901, "learning_rate": 3.345748912379878e-06, "loss": 0.3545, "step": 4662 }, { "epoch": 3.2337031900138697, "grad_norm": 0.376766513471375, "learning_rate": 3.34346472626678e-06, "loss": 0.3884, "step": 4663 }, { "epoch": 3.234396671289875, "grad_norm": 0.421475352248187, "learning_rate": 3.3411809284335527e-06, "loss": 0.3468, "step": 4664 }, { "epoch": 3.235090152565881, "grad_norm": 0.4049583045952764, "learning_rate": 3.338897519415502e-06, "loss": 0.3711, "step": 4665 }, { "epoch": 3.235783633841886, "grad_norm": 0.4096439404025271, "learning_rate": 3.336614499747844e-06, "loss": 0.3842, "step": 4666 }, { "epoch": 3.236477115117892, "grad_norm": 0.39925744089105675, "learning_rate": 3.3343318699657e-06, "loss": 0.382, "step": 4667 }, { "epoch": 3.2371705963938973, "grad_norm": 0.40684812844166546, "learning_rate": 3.3320496306041016e-06, "loss": 0.3792, "step": 4668 }, { "epoch": 3.237864077669903, "grad_norm": 0.38669919153425775, "learning_rate": 3.329767782197991e-06, "loss": 0.3842, "step": 4669 }, { "epoch": 3.2385575589459084, "grad_norm": 0.359739471700017, "learning_rate": 3.3274863252822155e-06, "loss": 0.3692, "step": 4670 }, { "epoch": 3.239251040221914, "grad_norm": 0.3821369711369388, "learning_rate": 3.325205260391532e-06, "loss": 0.3735, "step": 4671 }, { "epoch": 3.2399445214979194, "grad_norm": 0.39425492535221834, "learning_rate": 3.3229245880606063e-06, "loss": 0.3463, "step": 4672 }, { "epoch": 3.240638002773925, "grad_norm": 0.4021031385456516, "learning_rate": 3.32064430882401e-06, "loss": 0.3319, "step": 4673 }, { "epoch": 3.2413314840499305, "grad_norm": 0.4140595911675711, 
"learning_rate": 3.3183644232162264e-06, "loss": 0.3751, "step": 4674 }, { "epoch": 3.2420249653259363, "grad_norm": 0.7733574661774831, "learning_rate": 3.3160849317716436e-06, "loss": 0.3804, "step": 4675 }, { "epoch": 3.2427184466019416, "grad_norm": 0.6079882560885275, "learning_rate": 3.313805835024556e-06, "loss": 0.3749, "step": 4676 }, { "epoch": 3.2434119278779474, "grad_norm": 0.3599516991109779, "learning_rate": 3.31152713350917e-06, "loss": 0.3632, "step": 4677 }, { "epoch": 3.2441054091539527, "grad_norm": 0.3856308005868276, "learning_rate": 3.3092488277595956e-06, "loss": 0.3901, "step": 4678 }, { "epoch": 3.2447988904299585, "grad_norm": 0.38346741669811346, "learning_rate": 3.306970918309851e-06, "loss": 0.3877, "step": 4679 }, { "epoch": 3.245492371705964, "grad_norm": 0.41299761280186253, "learning_rate": 3.3046934056938597e-06, "loss": 0.4434, "step": 4680 }, { "epoch": 3.2461858529819696, "grad_norm": 0.37844739613202444, "learning_rate": 3.3024162904454584e-06, "loss": 0.4111, "step": 4681 }, { "epoch": 3.246879334257975, "grad_norm": 0.40472183159992986, "learning_rate": 3.300139573098381e-06, "loss": 0.3917, "step": 4682 }, { "epoch": 3.2475728155339807, "grad_norm": 0.4343151573089637, "learning_rate": 3.2978632541862788e-06, "loss": 0.3872, "step": 4683 }, { "epoch": 3.248266296809986, "grad_norm": 0.386539834709046, "learning_rate": 3.295587334242703e-06, "loss": 0.3422, "step": 4684 }, { "epoch": 3.248959778085992, "grad_norm": 0.41752111355890925, "learning_rate": 3.293311813801111e-06, "loss": 0.4358, "step": 4685 }, { "epoch": 3.249653259361997, "grad_norm": 0.5154575876616192, "learning_rate": 3.29103669339487e-06, "loss": 0.3778, "step": 4686 }, { "epoch": 3.250346740638003, "grad_norm": 0.3897439344895969, "learning_rate": 3.2887619735572517e-06, "loss": 0.4008, "step": 4687 }, { "epoch": 3.251040221914008, "grad_norm": 0.37212187353874293, "learning_rate": 3.286487654821432e-06, "loss": 0.3511, "step": 4688 }, { "epoch": 3.251733703190014, "grad_norm": 0.38096950234035915, "learning_rate": 3.2842137377204977e-06, "loss": 0.3933, "step": 4689 }, { "epoch": 3.2524271844660193, "grad_norm": 0.45556083132043235, "learning_rate": 3.2819402227874364e-06, "loss": 0.3588, "step": 4690 }, { "epoch": 3.253120665742025, "grad_norm": 0.46434375002141354, "learning_rate": 3.2796671105551425e-06, "loss": 0.3659, "step": 4691 }, { "epoch": 3.2538141470180304, "grad_norm": 0.71621304486904, "learning_rate": 3.2773944015564203e-06, "loss": 0.3612, "step": 4692 }, { "epoch": 3.254507628294036, "grad_norm": 0.46015280690996024, "learning_rate": 3.275122096323974e-06, "loss": 0.3706, "step": 4693 }, { "epoch": 3.2552011095700415, "grad_norm": 0.39217217473107296, "learning_rate": 3.272850195390417e-06, "loss": 0.3564, "step": 4694 }, { "epoch": 3.2558945908460473, "grad_norm": 0.3757902600375741, "learning_rate": 3.2705786992882656e-06, "loss": 0.3831, "step": 4695 }, { "epoch": 3.2565880721220526, "grad_norm": 0.44250048647734747, "learning_rate": 3.268307608549941e-06, "loss": 0.3645, "step": 4696 }, { "epoch": 3.2572815533980584, "grad_norm": 0.43052569761464365, "learning_rate": 3.2660369237077726e-06, "loss": 0.4026, "step": 4697 }, { "epoch": 3.2579750346740637, "grad_norm": 0.38384950579415383, "learning_rate": 3.2637666452939908e-06, "loss": 0.3712, "step": 4698 }, { "epoch": 3.2586685159500695, "grad_norm": 0.40359480650687335, "learning_rate": 3.2614967738407332e-06, "loss": 0.3735, "step": 4699 }, { "epoch": 3.259361997226075, "grad_norm": 0.406477739696617, 
"learning_rate": 3.2592273098800396e-06, "loss": 0.3656, "step": 4700 }, { "epoch": 3.2600554785020806, "grad_norm": 0.47092113457420953, "learning_rate": 3.2569582539438577e-06, "loss": 0.3872, "step": 4701 }, { "epoch": 3.260748959778086, "grad_norm": 0.42953117680258723, "learning_rate": 3.254689606564039e-06, "loss": 0.3771, "step": 4702 }, { "epoch": 3.2614424410540916, "grad_norm": 0.37939592440727415, "learning_rate": 3.252421368272336e-06, "loss": 0.3772, "step": 4703 }, { "epoch": 3.262135922330097, "grad_norm": 0.4047359199208624, "learning_rate": 3.250153539600407e-06, "loss": 0.3458, "step": 4704 }, { "epoch": 3.2628294036061027, "grad_norm": 0.40822260083803935, "learning_rate": 3.2478861210798153e-06, "loss": 0.4031, "step": 4705 }, { "epoch": 3.263522884882108, "grad_norm": 0.3976685892114422, "learning_rate": 3.245619113242028e-06, "loss": 0.3787, "step": 4706 }, { "epoch": 3.264216366158114, "grad_norm": 0.39611973753983676, "learning_rate": 3.243352516618415e-06, "loss": 0.3805, "step": 4707 }, { "epoch": 3.264909847434119, "grad_norm": 0.376861464491667, "learning_rate": 3.2410863317402486e-06, "loss": 0.343, "step": 4708 }, { "epoch": 3.265603328710125, "grad_norm": 0.43254684201661425, "learning_rate": 3.238820559138707e-06, "loss": 0.3848, "step": 4709 }, { "epoch": 3.2662968099861303, "grad_norm": 0.37562919250073, "learning_rate": 3.236555199344872e-06, "loss": 0.3735, "step": 4710 }, { "epoch": 3.266990291262136, "grad_norm": 0.4552139783968729, "learning_rate": 3.234290252889728e-06, "loss": 0.3882, "step": 4711 }, { "epoch": 3.2676837725381414, "grad_norm": 0.4104471229451528, "learning_rate": 3.2320257203041605e-06, "loss": 0.3632, "step": 4712 }, { "epoch": 3.268377253814147, "grad_norm": 0.4535476664720102, "learning_rate": 3.229761602118958e-06, "loss": 0.3797, "step": 4713 }, { "epoch": 3.2690707350901524, "grad_norm": 0.3961736462119493, "learning_rate": 3.2274978988648175e-06, "loss": 0.3479, "step": 4714 }, { "epoch": 3.269764216366158, "grad_norm": 0.3773652200780356, "learning_rate": 3.225234611072332e-06, "loss": 0.3328, "step": 4715 }, { "epoch": 3.2704576976421635, "grad_norm": 0.3630660874836001, "learning_rate": 3.2229717392719996e-06, "loss": 0.3514, "step": 4716 }, { "epoch": 3.2711511789181693, "grad_norm": 0.4070481699030177, "learning_rate": 3.220709283994222e-06, "loss": 0.3853, "step": 4717 }, { "epoch": 3.2718446601941746, "grad_norm": 0.39008883733768285, "learning_rate": 3.2184472457693005e-06, "loss": 0.3512, "step": 4718 }, { "epoch": 3.2725381414701804, "grad_norm": 0.4063513655394903, "learning_rate": 3.216185625127444e-06, "loss": 0.3846, "step": 4719 }, { "epoch": 3.2732316227461857, "grad_norm": 0.38393336996444766, "learning_rate": 3.2139244225987576e-06, "loss": 0.3675, "step": 4720 }, { "epoch": 3.2739251040221915, "grad_norm": 0.42296411593548733, "learning_rate": 3.2116636387132506e-06, "loss": 0.3863, "step": 4721 }, { "epoch": 3.274618585298197, "grad_norm": 0.3678992924450587, "learning_rate": 3.209403274000835e-06, "loss": 0.4057, "step": 4722 }, { "epoch": 3.2753120665742026, "grad_norm": 0.41732537123782387, "learning_rate": 3.2071433289913252e-06, "loss": 0.3494, "step": 4723 }, { "epoch": 3.276005547850208, "grad_norm": 0.4722948230226156, "learning_rate": 3.2048838042144337e-06, "loss": 0.3582, "step": 4724 }, { "epoch": 3.2766990291262137, "grad_norm": 0.3742099048030128, "learning_rate": 3.202624700199777e-06, "loss": 0.3717, "step": 4725 }, { "epoch": 3.277392510402219, "grad_norm": 0.3793621842235678, 
"learning_rate": 3.2003660174768746e-06, "loss": 0.3394, "step": 4726 }, { "epoch": 3.278085991678225, "grad_norm": 0.4103144771838663, "learning_rate": 3.198107756575142e-06, "loss": 0.3663, "step": 4727 }, { "epoch": 3.27877947295423, "grad_norm": 0.4395224447677697, "learning_rate": 3.195849918023903e-06, "loss": 0.3768, "step": 4728 }, { "epoch": 3.279472954230236, "grad_norm": 0.38681679074906994, "learning_rate": 3.1935925023523775e-06, "loss": 0.38, "step": 4729 }, { "epoch": 3.280166435506241, "grad_norm": 0.3828074876963442, "learning_rate": 3.191335510089685e-06, "loss": 0.3143, "step": 4730 }, { "epoch": 3.280859916782247, "grad_norm": 0.4169252349768072, "learning_rate": 3.1890789417648515e-06, "loss": 0.3636, "step": 4731 }, { "epoch": 3.2815533980582523, "grad_norm": 0.40541723504443294, "learning_rate": 3.1868227979067985e-06, "loss": 0.3896, "step": 4732 }, { "epoch": 3.282246879334258, "grad_norm": 0.38123370082559205, "learning_rate": 3.1845670790443495e-06, "loss": 0.3865, "step": 4733 }, { "epoch": 3.2829403606102634, "grad_norm": 0.40263127376607377, "learning_rate": 3.1823117857062297e-06, "loss": 0.4081, "step": 4734 }, { "epoch": 3.283633841886269, "grad_norm": 0.39593720799173643, "learning_rate": 3.1800569184210627e-06, "loss": 0.3935, "step": 4735 }, { "epoch": 3.2843273231622745, "grad_norm": 0.4232411873435769, "learning_rate": 3.177802477717372e-06, "loss": 0.3895, "step": 4736 }, { "epoch": 3.2850208044382803, "grad_norm": 0.460907159586181, "learning_rate": 3.1755484641235855e-06, "loss": 0.403, "step": 4737 }, { "epoch": 3.2857142857142856, "grad_norm": 0.4113030507654702, "learning_rate": 3.173294878168025e-06, "loss": 0.359, "step": 4738 }, { "epoch": 3.2864077669902914, "grad_norm": 0.3928490884002706, "learning_rate": 3.1710417203789155e-06, "loss": 0.395, "step": 4739 }, { "epoch": 3.2871012482662967, "grad_norm": 0.40122412846447003, "learning_rate": 3.1687889912843816e-06, "loss": 0.4225, "step": 4740 }, { "epoch": 3.2877947295423025, "grad_norm": 0.3930969110948373, "learning_rate": 3.1665366914124452e-06, "loss": 0.3925, "step": 4741 }, { "epoch": 3.2884882108183078, "grad_norm": 0.4605346063562092, "learning_rate": 3.1642848212910297e-06, "loss": 0.3973, "step": 4742 }, { "epoch": 3.2891816920943135, "grad_norm": 0.406540718644168, "learning_rate": 3.1620333814479583e-06, "loss": 0.3466, "step": 4743 }, { "epoch": 3.289875173370319, "grad_norm": 0.38605095746544155, "learning_rate": 3.15978237241095e-06, "loss": 0.3717, "step": 4744 }, { "epoch": 3.2905686546463246, "grad_norm": 0.4349248871930986, "learning_rate": 3.157531794707625e-06, "loss": 0.4058, "step": 4745 }, { "epoch": 3.29126213592233, "grad_norm": 0.42455338847461416, "learning_rate": 3.1552816488655042e-06, "loss": 0.3972, "step": 4746 }, { "epoch": 3.2919556171983357, "grad_norm": 0.42332846630236565, "learning_rate": 3.1530319354120058e-06, "loss": 0.3578, "step": 4747 }, { "epoch": 3.292649098474341, "grad_norm": 0.39867405952893936, "learning_rate": 3.150782654874446e-06, "loss": 0.3941, "step": 4748 }, { "epoch": 3.293342579750347, "grad_norm": 0.4216220441924548, "learning_rate": 3.148533807780038e-06, "loss": 0.3792, "step": 4749 }, { "epoch": 3.294036061026352, "grad_norm": 0.33972172852554966, "learning_rate": 3.146285394655896e-06, "loss": 0.3702, "step": 4750 }, { "epoch": 3.294729542302358, "grad_norm": 0.38013695173073153, "learning_rate": 3.144037416029034e-06, "loss": 0.3621, "step": 4751 }, { "epoch": 3.2954230235783633, "grad_norm": 0.6888525340981343, 
"learning_rate": 3.1417898724263598e-06, "loss": 0.4257, "step": 4752 }, { "epoch": 3.296116504854369, "grad_norm": 0.3772052478915104, "learning_rate": 3.1395427643746802e-06, "loss": 0.3725, "step": 4753 }, { "epoch": 3.2968099861303743, "grad_norm": 0.510334734999082, "learning_rate": 3.1372960924007027e-06, "loss": 0.378, "step": 4754 }, { "epoch": 3.29750346740638, "grad_norm": 0.4193102882342751, "learning_rate": 3.135049857031031e-06, "loss": 0.4035, "step": 4755 }, { "epoch": 3.2981969486823854, "grad_norm": 0.41504374375765757, "learning_rate": 3.1328040587921672e-06, "loss": 0.3601, "step": 4756 }, { "epoch": 3.298890429958391, "grad_norm": 0.46961796996100313, "learning_rate": 3.1305586982105097e-06, "loss": 0.3694, "step": 4757 }, { "epoch": 3.2995839112343965, "grad_norm": 0.38038206847740236, "learning_rate": 3.1283137758123523e-06, "loss": 0.3833, "step": 4758 }, { "epoch": 3.3002773925104023, "grad_norm": 0.4153138935633195, "learning_rate": 3.1260692921238917e-06, "loss": 0.3742, "step": 4759 }, { "epoch": 3.3009708737864076, "grad_norm": 0.39352053174162527, "learning_rate": 3.123825247671217e-06, "loss": 0.3966, "step": 4760 }, { "epoch": 3.3016643550624134, "grad_norm": 0.39420314251902044, "learning_rate": 3.1215816429803174e-06, "loss": 0.408, "step": 4761 }, { "epoch": 3.3023578363384187, "grad_norm": 0.4174036995541163, "learning_rate": 3.1193384785770755e-06, "loss": 0.3674, "step": 4762 }, { "epoch": 3.3030513176144245, "grad_norm": 0.501637802834254, "learning_rate": 3.1170957549872718e-06, "loss": 0.3761, "step": 4763 }, { "epoch": 3.30374479889043, "grad_norm": 0.4786328205287392, "learning_rate": 3.1148534727365894e-06, "loss": 0.3821, "step": 4764 }, { "epoch": 3.3044382801664356, "grad_norm": 0.3686093336736551, "learning_rate": 3.1126116323505996e-06, "loss": 0.3975, "step": 4765 }, { "epoch": 3.305131761442441, "grad_norm": 0.37392921484159614, "learning_rate": 3.110370234354773e-06, "loss": 0.3727, "step": 4766 }, { "epoch": 3.3058252427184467, "grad_norm": 0.5261589475294723, "learning_rate": 3.1081292792744793e-06, "loss": 0.3544, "step": 4767 }, { "epoch": 3.306518723994452, "grad_norm": 0.40050830384526626, "learning_rate": 3.1058887676349814e-06, "loss": 0.4168, "step": 4768 }, { "epoch": 3.307212205270458, "grad_norm": 0.39231050690104, "learning_rate": 3.103648699961438e-06, "loss": 0.3495, "step": 4769 }, { "epoch": 3.307905686546463, "grad_norm": 0.37504240401756256, "learning_rate": 3.101409076778904e-06, "loss": 0.342, "step": 4770 }, { "epoch": 3.308599167822469, "grad_norm": 0.3837277033299974, "learning_rate": 3.0991698986123343e-06, "loss": 0.3919, "step": 4771 }, { "epoch": 3.309292649098474, "grad_norm": 0.3643823548751013, "learning_rate": 3.096931165986571e-06, "loss": 0.3629, "step": 4772 }, { "epoch": 3.30998613037448, "grad_norm": 0.3665139520803244, "learning_rate": 3.094692879426362e-06, "loss": 0.3798, "step": 4773 }, { "epoch": 3.3106796116504853, "grad_norm": 0.368599050542166, "learning_rate": 3.0924550394563433e-06, "loss": 0.3737, "step": 4774 }, { "epoch": 3.311373092926491, "grad_norm": 0.3744984842661619, "learning_rate": 3.090217646601047e-06, "loss": 0.4023, "step": 4775 }, { "epoch": 3.3120665742024964, "grad_norm": 0.7820708735439355, "learning_rate": 3.0879807013849037e-06, "loss": 0.3911, "step": 4776 }, { "epoch": 3.312760055478502, "grad_norm": 0.37775588113662956, "learning_rate": 3.085744204332237e-06, "loss": 0.3765, "step": 4777 }, { "epoch": 3.3134535367545075, "grad_norm": 0.3806866399607705, "learning_rate": 
3.083508155967264e-06, "loss": 0.3668, "step": 4778 }, { "epoch": 3.3141470180305133, "grad_norm": 0.3874645731776387, "learning_rate": 3.081272556814101e-06, "loss": 0.4059, "step": 4779 }, { "epoch": 3.3148404993065186, "grad_norm": 0.3763347121067024, "learning_rate": 3.0790374073967523e-06, "loss": 0.3739, "step": 4780 }, { "epoch": 3.3155339805825244, "grad_norm": 0.4099787943029592, "learning_rate": 3.0768027082391246e-06, "loss": 0.3769, "step": 4781 }, { "epoch": 3.3162274618585297, "grad_norm": 0.43572291503787564, "learning_rate": 3.074568459865014e-06, "loss": 0.3709, "step": 4782 }, { "epoch": 3.3169209431345354, "grad_norm": 0.435155025887218, "learning_rate": 3.07233466279811e-06, "loss": 0.3549, "step": 4783 }, { "epoch": 3.3176144244105408, "grad_norm": 0.3994661726058533, "learning_rate": 3.070101317562002e-06, "loss": 0.4443, "step": 4784 }, { "epoch": 3.3183079056865465, "grad_norm": 0.44061617332841746, "learning_rate": 3.0678684246801684e-06, "loss": 0.4357, "step": 4785 }, { "epoch": 3.319001386962552, "grad_norm": 0.4091944474569298, "learning_rate": 3.065635984675982e-06, "loss": 0.3866, "step": 4786 }, { "epoch": 3.3196948682385576, "grad_norm": 0.3735030393703658, "learning_rate": 3.0634039980727115e-06, "loss": 0.3694, "step": 4787 }, { "epoch": 3.320388349514563, "grad_norm": 0.3981062550907113, "learning_rate": 3.0611724653935184e-06, "loss": 0.409, "step": 4788 }, { "epoch": 3.3210818307905687, "grad_norm": 0.37682629797510875, "learning_rate": 3.058941387161456e-06, "loss": 0.3722, "step": 4789 }, { "epoch": 3.321775312066574, "grad_norm": 0.43033083528732036, "learning_rate": 3.0567107638994775e-06, "loss": 0.3731, "step": 4790 }, { "epoch": 3.32246879334258, "grad_norm": 0.4876268695971904, "learning_rate": 3.0544805961304203e-06, "loss": 0.3594, "step": 4791 }, { "epoch": 3.323162274618585, "grad_norm": 0.36190890433520706, "learning_rate": 3.0522508843770217e-06, "loss": 0.3665, "step": 4792 }, { "epoch": 3.323855755894591, "grad_norm": 0.3784287080428108, "learning_rate": 3.05002162916191e-06, "loss": 0.3573, "step": 4793 }, { "epoch": 3.3245492371705962, "grad_norm": 0.41364371948826995, "learning_rate": 3.0477928310076066e-06, "loss": 0.3925, "step": 4794 }, { "epoch": 3.325242718446602, "grad_norm": 0.35301376080114627, "learning_rate": 3.0455644904365234e-06, "loss": 0.3451, "step": 4795 }, { "epoch": 3.3259361997226073, "grad_norm": 0.3908244398272018, "learning_rate": 3.0433366079709705e-06, "loss": 0.3439, "step": 4796 }, { "epoch": 3.326629680998613, "grad_norm": 0.3793879081130786, "learning_rate": 3.0411091841331454e-06, "loss": 0.3678, "step": 4797 }, { "epoch": 3.3273231622746184, "grad_norm": 0.3869478050302743, "learning_rate": 3.0388822194451385e-06, "loss": 0.3837, "step": 4798 }, { "epoch": 3.328016643550624, "grad_norm": 0.4083021642711554, "learning_rate": 3.036655714428939e-06, "loss": 0.3455, "step": 4799 }, { "epoch": 3.3287101248266295, "grad_norm": 0.38226088788306095, "learning_rate": 3.034429669606419e-06, "loss": 0.3493, "step": 4800 }, { "epoch": 3.3294036061026353, "grad_norm": 0.4812896414464564, "learning_rate": 3.0322040854993508e-06, "loss": 0.3887, "step": 4801 }, { "epoch": 3.3300970873786406, "grad_norm": 0.38464444585631524, "learning_rate": 3.029978962629393e-06, "loss": 0.3692, "step": 4802 }, { "epoch": 3.3307905686546464, "grad_norm": 0.3615496548988666, "learning_rate": 3.0277543015180976e-06, "loss": 0.3929, "step": 4803 }, { "epoch": 3.3314840499306517, "grad_norm": 0.4377609914098718, "learning_rate": 
3.0255301026869118e-06, "loss": 0.3566, "step": 4804 }, { "epoch": 3.3321775312066575, "grad_norm": 0.4003491932807638, "learning_rate": 3.02330636665717e-06, "loss": 0.3875, "step": 4805 }, { "epoch": 3.332871012482663, "grad_norm": 0.39813084971419965, "learning_rate": 3.0210830939501e-06, "loss": 0.3799, "step": 4806 }, { "epoch": 3.3335644937586686, "grad_norm": 0.4009090654200237, "learning_rate": 3.0188602850868186e-06, "loss": 0.3765, "step": 4807 }, { "epoch": 3.334257975034674, "grad_norm": 0.4211772999469095, "learning_rate": 3.0166379405883394e-06, "loss": 0.3624, "step": 4808 }, { "epoch": 3.3349514563106797, "grad_norm": 0.39689042013027537, "learning_rate": 3.0144160609755635e-06, "loss": 0.4124, "step": 4809 }, { "epoch": 3.335644937586685, "grad_norm": 0.4267410643883082, "learning_rate": 3.012194646769283e-06, "loss": 0.3651, "step": 4810 }, { "epoch": 3.336338418862691, "grad_norm": 0.38128040683865877, "learning_rate": 3.0099736984901806e-06, "loss": 0.3916, "step": 4811 }, { "epoch": 3.337031900138696, "grad_norm": 0.41167653379599123, "learning_rate": 3.00775321665883e-06, "loss": 0.3926, "step": 4812 }, { "epoch": 3.337725381414702, "grad_norm": 0.38515492164328397, "learning_rate": 3.0055332017956984e-06, "loss": 0.4075, "step": 4813 }, { "epoch": 3.338418862690707, "grad_norm": 0.3928173736721108, "learning_rate": 3.0033136544211387e-06, "loss": 0.387, "step": 4814 }, { "epoch": 3.339112343966713, "grad_norm": 0.4052209320438356, "learning_rate": 3.0010945750553975e-06, "loss": 0.3979, "step": 4815 }, { "epoch": 3.3398058252427183, "grad_norm": 0.38808232379758656, "learning_rate": 2.99887596421861e-06, "loss": 0.3647, "step": 4816 }, { "epoch": 3.340499306518724, "grad_norm": 0.38593592589078973, "learning_rate": 2.9966578224308053e-06, "loss": 0.3838, "step": 4817 }, { "epoch": 3.3411927877947294, "grad_norm": 0.38211435077861855, "learning_rate": 2.9944401502118987e-06, "loss": 0.4043, "step": 4818 }, { "epoch": 3.341886269070735, "grad_norm": 0.492156552703435, "learning_rate": 2.9922229480816956e-06, "loss": 0.3818, "step": 4819 }, { "epoch": 3.3425797503467405, "grad_norm": 0.4168359745761171, "learning_rate": 2.9900062165598916e-06, "loss": 0.3346, "step": 4820 }, { "epoch": 3.3432732316227463, "grad_norm": 0.42075187556768207, "learning_rate": 2.987789956166074e-06, "loss": 0.4033, "step": 4821 }, { "epoch": 3.3439667128987516, "grad_norm": 0.4143576158340521, "learning_rate": 2.9855741674197182e-06, "loss": 0.3969, "step": 4822 }, { "epoch": 3.3446601941747574, "grad_norm": 0.4120220938954012, "learning_rate": 2.983358850840187e-06, "loss": 0.3981, "step": 4823 }, { "epoch": 3.3453536754507627, "grad_norm": 0.40816273604396464, "learning_rate": 2.9811440069467367e-06, "loss": 0.4046, "step": 4824 }, { "epoch": 3.3460471567267684, "grad_norm": 0.4000688578661085, "learning_rate": 2.9789296362585084e-06, "loss": 0.3502, "step": 4825 }, { "epoch": 3.3467406380027738, "grad_norm": 0.41682558150661075, "learning_rate": 2.9767157392945378e-06, "loss": 0.398, "step": 4826 }, { "epoch": 3.3474341192787795, "grad_norm": 0.3921429805180197, "learning_rate": 2.9745023165737445e-06, "loss": 0.3713, "step": 4827 }, { "epoch": 3.348127600554785, "grad_norm": 0.36688239420464774, "learning_rate": 2.9722893686149377e-06, "loss": 0.3736, "step": 4828 }, { "epoch": 3.3488210818307906, "grad_norm": 0.3887522990258237, "learning_rate": 2.9700768959368196e-06, "loss": 0.3373, "step": 4829 }, { "epoch": 3.349514563106796, "grad_norm": 0.5145190725681971, "learning_rate": 
2.967864899057975e-06, "loss": 0.4464, "step": 4830 }, { "epoch": 3.3502080443828017, "grad_norm": 0.4339586706330397, "learning_rate": 2.9656533784968804e-06, "loss": 0.4072, "step": 4831 }, { "epoch": 3.350901525658807, "grad_norm": 0.4060396537410531, "learning_rate": 2.9634423347718998e-06, "loss": 0.4047, "step": 4832 }, { "epoch": 3.351595006934813, "grad_norm": 0.3928970993760514, "learning_rate": 2.961231768401287e-06, "loss": 0.3436, "step": 4833 }, { "epoch": 3.352288488210818, "grad_norm": 0.4517601326555857, "learning_rate": 2.9590216799031814e-06, "loss": 0.4042, "step": 4834 }, { "epoch": 3.352981969486824, "grad_norm": 0.44926535318849076, "learning_rate": 2.9568120697956137e-06, "loss": 0.4021, "step": 4835 }, { "epoch": 3.3536754507628292, "grad_norm": 0.4066676413216299, "learning_rate": 2.954602938596499e-06, "loss": 0.4197, "step": 4836 }, { "epoch": 3.354368932038835, "grad_norm": 0.610090112849432, "learning_rate": 2.9523942868236414e-06, "loss": 0.3733, "step": 4837 }, { "epoch": 3.3550624133148403, "grad_norm": 0.4016903449405014, "learning_rate": 2.9501861149947347e-06, "loss": 0.3561, "step": 4838 }, { "epoch": 3.355755894590846, "grad_norm": 0.3941258843057235, "learning_rate": 2.9479784236273572e-06, "loss": 0.3414, "step": 4839 }, { "epoch": 3.3564493758668514, "grad_norm": 0.43253920163960613, "learning_rate": 2.945771213238975e-06, "loss": 0.3504, "step": 4840 }, { "epoch": 3.357142857142857, "grad_norm": 0.35929961615809525, "learning_rate": 2.9435644843469434e-06, "loss": 0.3674, "step": 4841 }, { "epoch": 3.3578363384188625, "grad_norm": 0.4007135177687099, "learning_rate": 2.9413582374685036e-06, "loss": 0.3476, "step": 4842 }, { "epoch": 3.3585298196948683, "grad_norm": 0.4080473708867387, "learning_rate": 2.939152473120781e-06, "loss": 0.3378, "step": 4843 }, { "epoch": 3.3592233009708736, "grad_norm": 0.3946858676555683, "learning_rate": 2.936947191820796e-06, "loss": 0.3891, "step": 4844 }, { "epoch": 3.3599167822468794, "grad_norm": 0.40830885659396526, "learning_rate": 2.934742394085447e-06, "loss": 0.3665, "step": 4845 }, { "epoch": 3.3606102635228847, "grad_norm": 0.3958600097382781, "learning_rate": 2.932538080431524e-06, "loss": 0.4472, "step": 4846 }, { "epoch": 3.3613037447988905, "grad_norm": 0.40731749546536183, "learning_rate": 2.9303342513757023e-06, "loss": 0.3624, "step": 4847 }, { "epoch": 3.361997226074896, "grad_norm": 0.39258631435835023, "learning_rate": 2.928130907434541e-06, "loss": 0.36, "step": 4848 }, { "epoch": 3.3626907073509016, "grad_norm": 0.4436248182024021, "learning_rate": 2.925928049124491e-06, "loss": 0.3855, "step": 4849 }, { "epoch": 3.363384188626907, "grad_norm": 0.4250106708461144, "learning_rate": 2.923725676961886e-06, "loss": 0.3486, "step": 4850 }, { "epoch": 3.3640776699029127, "grad_norm": 0.8206220779206658, "learning_rate": 2.9215237914629445e-06, "loss": 0.3843, "step": 4851 }, { "epoch": 3.364771151178918, "grad_norm": 0.34570622275469653, "learning_rate": 2.919322393143772e-06, "loss": 0.3341, "step": 4852 }, { "epoch": 3.3654646324549238, "grad_norm": 0.3837678150132038, "learning_rate": 2.9171214825203626e-06, "loss": 0.3754, "step": 4853 }, { "epoch": 3.366158113730929, "grad_norm": 0.39575493565391595, "learning_rate": 2.914921060108592e-06, "loss": 0.4418, "step": 4854 }, { "epoch": 3.366851595006935, "grad_norm": 0.42549432184618546, "learning_rate": 2.9127211264242244e-06, "loss": 0.3677, "step": 4855 }, { "epoch": 3.36754507628294, "grad_norm": 0.3822340361511247, "learning_rate": 
2.9105216819829094e-06, "loss": 0.3625, "step": 4856 }, { "epoch": 3.368238557558946, "grad_norm": 0.36753243020092696, "learning_rate": 2.9083227273001784e-06, "loss": 0.3836, "step": 4857 }, { "epoch": 3.3689320388349513, "grad_norm": 0.504998469652746, "learning_rate": 2.906124262891451e-06, "loss": 0.3989, "step": 4858 }, { "epoch": 3.369625520110957, "grad_norm": 0.39073449430611673, "learning_rate": 2.9039262892720338e-06, "loss": 0.3717, "step": 4859 }, { "epoch": 3.3703190013869624, "grad_norm": 0.3863673296401061, "learning_rate": 2.9017288069571114e-06, "loss": 0.3783, "step": 4860 }, { "epoch": 3.371012482662968, "grad_norm": 0.3908072330064723, "learning_rate": 2.8995318164617614e-06, "loss": 0.3549, "step": 4861 }, { "epoch": 3.3717059639389735, "grad_norm": 0.45745098507551407, "learning_rate": 2.89733531830094e-06, "loss": 0.3748, "step": 4862 }, { "epoch": 3.3723994452149793, "grad_norm": 0.3936236154645762, "learning_rate": 2.8951393129894928e-06, "loss": 0.3571, "step": 4863 }, { "epoch": 3.3730929264909846, "grad_norm": 0.4247916075005123, "learning_rate": 2.8929438010421486e-06, "loss": 0.4084, "step": 4864 }, { "epoch": 3.3737864077669903, "grad_norm": 0.42947294425671784, "learning_rate": 2.8907487829735147e-06, "loss": 0.3714, "step": 4865 }, { "epoch": 3.3744798890429957, "grad_norm": 0.6270453252780668, "learning_rate": 2.888554259298092e-06, "loss": 0.3945, "step": 4866 }, { "epoch": 3.3751733703190014, "grad_norm": 0.46125027737791646, "learning_rate": 2.886360230530258e-06, "loss": 0.403, "step": 4867 }, { "epoch": 3.3758668515950068, "grad_norm": 0.416072507615972, "learning_rate": 2.8841666971842776e-06, "loss": 0.3726, "step": 4868 }, { "epoch": 3.3765603328710125, "grad_norm": 0.4009550156039696, "learning_rate": 2.881973659774302e-06, "loss": 0.4169, "step": 4869 }, { "epoch": 3.377253814147018, "grad_norm": 0.35906874390525434, "learning_rate": 2.8797811188143572e-06, "loss": 0.3707, "step": 4870 }, { "epoch": 3.3779472954230236, "grad_norm": 0.3779197767370708, "learning_rate": 2.8775890748183666e-06, "loss": 0.3243, "step": 4871 }, { "epoch": 3.378640776699029, "grad_norm": 0.39668593449340256, "learning_rate": 2.8753975283001232e-06, "loss": 0.3681, "step": 4872 }, { "epoch": 3.3793342579750347, "grad_norm": 0.42130463516499855, "learning_rate": 2.873206479773313e-06, "loss": 0.3405, "step": 4873 }, { "epoch": 3.38002773925104, "grad_norm": 0.38727094281194896, "learning_rate": 2.8710159297515027e-06, "loss": 0.4041, "step": 4874 }, { "epoch": 3.380721220527046, "grad_norm": 0.38543446372078244, "learning_rate": 2.8688258787481376e-06, "loss": 0.4067, "step": 4875 }, { "epoch": 3.381414701803051, "grad_norm": 0.3813864971120607, "learning_rate": 2.866636327276552e-06, "loss": 0.3725, "step": 4876 }, { "epoch": 3.382108183079057, "grad_norm": 0.3870088894913858, "learning_rate": 2.864447275849962e-06, "loss": 0.347, "step": 4877 }, { "epoch": 3.3828016643550622, "grad_norm": 0.35426576167665785, "learning_rate": 2.8622587249814625e-06, "loss": 0.3716, "step": 4878 }, { "epoch": 3.383495145631068, "grad_norm": 0.39258220662106075, "learning_rate": 2.860070675184036e-06, "loss": 0.369, "step": 4879 }, { "epoch": 3.3841886269070733, "grad_norm": 0.38958571880739534, "learning_rate": 2.8578831269705454e-06, "loss": 0.4157, "step": 4880 }, { "epoch": 3.384882108183079, "grad_norm": 0.40299059748768185, "learning_rate": 2.855696080853735e-06, "loss": 0.3986, "step": 4881 }, { "epoch": 3.3855755894590844, "grad_norm": 0.4756806532688594, "learning_rate": 
2.853509537346236e-06, "loss": 0.3539, "step": 4882 }, { "epoch": 3.38626907073509, "grad_norm": 0.40915831012982906, "learning_rate": 2.8513234969605534e-06, "loss": 0.3881, "step": 4883 }, { "epoch": 3.3869625520110955, "grad_norm": 0.42051132510023564, "learning_rate": 2.8491379602090816e-06, "loss": 0.3443, "step": 4884 }, { "epoch": 3.3876560332871013, "grad_norm": 0.41574227309923295, "learning_rate": 2.8469529276040976e-06, "loss": 0.3659, "step": 4885 }, { "epoch": 3.3883495145631066, "grad_norm": 0.3801177312434935, "learning_rate": 2.8447683996577513e-06, "loss": 0.397, "step": 4886 }, { "epoch": 3.3890429958391124, "grad_norm": 0.3922759108182902, "learning_rate": 2.8425843768820838e-06, "loss": 0.3889, "step": 4887 }, { "epoch": 3.3897364771151177, "grad_norm": 0.3738034887792933, "learning_rate": 2.840400859789013e-06, "loss": 0.3271, "step": 4888 }, { "epoch": 3.3904299583911235, "grad_norm": 0.4141784960468706, "learning_rate": 2.838217848890341e-06, "loss": 0.381, "step": 4889 }, { "epoch": 3.391123439667129, "grad_norm": 0.38936755920522714, "learning_rate": 2.8360353446977505e-06, "loss": 0.3926, "step": 4890 }, { "epoch": 3.3918169209431346, "grad_norm": 0.4371766590483668, "learning_rate": 2.8338533477228007e-06, "loss": 0.3683, "step": 4891 }, { "epoch": 3.39251040221914, "grad_norm": 0.40790824562218736, "learning_rate": 2.8316718584769385e-06, "loss": 0.3726, "step": 4892 }, { "epoch": 3.3932038834951457, "grad_norm": 0.38681992444589886, "learning_rate": 2.829490877471491e-06, "loss": 0.3519, "step": 4893 }, { "epoch": 3.393897364771151, "grad_norm": 0.37556829310744894, "learning_rate": 2.8273104052176603e-06, "loss": 0.3824, "step": 4894 }, { "epoch": 3.3945908460471568, "grad_norm": 0.4443909455244169, "learning_rate": 2.8251304422265347e-06, "loss": 0.3719, "step": 4895 }, { "epoch": 3.395284327323162, "grad_norm": 0.4118663188699836, "learning_rate": 2.8229509890090843e-06, "loss": 0.3945, "step": 4896 }, { "epoch": 3.395977808599168, "grad_norm": 0.38264484392423737, "learning_rate": 2.8207720460761523e-06, "loss": 0.3732, "step": 4897 }, { "epoch": 3.396671289875173, "grad_norm": 0.3910796509452467, "learning_rate": 2.8185936139384727e-06, "loss": 0.3447, "step": 4898 }, { "epoch": 3.397364771151179, "grad_norm": 0.40475057627631883, "learning_rate": 2.81641569310665e-06, "loss": 0.3923, "step": 4899 }, { "epoch": 3.3980582524271843, "grad_norm": 0.46432757412272163, "learning_rate": 2.8142382840911747e-06, "loss": 0.3981, "step": 4900 }, { "epoch": 3.39875173370319, "grad_norm": 0.44219148564980687, "learning_rate": 2.8120613874024173e-06, "loss": 0.4332, "step": 4901 }, { "epoch": 3.3994452149791954, "grad_norm": 0.38875900013315995, "learning_rate": 2.809885003550623e-06, "loss": 0.3809, "step": 4902 }, { "epoch": 3.400138696255201, "grad_norm": 0.3999033233778416, "learning_rate": 2.8077091330459225e-06, "loss": 0.3422, "step": 4903 }, { "epoch": 3.4008321775312065, "grad_norm": 0.7729474720429104, "learning_rate": 2.805533776398326e-06, "loss": 0.3685, "step": 4904 }, { "epoch": 3.4015256588072122, "grad_norm": 0.39594443974714943, "learning_rate": 2.803358934117717e-06, "loss": 0.3821, "step": 4905 }, { "epoch": 3.4022191400832176, "grad_norm": 0.4427389632249658, "learning_rate": 2.8011846067138648e-06, "loss": 0.3678, "step": 4906 }, { "epoch": 3.4029126213592233, "grad_norm": 0.38383049263298413, "learning_rate": 2.7990107946964163e-06, "loss": 0.3722, "step": 4907 }, { "epoch": 3.4036061026352287, "grad_norm": 0.3771531191666602, "learning_rate": 
2.7968374985748977e-06, "loss": 0.3965, "step": 4908 }, { "epoch": 3.4042995839112344, "grad_norm": 0.45903971744526006, "learning_rate": 2.794664718858715e-06, "loss": 0.3506, "step": 4909 }, { "epoch": 3.4049930651872398, "grad_norm": 0.41421234277571894, "learning_rate": 2.792492456057148e-06, "loss": 0.3829, "step": 4910 }, { "epoch": 3.4056865464632455, "grad_norm": 0.402475409613642, "learning_rate": 2.7903207106793646e-06, "loss": 0.4218, "step": 4911 }, { "epoch": 3.406380027739251, "grad_norm": 0.39327737538667384, "learning_rate": 2.7881494832344008e-06, "loss": 0.3793, "step": 4912 }, { "epoch": 3.4070735090152566, "grad_norm": 0.49336222269509616, "learning_rate": 2.7859787742311794e-06, "loss": 0.42, "step": 4913 }, { "epoch": 3.407766990291262, "grad_norm": 0.3943370551289641, "learning_rate": 2.7838085841785005e-06, "loss": 0.3867, "step": 4914 }, { "epoch": 3.4084604715672677, "grad_norm": 0.42733581194457265, "learning_rate": 2.7816389135850353e-06, "loss": 0.4147, "step": 4915 }, { "epoch": 3.409153952843273, "grad_norm": 0.38723319973204884, "learning_rate": 2.7794697629593457e-06, "loss": 0.3829, "step": 4916 }, { "epoch": 3.409847434119279, "grad_norm": 0.44312960703892645, "learning_rate": 2.777301132809861e-06, "loss": 0.4378, "step": 4917 }, { "epoch": 3.410540915395284, "grad_norm": 0.4333836906210958, "learning_rate": 2.775133023644893e-06, "loss": 0.3976, "step": 4918 }, { "epoch": 3.41123439667129, "grad_norm": 0.3848660625105505, "learning_rate": 2.7729654359726327e-06, "loss": 0.353, "step": 4919 }, { "epoch": 3.4119278779472952, "grad_norm": 0.3975276218469039, "learning_rate": 2.770798370301143e-06, "loss": 0.4022, "step": 4920 }, { "epoch": 3.412621359223301, "grad_norm": 0.3826030067414858, "learning_rate": 2.7686318271383717e-06, "loss": 0.3513, "step": 4921 }, { "epoch": 3.4133148404993063, "grad_norm": 0.5153097547808159, "learning_rate": 2.7664658069921415e-06, "loss": 0.4003, "step": 4922 }, { "epoch": 3.414008321775312, "grad_norm": 0.388803719858887, "learning_rate": 2.764300310370147e-06, "loss": 0.3933, "step": 4923 }, { "epoch": 3.4147018030513174, "grad_norm": 0.37902493217826716, "learning_rate": 2.762135337779969e-06, "loss": 0.4015, "step": 4924 }, { "epoch": 3.415395284327323, "grad_norm": 0.449218891198431, "learning_rate": 2.75997088972906e-06, "loss": 0.3987, "step": 4925 }, { "epoch": 3.4160887656033285, "grad_norm": 0.40938851019963973, "learning_rate": 2.757806966724752e-06, "loss": 0.3932, "step": 4926 }, { "epoch": 3.4167822468793343, "grad_norm": 0.4075548869080461, "learning_rate": 2.755643569274254e-06, "loss": 0.3754, "step": 4927 }, { "epoch": 3.4174757281553396, "grad_norm": 0.43239540728189507, "learning_rate": 2.753480697884647e-06, "loss": 0.3835, "step": 4928 }, { "epoch": 3.4181692094313454, "grad_norm": 0.465700034039619, "learning_rate": 2.751318353062894e-06, "loss": 0.393, "step": 4929 }, { "epoch": 3.4188626907073507, "grad_norm": 0.39435980690696476, "learning_rate": 2.7491565353158356e-06, "loss": 0.4192, "step": 4930 }, { "epoch": 3.4195561719833565, "grad_norm": 0.39235831769813967, "learning_rate": 2.7469952451501825e-06, "loss": 0.3703, "step": 4931 }, { "epoch": 3.420249653259362, "grad_norm": 0.4332464961711542, "learning_rate": 2.744834483072526e-06, "loss": 0.3784, "step": 4932 }, { "epoch": 3.4209431345353676, "grad_norm": 0.4361363296596932, "learning_rate": 2.7426742495893343e-06, "loss": 0.4181, "step": 4933 }, { "epoch": 3.421636615811373, "grad_norm": 0.4211794105331103, "learning_rate": 
2.7405145452069505e-06, "loss": 0.3956, "step": 4934 }, { "epoch": 3.4223300970873787, "grad_norm": 0.3920133142444421, "learning_rate": 2.7383553704315946e-06, "loss": 0.3682, "step": 4935 }, { "epoch": 3.423023578363384, "grad_norm": 0.41823077310912277, "learning_rate": 2.736196725769359e-06, "loss": 0.3589, "step": 4936 }, { "epoch": 3.4237170596393898, "grad_norm": 0.507199572755019, "learning_rate": 2.734038611726215e-06, "loss": 0.3763, "step": 4937 }, { "epoch": 3.424410540915395, "grad_norm": 0.36561853832034596, "learning_rate": 2.731881028808012e-06, "loss": 0.333, "step": 4938 }, { "epoch": 3.425104022191401, "grad_norm": 0.37067240687619535, "learning_rate": 2.7297239775204674e-06, "loss": 0.3569, "step": 4939 }, { "epoch": 3.425797503467406, "grad_norm": 0.3661613510657694, "learning_rate": 2.7275674583691804e-06, "loss": 0.3578, "step": 4940 }, { "epoch": 3.426490984743412, "grad_norm": 0.3799890045841063, "learning_rate": 2.7254114718596253e-06, "loss": 0.3714, "step": 4941 }, { "epoch": 3.4271844660194173, "grad_norm": 0.4125765941311352, "learning_rate": 2.7232560184971437e-06, "loss": 0.393, "step": 4942 }, { "epoch": 3.427877947295423, "grad_norm": 0.39792676362304097, "learning_rate": 2.721101098786967e-06, "loss": 0.3662, "step": 4943 }, { "epoch": 3.4285714285714284, "grad_norm": 0.43528143961658444, "learning_rate": 2.718946713234185e-06, "loss": 0.3692, "step": 4944 }, { "epoch": 3.429264909847434, "grad_norm": 0.37779663047146134, "learning_rate": 2.7167928623437744e-06, "loss": 0.334, "step": 4945 }, { "epoch": 3.4299583911234395, "grad_norm": 0.3623910565481717, "learning_rate": 2.714639546620582e-06, "loss": 0.3706, "step": 4946 }, { "epoch": 3.4306518723994452, "grad_norm": 0.4217422480863894, "learning_rate": 2.7124867665693276e-06, "loss": 0.388, "step": 4947 }, { "epoch": 3.4313453536754506, "grad_norm": 0.38209248965279885, "learning_rate": 2.71033452269461e-06, "loss": 0.31, "step": 4948 }, { "epoch": 3.4320388349514563, "grad_norm": 0.35687687047638755, "learning_rate": 2.7081828155008953e-06, "loss": 0.3556, "step": 4949 }, { "epoch": 3.4327323162274617, "grad_norm": 0.38785585870490286, "learning_rate": 2.7060316454925305e-06, "loss": 0.3894, "step": 4950 }, { "epoch": 3.4334257975034674, "grad_norm": 0.404601808598807, "learning_rate": 2.7038810131737346e-06, "loss": 0.3734, "step": 4951 }, { "epoch": 3.4341192787794728, "grad_norm": 0.4191797723449136, "learning_rate": 2.7017309190486e-06, "loss": 0.4103, "step": 4952 }, { "epoch": 3.4348127600554785, "grad_norm": 0.39661398154454475, "learning_rate": 2.699581363621093e-06, "loss": 0.3202, "step": 4953 }, { "epoch": 3.435506241331484, "grad_norm": 0.4036795626786731, "learning_rate": 2.697432347395056e-06, "loss": 0.3618, "step": 4954 }, { "epoch": 3.4361997226074896, "grad_norm": 0.3989886522608835, "learning_rate": 2.695283870874199e-06, "loss": 0.3801, "step": 4955 }, { "epoch": 3.436893203883495, "grad_norm": 0.38679606268716427, "learning_rate": 2.693135934562113e-06, "loss": 0.3684, "step": 4956 }, { "epoch": 3.4375866851595007, "grad_norm": 0.40409128568604624, "learning_rate": 2.6909885389622547e-06, "loss": 0.4059, "step": 4957 }, { "epoch": 3.438280166435506, "grad_norm": 0.3771051473781267, "learning_rate": 2.68884168457796e-06, "loss": 0.378, "step": 4958 }, { "epoch": 3.438973647711512, "grad_norm": 0.3768436049736947, "learning_rate": 2.6866953719124365e-06, "loss": 0.3847, "step": 4959 }, { "epoch": 3.4396671289875176, "grad_norm": 0.4149233474542711, "learning_rate": 
2.684549601468764e-06, "loss": 0.3822, "step": 4960 }, { "epoch": 3.440360610263523, "grad_norm": 0.4396941506657551, "learning_rate": 2.6824043737498978e-06, "loss": 0.4048, "step": 4961 }, { "epoch": 3.4410540915395282, "grad_norm": 0.39699085268813994, "learning_rate": 2.6802596892586595e-06, "loss": 0.3679, "step": 4962 }, { "epoch": 3.441747572815534, "grad_norm": 0.38707204105879717, "learning_rate": 2.6781155484977495e-06, "loss": 0.3818, "step": 4963 }, { "epoch": 3.4424410540915398, "grad_norm": 0.40384547582505415, "learning_rate": 2.6759719519697412e-06, "loss": 0.3858, "step": 4964 }, { "epoch": 3.443134535367545, "grad_norm": 0.36378350408684135, "learning_rate": 2.673828900177074e-06, "loss": 0.3171, "step": 4965 }, { "epoch": 3.4438280166435504, "grad_norm": 0.4095288700423232, "learning_rate": 2.671686393622066e-06, "loss": 0.3504, "step": 4966 }, { "epoch": 3.444521497919556, "grad_norm": 0.40593831978099953, "learning_rate": 2.6695444328069063e-06, "loss": 0.4187, "step": 4967 }, { "epoch": 3.445214979195562, "grad_norm": 0.3777510446737168, "learning_rate": 2.6674030182336496e-06, "loss": 0.3572, "step": 4968 }, { "epoch": 3.4459084604715673, "grad_norm": 0.38471942821892796, "learning_rate": 2.6652621504042366e-06, "loss": 0.3721, "step": 4969 }, { "epoch": 3.4466019417475726, "grad_norm": 0.5303866671587186, "learning_rate": 2.6631218298204643e-06, "loss": 0.3707, "step": 4970 }, { "epoch": 3.4472954230235784, "grad_norm": 0.38591395865593525, "learning_rate": 2.6609820569840106e-06, "loss": 0.375, "step": 4971 }, { "epoch": 3.447988904299584, "grad_norm": 0.43278824202581095, "learning_rate": 2.6588428323964243e-06, "loss": 0.3654, "step": 4972 }, { "epoch": 3.4486823855755895, "grad_norm": 0.48739101326818984, "learning_rate": 2.656704156559121e-06, "loss": 0.3612, "step": 4973 }, { "epoch": 3.449375866851595, "grad_norm": 0.416971104948852, "learning_rate": 2.6545660299733923e-06, "loss": 0.3834, "step": 4974 }, { "epoch": 3.4500693481276006, "grad_norm": 0.41300738954943433, "learning_rate": 2.652428453140402e-06, "loss": 0.3728, "step": 4975 }, { "epoch": 3.4507628294036063, "grad_norm": 0.36040023295863355, "learning_rate": 2.6502914265611783e-06, "loss": 0.3127, "step": 4976 }, { "epoch": 3.4514563106796117, "grad_norm": 0.36416957185809895, "learning_rate": 2.6481549507366266e-06, "loss": 0.3984, "step": 4977 }, { "epoch": 3.452149791955617, "grad_norm": 0.41709294981582845, "learning_rate": 2.6460190261675223e-06, "loss": 0.3884, "step": 4978 }, { "epoch": 3.4528432732316228, "grad_norm": 1.042961544375003, "learning_rate": 2.6438836533545092e-06, "loss": 0.3926, "step": 4979 }, { "epoch": 3.4535367545076285, "grad_norm": 0.480741388193357, "learning_rate": 2.641748832798107e-06, "loss": 0.4261, "step": 4980 }, { "epoch": 3.454230235783634, "grad_norm": 0.42416611325259135, "learning_rate": 2.639614564998696e-06, "loss": 0.382, "step": 4981 }, { "epoch": 3.454923717059639, "grad_norm": 0.3548475153760887, "learning_rate": 2.6374808504565363e-06, "loss": 0.3577, "step": 4982 }, { "epoch": 3.455617198335645, "grad_norm": 0.445091616710834, "learning_rate": 2.6353476896717574e-06, "loss": 0.3932, "step": 4983 }, { "epoch": 3.4563106796116507, "grad_norm": 0.39396270646454185, "learning_rate": 2.6332150831443524e-06, "loss": 0.3721, "step": 4984 }, { "epoch": 3.457004160887656, "grad_norm": 0.42989417337507274, "learning_rate": 2.631083031374191e-06, "loss": 0.3795, "step": 4985 }, { "epoch": 3.4576976421636614, "grad_norm": 0.4607573109464507, "learning_rate": 
2.62895153486101e-06, "loss": 0.3979, "step": 4986 }, { "epoch": 3.458391123439667, "grad_norm": 0.37516883158470005, "learning_rate": 2.626820594104418e-06, "loss": 0.3632, "step": 4987 }, { "epoch": 3.459084604715673, "grad_norm": 0.40405539795301204, "learning_rate": 2.624690209603893e-06, "loss": 0.3531, "step": 4988 }, { "epoch": 3.4597780859916782, "grad_norm": 0.37632193303013134, "learning_rate": 2.622560381858778e-06, "loss": 0.375, "step": 4989 }, { "epoch": 3.4604715672676836, "grad_norm": 0.3572816828055843, "learning_rate": 2.620431111368291e-06, "loss": 0.3211, "step": 4990 }, { "epoch": 3.4611650485436893, "grad_norm": 0.42058442276817903, "learning_rate": 2.6183023986315202e-06, "loss": 0.3904, "step": 4991 }, { "epoch": 3.461858529819695, "grad_norm": 0.3799187792338103, "learning_rate": 2.6161742441474166e-06, "loss": 0.3723, "step": 4992 }, { "epoch": 3.4625520110957004, "grad_norm": 0.43632636713018264, "learning_rate": 2.6140466484148074e-06, "loss": 0.3779, "step": 4993 }, { "epoch": 3.4632454923717058, "grad_norm": 0.41831324602323344, "learning_rate": 2.6119196119323813e-06, "loss": 0.3568, "step": 4994 }, { "epoch": 3.4639389736477115, "grad_norm": 0.43314689696278474, "learning_rate": 2.6097931351987014e-06, "loss": 0.4197, "step": 4995 }, { "epoch": 3.4646324549237173, "grad_norm": 0.3778873958768719, "learning_rate": 2.6076672187122043e-06, "loss": 0.3615, "step": 4996 }, { "epoch": 3.4653259361997226, "grad_norm": 0.47570246175112035, "learning_rate": 2.6055418629711825e-06, "loss": 0.3961, "step": 4997 }, { "epoch": 3.466019417475728, "grad_norm": 0.4443709246799145, "learning_rate": 2.6034170684738065e-06, "loss": 0.3573, "step": 4998 }, { "epoch": 3.4667128987517337, "grad_norm": 0.4011150158884296, "learning_rate": 2.6012928357181145e-06, "loss": 0.3885, "step": 4999 }, { "epoch": 3.4674063800277395, "grad_norm": 0.3628191538033867, "learning_rate": 2.599169165202008e-06, "loss": 0.3405, "step": 5000 }, { "epoch": 3.468099861303745, "grad_norm": 0.37404681848891647, "learning_rate": 2.5970460574232636e-06, "loss": 0.3653, "step": 5001 }, { "epoch": 3.46879334257975, "grad_norm": 0.4508620958026842, "learning_rate": 2.594923512879518e-06, "loss": 0.4145, "step": 5002 }, { "epoch": 3.469486823855756, "grad_norm": 0.3770502950046798, "learning_rate": 2.592801532068283e-06, "loss": 0.349, "step": 5003 }, { "epoch": 3.4701803051317617, "grad_norm": 0.40674648087365345, "learning_rate": 2.5906801154869355e-06, "loss": 0.3761, "step": 5004 }, { "epoch": 3.470873786407767, "grad_norm": 0.4437004351728985, "learning_rate": 2.588559263632719e-06, "loss": 0.3744, "step": 5005 }, { "epoch": 3.4715672676837723, "grad_norm": 0.40667005781382637, "learning_rate": 2.586438977002749e-06, "loss": 0.3388, "step": 5006 }, { "epoch": 3.472260748959778, "grad_norm": 0.3999108972621037, "learning_rate": 2.584319256094001e-06, "loss": 0.3851, "step": 5007 }, { "epoch": 3.472954230235784, "grad_norm": 0.4262331894520866, "learning_rate": 2.582200101403324e-06, "loss": 0.3799, "step": 5008 }, { "epoch": 3.473647711511789, "grad_norm": 0.3845148925025503, "learning_rate": 2.5800815134274347e-06, "loss": 0.392, "step": 5009 }, { "epoch": 3.4743411927877945, "grad_norm": 0.38613127077001597, "learning_rate": 2.5779634926629103e-06, "loss": 0.3953, "step": 5010 }, { "epoch": 3.4750346740638003, "grad_norm": 0.3714306681631704, "learning_rate": 2.575846039606203e-06, "loss": 0.3167, "step": 5011 }, { "epoch": 3.475728155339806, "grad_norm": 0.3610781947722351, "learning_rate": 
2.573729154753629e-06, "loss": 0.3823, "step": 5012 }, { "epoch": 3.4764216366158114, "grad_norm": 0.6946274530018958, "learning_rate": 2.571612838601365e-06, "loss": 0.3602, "step": 5013 }, { "epoch": 3.4771151178918167, "grad_norm": 0.46945705057369436, "learning_rate": 2.5694970916454686e-06, "loss": 0.3707, "step": 5014 }, { "epoch": 3.4778085991678225, "grad_norm": 0.4229556483807219, "learning_rate": 2.56738191438185e-06, "loss": 0.3836, "step": 5015 }, { "epoch": 3.4785020804438282, "grad_norm": 0.41856241165707003, "learning_rate": 2.565267307306292e-06, "loss": 0.33, "step": 5016 }, { "epoch": 3.4791955617198336, "grad_norm": 0.3624775573869822, "learning_rate": 2.563153270914446e-06, "loss": 0.389, "step": 5017 }, { "epoch": 3.479889042995839, "grad_norm": 0.38960483916856403, "learning_rate": 2.5610398057018235e-06, "loss": 0.3649, "step": 5018 }, { "epoch": 3.4805825242718447, "grad_norm": 0.4049643602143973, "learning_rate": 2.558926912163807e-06, "loss": 0.388, "step": 5019 }, { "epoch": 3.4812760055478504, "grad_norm": 0.3903420159153272, "learning_rate": 2.5568145907956443e-06, "loss": 0.3794, "step": 5020 }, { "epoch": 3.4819694868238558, "grad_norm": 0.4064956894266744, "learning_rate": 2.5547028420924454e-06, "loss": 0.3991, "step": 5021 }, { "epoch": 3.482662968099861, "grad_norm": 0.4072342619633535, "learning_rate": 2.5525916665491907e-06, "loss": 0.3513, "step": 5022 }, { "epoch": 3.483356449375867, "grad_norm": 0.369253497607641, "learning_rate": 2.550481064660724e-06, "loss": 0.3392, "step": 5023 }, { "epoch": 3.4840499306518726, "grad_norm": 0.634110116014436, "learning_rate": 2.548371036921756e-06, "loss": 0.3435, "step": 5024 }, { "epoch": 3.484743411927878, "grad_norm": 0.40864749564981667, "learning_rate": 2.5462615838268636e-06, "loss": 0.4043, "step": 5025 }, { "epoch": 3.4854368932038833, "grad_norm": 0.4161750802751289, "learning_rate": 2.544152705870483e-06, "loss": 0.3953, "step": 5026 }, { "epoch": 3.486130374479889, "grad_norm": 0.393875121572175, "learning_rate": 2.5420444035469218e-06, "loss": 0.4, "step": 5027 }, { "epoch": 3.486823855755895, "grad_norm": 0.36920507839132877, "learning_rate": 2.539936677350353e-06, "loss": 0.3184, "step": 5028 }, { "epoch": 3.4875173370319, "grad_norm": 0.3839595302637746, "learning_rate": 2.5378295277748087e-06, "loss": 0.3655, "step": 5029 }, { "epoch": 3.4882108183079055, "grad_norm": 0.3731071529634874, "learning_rate": 2.5357229553141904e-06, "loss": 0.3978, "step": 5030 }, { "epoch": 3.4889042995839112, "grad_norm": 0.40674519331511216, "learning_rate": 2.533616960462265e-06, "loss": 0.4112, "step": 5031 }, { "epoch": 3.489597780859917, "grad_norm": 0.38259029857683174, "learning_rate": 2.531511543712662e-06, "loss": 0.3743, "step": 5032 }, { "epoch": 3.4902912621359223, "grad_norm": 0.4530269312224613, "learning_rate": 2.5294067055588765e-06, "loss": 0.3712, "step": 5033 }, { "epoch": 3.4909847434119277, "grad_norm": 0.417386544534514, "learning_rate": 2.5273024464942654e-06, "loss": 0.3516, "step": 5034 }, { "epoch": 3.4916782246879334, "grad_norm": 0.4640925026518083, "learning_rate": 2.5251987670120527e-06, "loss": 0.3607, "step": 5035 }, { "epoch": 3.492371705963939, "grad_norm": 0.4054169974866148, "learning_rate": 2.523095667605327e-06, "loss": 0.3772, "step": 5036 }, { "epoch": 3.4930651872399445, "grad_norm": 0.42471608109592923, "learning_rate": 2.5209931487670364e-06, "loss": 0.4067, "step": 5037 }, { "epoch": 3.49375866851595, "grad_norm": 0.44490904198146786, "learning_rate": 2.51889121099e-06, 
e-06,
"loss": 0.3734, "step": 5038 }, { "epoch": 3.4944521497919556, "grad_norm": 0.42878308948564786, "learning_rate": 2.516789854766893e-06, "loss": 0.368, "step": 5039 }, { "epoch": 3.4951456310679614, "grad_norm": 0.4172092134103728, "learning_rate": 2.5146890805902575e-06, "loss": 0.3575, "step": 5040 }, { "epoch": 3.4958391123439667, "grad_norm": 0.4330252916886809, "learning_rate": 2.5125888889525057e-06, "loss": 0.3891, "step": 5041 }, { "epoch": 3.496532593619972, "grad_norm": 0.44966801182943367, "learning_rate": 2.5104892803459024e-06, "loss": 0.3717, "step": 5042 }, { "epoch": 3.497226074895978, "grad_norm": 0.45401406890894613, "learning_rate": 2.508390255262583e-06, "loss": 0.3679, "step": 5043 }, { "epoch": 3.4979195561719836, "grad_norm": 0.4090479354873949, "learning_rate": 2.5062918141945412e-06, "loss": 0.3682, "step": 5044 }, { "epoch": 3.498613037447989, "grad_norm": 0.38637435812051035, "learning_rate": 2.5041939576336383e-06, "loss": 0.367, "step": 5045 }, { "epoch": 3.4993065187239942, "grad_norm": 0.4553382546991837, "learning_rate": 2.5020966860715978e-06, "loss": 0.3932, "step": 5046 }, { "epoch": 3.5, "grad_norm": 0.4139407927285363, "learning_rate": 2.5000000000000015e-06, "loss": 0.3892, "step": 5047 }, { "epoch": 3.5006934812760058, "grad_norm": 0.38301490117254405, "learning_rate": 2.497903899910299e-06, "loss": 0.3556, "step": 5048 }, { "epoch": 3.501386962552011, "grad_norm": 0.3917710927109867, "learning_rate": 2.4958083862938015e-06, "loss": 0.4084, "step": 5049 }, { "epoch": 3.5020804438280164, "grad_norm": 0.38857832797674746, "learning_rate": 2.4937134596416823e-06, "loss": 0.3733, "step": 5050 }, { "epoch": 3.502773925104022, "grad_norm": 0.3881380743929215, "learning_rate": 2.4916191204449785e-06, "loss": 0.3907, "step": 5051 }, { "epoch": 3.503467406380028, "grad_norm": 0.4486637336228026, "learning_rate": 2.4895253691945847e-06, "loss": 0.3558, "step": 5052 }, { "epoch": 3.5041608876560333, "grad_norm": 0.4097794795215577, "learning_rate": 2.487432206381262e-06, "loss": 0.3652, "step": 5053 }, { "epoch": 3.5048543689320386, "grad_norm": 1.3921924237059429, "learning_rate": 2.4853396324956358e-06, "loss": 0.43, "step": 5054 }, { "epoch": 3.5055478502080444, "grad_norm": 0.41838093334476806, "learning_rate": 2.4832476480281857e-06, "loss": 0.3524, "step": 5055 }, { "epoch": 3.50624133148405, "grad_norm": 0.41369193499130186, "learning_rate": 2.4811562534692597e-06, "loss": 0.3896, "step": 5056 }, { "epoch": 3.5069348127600555, "grad_norm": 0.4979055135049944, "learning_rate": 2.479065449309067e-06, "loss": 0.4396, "step": 5057 }, { "epoch": 3.507628294036061, "grad_norm": 0.39550964258047344, "learning_rate": 2.4769752360376723e-06, "loss": 0.414, "step": 5058 }, { "epoch": 3.5083217753120666, "grad_norm": 0.378927606463655, "learning_rate": 2.4748856141450132e-06, "loss": 0.3528, "step": 5059 }, { "epoch": 3.5090152565880723, "grad_norm": 0.3872949218780298, "learning_rate": 2.472796584120877e-06, "loss": 0.3634, "step": 5060 }, { "epoch": 3.5097087378640777, "grad_norm": 0.397521433111934, "learning_rate": 2.470708146454918e-06, "loss": 0.3983, "step": 5061 }, { "epoch": 3.510402219140083, "grad_norm": 0.38102694479794613, "learning_rate": 2.4686203016366535e-06, "loss": 0.3318, "step": 5062 }, { "epoch": 3.5110957004160888, "grad_norm": 0.399446659559631, "learning_rate": 2.4665330501554554e-06, "loss": 0.3685, "step": 5063 }, { "epoch": 3.5117891816920945, "grad_norm": 0.38685844927839425, "learning_rate": 2.464446392500562e-06, "loss": 0.3921, 
"step": 5064 }, { "epoch": 3.5124826629681, "grad_norm": 0.3749103434827653, "learning_rate": 2.462360329161073e-06, "loss": 0.3747, "step": 5065 }, { "epoch": 3.513176144244105, "grad_norm": 0.37880554878461997, "learning_rate": 2.4602748606259424e-06, "loss": 0.4149, "step": 5066 }, { "epoch": 3.513869625520111, "grad_norm": 0.5888943881940791, "learning_rate": 2.4581899873839903e-06, "loss": 0.4102, "step": 5067 }, { "epoch": 3.5145631067961167, "grad_norm": 0.4044519651208474, "learning_rate": 2.4561057099238973e-06, "loss": 0.3612, "step": 5068 }, { "epoch": 3.515256588072122, "grad_norm": 0.36750360054231845, "learning_rate": 2.4540220287342022e-06, "loss": 0.3566, "step": 5069 }, { "epoch": 3.5159500693481274, "grad_norm": 0.39329357708485335, "learning_rate": 2.451938944303306e-06, "loss": 0.4011, "step": 5070 }, { "epoch": 3.516643550624133, "grad_norm": 0.3828839657383846, "learning_rate": 2.449856457119466e-06, "loss": 0.3475, "step": 5071 }, { "epoch": 3.517337031900139, "grad_norm": 0.46731411794670946, "learning_rate": 2.447774567670803e-06, "loss": 0.3809, "step": 5072 }, { "epoch": 3.5180305131761442, "grad_norm": 0.4043809708549121, "learning_rate": 2.4456932764452995e-06, "loss": 0.4005, "step": 5073 }, { "epoch": 3.5187239944521496, "grad_norm": 0.37770022911936146, "learning_rate": 2.4436125839307907e-06, "loss": 0.3502, "step": 5074 }, { "epoch": 3.5194174757281553, "grad_norm": 0.44234399066626795, "learning_rate": 2.441532490614978e-06, "loss": 0.3422, "step": 5075 }, { "epoch": 3.520110957004161, "grad_norm": 0.3885938237966985, "learning_rate": 2.43945299698542e-06, "loss": 0.3714, "step": 5076 }, { "epoch": 3.5208044382801664, "grad_norm": 0.40177653501425653, "learning_rate": 2.4373741035295357e-06, "loss": 0.3961, "step": 5077 }, { "epoch": 3.5214979195561718, "grad_norm": 0.4000854666406545, "learning_rate": 2.435295810734604e-06, "loss": 0.4198, "step": 5078 }, { "epoch": 3.5221914008321775, "grad_norm": 0.6035082194702799, "learning_rate": 2.4332181190877573e-06, "loss": 0.3566, "step": 5079 }, { "epoch": 3.5228848821081833, "grad_norm": 0.4422150008665714, "learning_rate": 2.4311410290759945e-06, "loss": 0.3611, "step": 5080 }, { "epoch": 3.5235783633841886, "grad_norm": 0.41796860801440094, "learning_rate": 2.4290645411861717e-06, "loss": 0.3918, "step": 5081 }, { "epoch": 3.524271844660194, "grad_norm": 0.38059600356752527, "learning_rate": 2.4269886559049995e-06, "loss": 0.3654, "step": 5082 }, { "epoch": 3.5249653259361997, "grad_norm": 0.4385285805622044, "learning_rate": 2.4249133737190526e-06, "loss": 0.4324, "step": 5083 }, { "epoch": 3.5256588072122055, "grad_norm": 0.3824968793825981, "learning_rate": 2.4228386951147596e-06, "loss": 0.3807, "step": 5084 }, { "epoch": 3.526352288488211, "grad_norm": 0.4058825410200674, "learning_rate": 2.42076462057841e-06, "loss": 0.3788, "step": 5085 }, { "epoch": 3.527045769764216, "grad_norm": 0.408908026325552, "learning_rate": 2.418691150596158e-06, "loss": 0.3748, "step": 5086 }, { "epoch": 3.527739251040222, "grad_norm": 0.4041296655093561, "learning_rate": 2.416618285654003e-06, "loss": 0.3462, "step": 5087 }, { "epoch": 3.5284327323162277, "grad_norm": 0.42914773447142135, "learning_rate": 2.4145460262378145e-06, "loss": 0.387, "step": 5088 }, { "epoch": 3.529126213592233, "grad_norm": 0.37428116045003906, "learning_rate": 2.4124743728333106e-06, "loss": 0.3722, "step": 5089 }, { "epoch": 3.5298196948682383, "grad_norm": 0.36828207684842446, "learning_rate": 2.4104033259260737e-06, "loss": 0.3552, "step": 
5090 }, { "epoch": 3.530513176144244, "grad_norm": 0.3821252503893683, "learning_rate": 2.408332886001545e-06, "loss": 0.3779, "step": 5091 }, { "epoch": 3.53120665742025, "grad_norm": 0.40320422676898987, "learning_rate": 2.4062630535450156e-06, "loss": 0.3897, "step": 5092 }, { "epoch": 3.531900138696255, "grad_norm": 0.3957273762510209, "learning_rate": 2.4041938290416416e-06, "loss": 0.3917, "step": 5093 }, { "epoch": 3.5325936199722605, "grad_norm": 0.4101799685605539, "learning_rate": 2.402125212976435e-06, "loss": 0.3782, "step": 5094 }, { "epoch": 3.5332871012482663, "grad_norm": 0.4153548487891646, "learning_rate": 2.4000572058342637e-06, "loss": 0.3845, "step": 5095 }, { "epoch": 3.533980582524272, "grad_norm": 0.4020625598523758, "learning_rate": 2.3979898080998546e-06, "loss": 0.3843, "step": 5096 }, { "epoch": 3.5346740638002774, "grad_norm": 0.4637139216704794, "learning_rate": 2.3959230202577893e-06, "loss": 0.3767, "step": 5097 }, { "epoch": 3.5353675450762827, "grad_norm": 0.39560653135499474, "learning_rate": 2.3938568427925073e-06, "loss": 0.3734, "step": 5098 }, { "epoch": 3.5360610263522885, "grad_norm": 0.41952487368460134, "learning_rate": 2.3917912761883092e-06, "loss": 0.3838, "step": 5099 }, { "epoch": 3.5367545076282942, "grad_norm": 0.4073243407179045, "learning_rate": 2.3897263209293446e-06, "loss": 0.3693, "step": 5100 }, { "epoch": 3.5374479889042996, "grad_norm": 0.3939945386639667, "learning_rate": 2.3876619774996263e-06, "loss": 0.3393, "step": 5101 }, { "epoch": 3.538141470180305, "grad_norm": 0.36974413315504706, "learning_rate": 2.3855982463830222e-06, "loss": 0.3259, "step": 5102 }, { "epoch": 3.5388349514563107, "grad_norm": 0.42464960403658675, "learning_rate": 2.3835351280632514e-06, "loss": 0.3199, "step": 5103 }, { "epoch": 3.5395284327323164, "grad_norm": 0.4219050841635451, "learning_rate": 2.3814726230239006e-06, "loss": 0.4182, "step": 5104 }, { "epoch": 3.5402219140083218, "grad_norm": 0.4384143436449698, "learning_rate": 2.379410731748401e-06, "loss": 0.3716, "step": 5105 }, { "epoch": 3.540915395284327, "grad_norm": 0.4283665946687072, "learning_rate": 2.3773494547200463e-06, "loss": 0.3936, "step": 5106 }, { "epoch": 3.541608876560333, "grad_norm": 0.3967184806789085, "learning_rate": 2.375288792421988e-06, "loss": 0.3334, "step": 5107 }, { "epoch": 3.5423023578363386, "grad_norm": 0.3948682884165533, "learning_rate": 2.3732287453372254e-06, "loss": 0.3801, "step": 5108 }, { "epoch": 3.542995839112344, "grad_norm": 0.40342021294617914, "learning_rate": 2.371169313948621e-06, "loss": 0.4084, "step": 5109 }, { "epoch": 3.5436893203883493, "grad_norm": 0.39350008152705535, "learning_rate": 2.3691104987388923e-06, "loss": 0.3347, "step": 5110 }, { "epoch": 3.544382801664355, "grad_norm": 0.41690554675472585, "learning_rate": 2.367052300190607e-06, "loss": 0.3863, "step": 5111 }, { "epoch": 3.545076282940361, "grad_norm": 0.43141064905132337, "learning_rate": 2.364994718786194e-06, "loss": 0.3758, "step": 5112 }, { "epoch": 3.545769764216366, "grad_norm": 0.40127986009187966, "learning_rate": 2.362937755007935e-06, "loss": 0.4047, "step": 5113 }, { "epoch": 3.5464632454923715, "grad_norm": 0.4010495150000716, "learning_rate": 2.360881409337968e-06, "loss": 0.3889, "step": 5114 }, { "epoch": 3.5471567267683772, "grad_norm": 0.4127465105466682, "learning_rate": 2.3588256822582874e-06, "loss": 0.3607, "step": 5115 }, { "epoch": 3.547850208044383, "grad_norm": 0.3774381045847109, "learning_rate": 2.3567705742507364e-06, "loss": 0.3763, "step": 5116 
}, { "epoch": 3.5485436893203883, "grad_norm": 0.4120927121466516, "learning_rate": 2.3547160857970198e-06, "loss": 0.3948, "step": 5117 }, { "epoch": 3.5492371705963937, "grad_norm": 0.4292601858966053, "learning_rate": 2.352662217378696e-06, "loss": 0.3482, "step": 5118 }, { "epoch": 3.5499306518723994, "grad_norm": 0.5319920019326687, "learning_rate": 2.3506089694771737e-06, "loss": 0.3561, "step": 5119 }, { "epoch": 3.550624133148405, "grad_norm": 0.39384389412021664, "learning_rate": 2.3485563425737234e-06, "loss": 0.3281, "step": 5120 }, { "epoch": 3.5513176144244105, "grad_norm": 0.3933910607219121, "learning_rate": 2.34650433714946e-06, "loss": 0.3821, "step": 5121 }, { "epoch": 3.552011095700416, "grad_norm": 0.4656496497791498, "learning_rate": 2.3444529536853645e-06, "loss": 0.4006, "step": 5122 }, { "epoch": 3.5527045769764216, "grad_norm": 0.4354125644486449, "learning_rate": 2.342402192662266e-06, "loss": 0.3535, "step": 5123 }, { "epoch": 3.5533980582524274, "grad_norm": 0.37343046982874845, "learning_rate": 2.3403520545608442e-06, "loss": 0.3546, "step": 5124 }, { "epoch": 3.5540915395284327, "grad_norm": 0.37100827250947604, "learning_rate": 2.338302539861639e-06, "loss": 0.3589, "step": 5125 }, { "epoch": 3.554785020804438, "grad_norm": 0.3744864477167227, "learning_rate": 2.3362536490450434e-06, "loss": 0.3411, "step": 5126 }, { "epoch": 3.555478502080444, "grad_norm": 0.3908935070654357, "learning_rate": 2.3342053825912987e-06, "loss": 0.3888, "step": 5127 }, { "epoch": 3.5561719833564496, "grad_norm": 0.4093299461242767, "learning_rate": 2.3321577409805074e-06, "loss": 0.3393, "step": 5128 }, { "epoch": 3.556865464632455, "grad_norm": 0.40844912948944956, "learning_rate": 2.3301107246926187e-06, "loss": 0.4082, "step": 5129 }, { "epoch": 3.5575589459084602, "grad_norm": 0.3820327365173448, "learning_rate": 2.3280643342074377e-06, "loss": 0.401, "step": 5130 }, { "epoch": 3.558252427184466, "grad_norm": 0.44093755257143197, "learning_rate": 2.3260185700046295e-06, "loss": 0.3968, "step": 5131 }, { "epoch": 3.5589459084604718, "grad_norm": 0.3847255774776019, "learning_rate": 2.3239734325637007e-06, "loss": 0.3724, "step": 5132 }, { "epoch": 3.559639389736477, "grad_norm": 0.36905387048496696, "learning_rate": 2.3219289223640207e-06, "loss": 0.3823, "step": 5133 }, { "epoch": 3.5603328710124824, "grad_norm": 0.3920318487160868, "learning_rate": 2.319885039884804e-06, "loss": 0.3751, "step": 5134 }, { "epoch": 3.561026352288488, "grad_norm": 0.4568631125919066, "learning_rate": 2.3178417856051232e-06, "loss": 0.3909, "step": 5135 }, { "epoch": 3.561719833564494, "grad_norm": 0.3993058283619273, "learning_rate": 2.3157991600039055e-06, "loss": 0.4318, "step": 5136 }, { "epoch": 3.5624133148404993, "grad_norm": 0.3888013021125074, "learning_rate": 2.313757163559922e-06, "loss": 0.3148, "step": 5137 }, { "epoch": 3.5631067961165046, "grad_norm": 0.43380746426691436, "learning_rate": 2.3117157967518052e-06, "loss": 0.38, "step": 5138 }, { "epoch": 3.5638002773925104, "grad_norm": 0.3689918620136303, "learning_rate": 2.309675060058036e-06, "loss": 0.3817, "step": 5139 }, { "epoch": 3.564493758668516, "grad_norm": 0.4358658667147603, "learning_rate": 2.307634953956948e-06, "loss": 0.3935, "step": 5140 }, { "epoch": 3.5651872399445215, "grad_norm": 0.39093448447171747, "learning_rate": 2.3055954789267306e-06, "loss": 0.3829, "step": 5141 }, { "epoch": 3.565880721220527, "grad_norm": 0.5781377056891382, "learning_rate": 2.3035566354454163e-06, "loss": 0.3524, "step": 5142 }, { 
"epoch": 3.5665742024965326, "grad_norm": 0.393615679603404, "learning_rate": 2.301518423990899e-06, "loss": 0.3897, "step": 5143 }, { "epoch": 3.5672676837725383, "grad_norm": 0.4279025338548436, "learning_rate": 2.299480845040921e-06, "loss": 0.397, "step": 5144 }, { "epoch": 3.5679611650485437, "grad_norm": 0.40230372264558606, "learning_rate": 2.2974438990730734e-06, "loss": 0.3984, "step": 5145 }, { "epoch": 3.568654646324549, "grad_norm": 0.38299653364237374, "learning_rate": 2.2954075865648027e-06, "loss": 0.377, "step": 5146 }, { "epoch": 3.5693481276005548, "grad_norm": 0.37389128567166785, "learning_rate": 2.2933719079934064e-06, "loss": 0.3493, "step": 5147 }, { "epoch": 3.5700416088765605, "grad_norm": 0.38239529686987483, "learning_rate": 2.291336863836032e-06, "loss": 0.3627, "step": 5148 }, { "epoch": 3.570735090152566, "grad_norm": 0.4229452525792137, "learning_rate": 2.2893024545696822e-06, "loss": 0.3427, "step": 5149 }, { "epoch": 3.571428571428571, "grad_norm": 0.37419667452307964, "learning_rate": 2.2872686806712037e-06, "loss": 0.3416, "step": 5150 }, { "epoch": 3.572122052704577, "grad_norm": 0.38368312027831164, "learning_rate": 2.285235542617299e-06, "loss": 0.4175, "step": 5151 }, { "epoch": 3.5728155339805827, "grad_norm": 0.4458706093811005, "learning_rate": 2.283203040884524e-06, "loss": 0.4039, "step": 5152 }, { "epoch": 3.573509015256588, "grad_norm": 0.4468861626451133, "learning_rate": 2.2811711759492783e-06, "loss": 0.3766, "step": 5153 }, { "epoch": 3.5742024965325934, "grad_norm": 0.4091472015005601, "learning_rate": 2.2791399482878184e-06, "loss": 0.3722, "step": 5154 }, { "epoch": 3.574895977808599, "grad_norm": 0.37328416454054747, "learning_rate": 2.2771093583762517e-06, "loss": 0.338, "step": 5155 }, { "epoch": 3.575589459084605, "grad_norm": 0.37681455419911747, "learning_rate": 2.2750794066905268e-06, "loss": 0.3718, "step": 5156 }, { "epoch": 3.5762829403606102, "grad_norm": 0.4031006990308686, "learning_rate": 2.273050093706458e-06, "loss": 0.3773, "step": 5157 }, { "epoch": 3.5769764216366156, "grad_norm": 0.5014256774511401, "learning_rate": 2.271021419899696e-06, "loss": 0.3968, "step": 5158 }, { "epoch": 3.5776699029126213, "grad_norm": 0.374929393250059, "learning_rate": 2.2689933857457492e-06, "loss": 0.3706, "step": 5159 }, { "epoch": 3.578363384188627, "grad_norm": 0.39485848072786517, "learning_rate": 2.2669659917199755e-06, "loss": 0.3597, "step": 5160 }, { "epoch": 3.5790568654646324, "grad_norm": 0.5046269872671587, "learning_rate": 2.264939238297578e-06, "loss": 0.3977, "step": 5161 }, { "epoch": 3.5797503467406377, "grad_norm": 0.3932101896956029, "learning_rate": 2.2629131259536147e-06, "loss": 0.3884, "step": 5162 }, { "epoch": 3.5804438280166435, "grad_norm": 0.8090460602154435, "learning_rate": 2.2608876551629933e-06, "loss": 0.3655, "step": 5163 }, { "epoch": 3.5811373092926493, "grad_norm": 1.212908152197435, "learning_rate": 2.2588628264004663e-06, "loss": 0.3473, "step": 5164 }, { "epoch": 3.5818307905686546, "grad_norm": 0.3903522565004271, "learning_rate": 2.256838640140641e-06, "loss": 0.4017, "step": 5165 }, { "epoch": 3.58252427184466, "grad_norm": 0.3739733604519432, "learning_rate": 2.2548150968579712e-06, "loss": 0.373, "step": 5166 }, { "epoch": 3.5832177531206657, "grad_norm": 0.3800201802447021, "learning_rate": 2.2527921970267614e-06, "loss": 0.3788, "step": 5167 }, { "epoch": 3.5839112343966715, "grad_norm": 0.4120563299207978, "learning_rate": 2.2507699411211658e-06, "loss": 0.3403, "step": 5168 }, { "epoch": 
3.584604715672677, "grad_norm": 0.4957857029161015, "learning_rate": 2.2487483296151836e-06, "loss": 0.3921, "step": 5169 }, { "epoch": 3.585298196948682, "grad_norm": 0.3930781419612483, "learning_rate": 2.2467273629826674e-06, "loss": 0.3803, "step": 5170 }, { "epoch": 3.585991678224688, "grad_norm": 0.4114561838764046, "learning_rate": 2.244707041697319e-06, "loss": 0.4147, "step": 5171 }, { "epoch": 3.5866851595006937, "grad_norm": 0.3704017677315467, "learning_rate": 2.242687366232683e-06, "loss": 0.313, "step": 5172 }, { "epoch": 3.587378640776699, "grad_norm": 0.38027636666060016, "learning_rate": 2.240668337062162e-06, "loss": 0.4064, "step": 5173 }, { "epoch": 3.5880721220527043, "grad_norm": 0.3975355553666112, "learning_rate": 2.238649954658994e-06, "loss": 0.3724, "step": 5174 }, { "epoch": 3.58876560332871, "grad_norm": 0.4489736751899162, "learning_rate": 2.2366322194962804e-06, "loss": 0.3733, "step": 5175 }, { "epoch": 3.589459084604716, "grad_norm": 0.4006245394676718, "learning_rate": 2.234615132046962e-06, "loss": 0.3374, "step": 5176 }, { "epoch": 3.590152565880721, "grad_norm": 0.4117978416065849, "learning_rate": 2.2325986927838286e-06, "loss": 0.3791, "step": 5177 }, { "epoch": 3.5908460471567265, "grad_norm": 0.43688854097432955, "learning_rate": 2.23058290217952e-06, "loss": 0.3162, "step": 5178 }, { "epoch": 3.5915395284327323, "grad_norm": 0.38144373688509364, "learning_rate": 2.2285677607065204e-06, "loss": 0.3404, "step": 5179 }, { "epoch": 3.592233009708738, "grad_norm": 0.38334174411156685, "learning_rate": 2.226553268837166e-06, "loss": 0.3931, "step": 5180 }, { "epoch": 3.5929264909847434, "grad_norm": 0.865409573635709, "learning_rate": 2.224539427043641e-06, "loss": 0.3609, "step": 5181 }, { "epoch": 3.5936199722607487, "grad_norm": 0.5822693629401792, "learning_rate": 2.2225262357979714e-06, "loss": 0.3555, "step": 5182 }, { "epoch": 3.5943134535367545, "grad_norm": 0.43446102175494017, "learning_rate": 2.2205136955720373e-06, "loss": 0.4107, "step": 5183 }, { "epoch": 3.5950069348127602, "grad_norm": 0.37732508892255723, "learning_rate": 2.218501806837563e-06, "loss": 0.3828, "step": 5184 }, { "epoch": 3.5957004160887656, "grad_norm": 0.40385338731973575, "learning_rate": 2.21649057006612e-06, "loss": 0.3169, "step": 5185 }, { "epoch": 3.596393897364771, "grad_norm": 0.4004502528288485, "learning_rate": 2.2144799857291305e-06, "loss": 0.3899, "step": 5186 }, { "epoch": 3.5970873786407767, "grad_norm": 0.41078660164304187, "learning_rate": 2.2124700542978566e-06, "loss": 0.3769, "step": 5187 }, { "epoch": 3.5977808599167824, "grad_norm": 0.4357819082768897, "learning_rate": 2.210460776243414e-06, "loss": 0.413, "step": 5188 }, { "epoch": 3.5984743411927878, "grad_norm": 0.44925926321763543, "learning_rate": 2.208452152036764e-06, "loss": 0.3736, "step": 5189 }, { "epoch": 3.599167822468793, "grad_norm": 0.3885083649446202, "learning_rate": 2.2064441821487107e-06, "loss": 0.385, "step": 5190 }, { "epoch": 3.599861303744799, "grad_norm": 0.39902687081781524, "learning_rate": 2.2044368670499093e-06, "loss": 0.375, "step": 5191 }, { "epoch": 3.6005547850208046, "grad_norm": 0.4080053013914103, "learning_rate": 2.20243020721086e-06, "loss": 0.3943, "step": 5192 }, { "epoch": 3.60124826629681, "grad_norm": 0.403542067467956, "learning_rate": 2.200424203101909e-06, "loss": 0.4101, "step": 5193 }, { "epoch": 3.6019417475728153, "grad_norm": 0.4155584753912239, "learning_rate": 2.1984188551932513e-06, "loss": 0.4447, "step": 5194 }, { "epoch": 3.602635228848821, 
"grad_norm": 0.37401766239256234, "learning_rate": 2.1964141639549217e-06, "loss": 0.3474, "step": 5195 }, { "epoch": 3.603328710124827, "grad_norm": 0.37833107582867914, "learning_rate": 2.1944101298568076e-06, "loss": 0.3278, "step": 5196 }, { "epoch": 3.604022191400832, "grad_norm": 0.41213190344062667, "learning_rate": 2.192406753368642e-06, "loss": 0.3648, "step": 5197 }, { "epoch": 3.6047156726768375, "grad_norm": 0.603651947577292, "learning_rate": 2.1904040349599986e-06, "loss": 0.3793, "step": 5198 }, { "epoch": 3.6054091539528432, "grad_norm": 0.3918556428222181, "learning_rate": 2.1884019751003003e-06, "loss": 0.3799, "step": 5199 }, { "epoch": 3.606102635228849, "grad_norm": 0.3712233854423895, "learning_rate": 2.186400574258819e-06, "loss": 0.3555, "step": 5200 }, { "epoch": 3.6067961165048543, "grad_norm": 0.4253416479267744, "learning_rate": 2.184399832904662e-06, "loss": 0.4025, "step": 5201 }, { "epoch": 3.6074895977808596, "grad_norm": 0.37654802991971503, "learning_rate": 2.182399751506797e-06, "loss": 0.3927, "step": 5202 }, { "epoch": 3.6081830790568654, "grad_norm": 0.36641420344233405, "learning_rate": 2.1804003305340217e-06, "loss": 0.3237, "step": 5203 }, { "epoch": 3.608876560332871, "grad_norm": 0.4157666780151188, "learning_rate": 2.178401570454989e-06, "loss": 0.3956, "step": 5204 }, { "epoch": 3.6095700416088765, "grad_norm": 0.423908334019587, "learning_rate": 2.1764034717381943e-06, "loss": 0.393, "step": 5205 }, { "epoch": 3.610263522884882, "grad_norm": 0.38338421385044325, "learning_rate": 2.1744060348519753e-06, "loss": 0.3736, "step": 5206 }, { "epoch": 3.6109570041608876, "grad_norm": 0.471407961354846, "learning_rate": 2.1724092602645177e-06, "loss": 0.3792, "step": 5207 }, { "epoch": 3.6116504854368934, "grad_norm": 0.4868353994286507, "learning_rate": 2.1704131484438523e-06, "loss": 0.4532, "step": 5208 }, { "epoch": 3.6123439667128987, "grad_norm": 0.3861177034846141, "learning_rate": 2.1684176998578506e-06, "loss": 0.3391, "step": 5209 }, { "epoch": 3.613037447988904, "grad_norm": 0.3927332943918114, "learning_rate": 2.1664229149742328e-06, "loss": 0.3354, "step": 5210 }, { "epoch": 3.61373092926491, "grad_norm": 0.4096638230494183, "learning_rate": 2.1644287942605618e-06, "loss": 0.3739, "step": 5211 }, { "epoch": 3.6144244105409156, "grad_norm": 0.4116400461050245, "learning_rate": 2.1624353381842457e-06, "loss": 0.3364, "step": 5212 }, { "epoch": 3.615117891816921, "grad_norm": 0.7713549114821813, "learning_rate": 2.1604425472125375e-06, "loss": 0.3669, "step": 5213 }, { "epoch": 3.615811373092926, "grad_norm": 0.38062243046752114, "learning_rate": 2.1584504218125293e-06, "loss": 0.3466, "step": 5214 }, { "epoch": 3.616504854368932, "grad_norm": 0.39060587592483537, "learning_rate": 2.156458962451164e-06, "loss": 0.3289, "step": 5215 }, { "epoch": 3.6171983356449378, "grad_norm": 0.42825069685408346, "learning_rate": 2.154468169595223e-06, "loss": 0.4044, "step": 5216 }, { "epoch": 3.617891816920943, "grad_norm": 0.4182024432537755, "learning_rate": 2.1524780437113343e-06, "loss": 0.3812, "step": 5217 }, { "epoch": 3.6185852981969484, "grad_norm": 0.3913450957068026, "learning_rate": 2.1504885852659713e-06, "loss": 0.418, "step": 5218 }, { "epoch": 3.619278779472954, "grad_norm": 0.39355566557808314, "learning_rate": 2.1484997947254432e-06, "loss": 0.3725, "step": 5219 }, { "epoch": 3.61997226074896, "grad_norm": 0.3865207872346973, "learning_rate": 2.1465116725559133e-06, "loss": 0.383, "step": 5220 }, { "epoch": 3.6206657420249653, 
"grad_norm": 0.38882907571891734, "learning_rate": 2.144524219223383e-06, "loss": 0.3864, "step": 5221 }, { "epoch": 3.6213592233009706, "grad_norm": 0.4207062110836621, "learning_rate": 2.1425374351936946e-06, "loss": 0.415, "step": 5222 }, { "epoch": 3.6220527045769764, "grad_norm": 0.38788059027756255, "learning_rate": 2.140551320932538e-06, "loss": 0.3918, "step": 5223 }, { "epoch": 3.622746185852982, "grad_norm": 0.39990187598462223, "learning_rate": 2.1385658769054406e-06, "loss": 0.4229, "step": 5224 }, { "epoch": 3.6234396671289875, "grad_norm": 0.3915087703589566, "learning_rate": 2.1365811035777783e-06, "loss": 0.3641, "step": 5225 }, { "epoch": 3.624133148404993, "grad_norm": 0.38639127097736065, "learning_rate": 2.1345970014147695e-06, "loss": 0.3469, "step": 5226 }, { "epoch": 3.6248266296809986, "grad_norm": 0.38141827418371743, "learning_rate": 2.1326135708814695e-06, "loss": 0.3696, "step": 5227 }, { "epoch": 3.6255201109570043, "grad_norm": 0.37335636095513836, "learning_rate": 2.1306308124427818e-06, "loss": 0.3885, "step": 5228 }, { "epoch": 3.6262135922330097, "grad_norm": 0.37237848011590075, "learning_rate": 2.1286487265634503e-06, "loss": 0.365, "step": 5229 }, { "epoch": 3.6269070735090154, "grad_norm": 0.40953904046546347, "learning_rate": 2.126667313708062e-06, "loss": 0.3883, "step": 5230 }, { "epoch": 3.6276005547850207, "grad_norm": 0.36473035125010644, "learning_rate": 2.1246865743410485e-06, "loss": 0.3911, "step": 5231 }, { "epoch": 3.6282940360610265, "grad_norm": 0.40849967577779733, "learning_rate": 2.122706508926675e-06, "loss": 0.3997, "step": 5232 }, { "epoch": 3.628987517337032, "grad_norm": 0.3934298257638829, "learning_rate": 2.1207271179290582e-06, "loss": 0.4122, "step": 5233 }, { "epoch": 3.6296809986130376, "grad_norm": 0.4458587643751225, "learning_rate": 2.118748401812154e-06, "loss": 0.3703, "step": 5234 }, { "epoch": 3.630374479889043, "grad_norm": 0.35756973639921996, "learning_rate": 2.1167703610397557e-06, "loss": 0.3577, "step": 5235 }, { "epoch": 3.6310679611650487, "grad_norm": 0.3823509901853653, "learning_rate": 2.1147929960755033e-06, "loss": 0.3832, "step": 5236 }, { "epoch": 3.631761442441054, "grad_norm": 0.4289814467794912, "learning_rate": 2.1128163073828766e-06, "loss": 0.3614, "step": 5237 }, { "epoch": 3.63245492371706, "grad_norm": 0.4005712963390427, "learning_rate": 2.1108402954251978e-06, "loss": 0.325, "step": 5238 }, { "epoch": 3.633148404993065, "grad_norm": 0.4025266583207819, "learning_rate": 2.1088649606656313e-06, "loss": 0.4357, "step": 5239 }, { "epoch": 3.633841886269071, "grad_norm": 0.4057167364696131, "learning_rate": 2.1068903035671777e-06, "loss": 0.3378, "step": 5240 }, { "epoch": 3.6345353675450762, "grad_norm": 0.40736315320025795, "learning_rate": 2.104916324592684e-06, "loss": 0.3879, "step": 5241 }, { "epoch": 3.635228848821082, "grad_norm": 0.4039867441121231, "learning_rate": 2.102943024204838e-06, "loss": 0.3768, "step": 5242 }, { "epoch": 3.6359223300970873, "grad_norm": 0.39300261998214253, "learning_rate": 2.1009704028661643e-06, "loss": 0.38, "step": 5243 }, { "epoch": 3.636615811373093, "grad_norm": 0.40640668117504203, "learning_rate": 2.0989984610390325e-06, "loss": 0.3656, "step": 5244 }, { "epoch": 3.6373092926490984, "grad_norm": 0.4222430820699589, "learning_rate": 2.097027199185653e-06, "loss": 0.3962, "step": 5245 }, { "epoch": 3.638002773925104, "grad_norm": 0.42119368631648546, "learning_rate": 2.0950566177680706e-06, "loss": 0.3717, "step": 5246 }, { "epoch": 3.6386962552011095, 
"grad_norm": 0.4265118119973986, "learning_rate": 2.093086717248181e-06, "loss": 0.3809, "step": 5247 }, { "epoch": 3.6393897364771153, "grad_norm": 0.40525527616582613, "learning_rate": 2.0911174980877106e-06, "loss": 0.3448, "step": 5248 }, { "epoch": 3.6400832177531206, "grad_norm": 0.4194810086531931, "learning_rate": 2.0891489607482322e-06, "loss": 0.3511, "step": 5249 }, { "epoch": 3.6407766990291264, "grad_norm": 0.37969005824143925, "learning_rate": 2.0871811056911574e-06, "loss": 0.4411, "step": 5250 }, { "epoch": 3.6414701803051317, "grad_norm": 0.40178574873091283, "learning_rate": 2.085213933377734e-06, "loss": 0.3827, "step": 5251 }, { "epoch": 3.6421636615811375, "grad_norm": 0.4367908327295253, "learning_rate": 2.083247444269055e-06, "loss": 0.3677, "step": 5252 }, { "epoch": 3.642857142857143, "grad_norm": 0.39093543065160924, "learning_rate": 2.081281638826052e-06, "loss": 0.3937, "step": 5253 }, { "epoch": 3.6435506241331486, "grad_norm": 0.4135113531118907, "learning_rate": 2.079316517509493e-06, "loss": 0.4199, "step": 5254 }, { "epoch": 3.644244105409154, "grad_norm": 0.41908321135583315, "learning_rate": 2.0773520807799903e-06, "loss": 0.3686, "step": 5255 }, { "epoch": 3.6449375866851597, "grad_norm": 0.38419745499152985, "learning_rate": 2.075388329097992e-06, "loss": 0.3587, "step": 5256 }, { "epoch": 3.645631067961165, "grad_norm": 0.3874963260170016, "learning_rate": 2.0734252629237892e-06, "loss": 0.3582, "step": 5257 }, { "epoch": 3.6463245492371708, "grad_norm": 0.35940525524850075, "learning_rate": 2.071462882717511e-06, "loss": 0.3759, "step": 5258 }, { "epoch": 3.647018030513176, "grad_norm": 0.3764549928368571, "learning_rate": 2.069501188939122e-06, "loss": 0.4275, "step": 5259 }, { "epoch": 3.647711511789182, "grad_norm": 0.3711125453577655, "learning_rate": 2.0675401820484325e-06, "loss": 0.3717, "step": 5260 }, { "epoch": 3.648404993065187, "grad_norm": 0.37295534035883965, "learning_rate": 2.0655798625050842e-06, "loss": 0.3534, "step": 5261 }, { "epoch": 3.649098474341193, "grad_norm": 0.4324254663511878, "learning_rate": 2.063620230768564e-06, "loss": 0.3809, "step": 5262 }, { "epoch": 3.6497919556171983, "grad_norm": 0.4219545422880887, "learning_rate": 2.061661287298198e-06, "loss": 0.3908, "step": 5263 }, { "epoch": 3.650485436893204, "grad_norm": 0.40626037071603777, "learning_rate": 2.059703032553142e-06, "loss": 0.3552, "step": 5264 }, { "epoch": 3.6511789181692094, "grad_norm": 0.3970658629986061, "learning_rate": 2.057745466992404e-06, "loss": 0.3668, "step": 5265 }, { "epoch": 3.651872399445215, "grad_norm": 0.3590732341243277, "learning_rate": 2.0557885910748177e-06, "loss": 0.3613, "step": 5266 }, { "epoch": 3.6525658807212205, "grad_norm": 0.4029705835190139, "learning_rate": 2.053832405259063e-06, "loss": 0.3718, "step": 5267 }, { "epoch": 3.6532593619972262, "grad_norm": 0.4336394056599201, "learning_rate": 2.0518769100036567e-06, "loss": 0.38, "step": 5268 }, { "epoch": 3.6539528432732316, "grad_norm": 0.4114631371488936, "learning_rate": 2.04992210576695e-06, "loss": 0.3768, "step": 5269 }, { "epoch": 3.6546463245492373, "grad_norm": 0.3968008258109872, "learning_rate": 2.0479679930071362e-06, "loss": 0.3704, "step": 5270 }, { "epoch": 3.6553398058252426, "grad_norm": 0.4342862056399891, "learning_rate": 2.0460145721822467e-06, "loss": 0.4137, "step": 5271 }, { "epoch": 3.6560332871012484, "grad_norm": 0.38812134703640805, "learning_rate": 2.0440618437501466e-06, "loss": 0.3615, "step": 5272 }, { "epoch": 3.6567267683772537, 
"grad_norm": 0.38389853571050464, "learning_rate": 2.042109808168542e-06, "loss": 0.3204, "step": 5273 }, { "epoch": 3.6574202496532595, "grad_norm": 0.4165238531784227, "learning_rate": 2.040158465894976e-06, "loss": 0.414, "step": 5274 }, { "epoch": 3.658113730929265, "grad_norm": 0.4009628554735647, "learning_rate": 2.03820781738683e-06, "loss": 0.4092, "step": 5275 }, { "epoch": 3.6588072122052706, "grad_norm": 0.40852166635369597, "learning_rate": 2.0362578631013225e-06, "loss": 0.3599, "step": 5276 }, { "epoch": 3.659500693481276, "grad_norm": 0.746066717427677, "learning_rate": 2.0343086034955064e-06, "loss": 0.3649, "step": 5277 }, { "epoch": 3.6601941747572817, "grad_norm": 0.5389555368446969, "learning_rate": 2.0323600390262743e-06, "loss": 0.3518, "step": 5278 }, { "epoch": 3.660887656033287, "grad_norm": 0.39812197559818674, "learning_rate": 2.030412170150359e-06, "loss": 0.3504, "step": 5279 }, { "epoch": 3.661581137309293, "grad_norm": 0.6868818943003177, "learning_rate": 2.0284649973243214e-06, "loss": 0.3647, "step": 5280 }, { "epoch": 3.662274618585298, "grad_norm": 0.4448039060870248, "learning_rate": 2.0265185210045686e-06, "loss": 0.3553, "step": 5281 }, { "epoch": 3.662968099861304, "grad_norm": 0.46477284084904175, "learning_rate": 2.0245727416473388e-06, "loss": 0.3757, "step": 5282 }, { "epoch": 3.663661581137309, "grad_norm": 0.3952221652676267, "learning_rate": 2.0226276597087095e-06, "loss": 0.3398, "step": 5283 }, { "epoch": 3.664355062413315, "grad_norm": 0.44734737821828086, "learning_rate": 2.0206832756445954e-06, "loss": 0.4001, "step": 5284 }, { "epoch": 3.6650485436893203, "grad_norm": 0.3992605563509741, "learning_rate": 2.0187395899107427e-06, "loss": 0.4067, "step": 5285 }, { "epoch": 3.665742024965326, "grad_norm": 0.40523073495924555, "learning_rate": 2.016796602962739e-06, "loss": 0.3697, "step": 5286 }, { "epoch": 3.6664355062413314, "grad_norm": 0.42897580319741746, "learning_rate": 2.014854315256007e-06, "loss": 0.4396, "step": 5287 }, { "epoch": 3.667128987517337, "grad_norm": 0.37582277128821234, "learning_rate": 2.0129127272458034e-06, "loss": 0.3518, "step": 5288 }, { "epoch": 3.6678224687933425, "grad_norm": 0.3830032308147283, "learning_rate": 2.0109718393872223e-06, "loss": 0.3998, "step": 5289 }, { "epoch": 3.6685159500693483, "grad_norm": 0.3954512366682606, "learning_rate": 2.0090316521351973e-06, "loss": 0.3968, "step": 5290 }, { "epoch": 3.6692094313453536, "grad_norm": 0.3980477177049583, "learning_rate": 2.007092165944487e-06, "loss": 0.391, "step": 5291 }, { "epoch": 3.6699029126213594, "grad_norm": 0.41782975999023964, "learning_rate": 2.005153381269701e-06, "loss": 0.3869, "step": 5292 }, { "epoch": 3.6705963938973647, "grad_norm": 0.42948888554482567, "learning_rate": 2.0032152985652708e-06, "loss": 0.4096, "step": 5293 }, { "epoch": 3.6712898751733705, "grad_norm": 0.43441263350808534, "learning_rate": 2.001277918285471e-06, "loss": 0.442, "step": 5294 }, { "epoch": 3.671983356449376, "grad_norm": 0.3857274273216944, "learning_rate": 1.9993412408844114e-06, "loss": 0.3885, "step": 5295 }, { "epoch": 3.6726768377253816, "grad_norm": 0.38980671828455116, "learning_rate": 1.997405266816031e-06, "loss": 0.3614, "step": 5296 }, { "epoch": 3.673370319001387, "grad_norm": 0.41422107066383607, "learning_rate": 1.995469996534111e-06, "loss": 0.3901, "step": 5297 }, { "epoch": 3.6740638002773927, "grad_norm": 0.3923980975330044, "learning_rate": 1.993535430492265e-06, "loss": 0.3463, "step": 5298 }, { "epoch": 3.674757281553398, 
"grad_norm": 0.3807959062075781, "learning_rate": 1.991601569143938e-06, "loss": 0.3461, "step": 5299 }, { "epoch": 3.6754507628294038, "grad_norm": 0.3974929649988162, "learning_rate": 1.9896684129424164e-06, "loss": 0.4339, "step": 5300 }, { "epoch": 3.676144244105409, "grad_norm": 0.39478911535342665, "learning_rate": 1.9877359623408167e-06, "loss": 0.41, "step": 5301 }, { "epoch": 3.676837725381415, "grad_norm": 0.418901214268293, "learning_rate": 1.9858042177920915e-06, "loss": 0.3946, "step": 5302 }, { "epoch": 3.67753120665742, "grad_norm": 0.38307831985414426, "learning_rate": 1.9838731797490295e-06, "loss": 0.3753, "step": 5303 }, { "epoch": 3.678224687933426, "grad_norm": 0.408751199189437, "learning_rate": 1.9819428486642488e-06, "loss": 0.4058, "step": 5304 }, { "epoch": 3.6789181692094313, "grad_norm": 0.41520334290650174, "learning_rate": 1.9800132249902084e-06, "loss": 0.4078, "step": 5305 }, { "epoch": 3.679611650485437, "grad_norm": 0.3911739104132286, "learning_rate": 1.978084309179194e-06, "loss": 0.3555, "step": 5306 }, { "epoch": 3.6803051317614424, "grad_norm": 0.43896781884169084, "learning_rate": 1.976156101683332e-06, "loss": 0.4057, "step": 5307 }, { "epoch": 3.680998613037448, "grad_norm": 0.4334563690105161, "learning_rate": 1.9742286029545823e-06, "loss": 0.3928, "step": 5308 }, { "epoch": 3.6816920943134535, "grad_norm": 0.40411046688184127, "learning_rate": 1.9723018134447303e-06, "loss": 0.3775, "step": 5309 }, { "epoch": 3.6823855755894592, "grad_norm": 0.4290264322409643, "learning_rate": 1.970375733605409e-06, "loss": 0.3756, "step": 5310 }, { "epoch": 3.6830790568654646, "grad_norm": 0.34689918706352113, "learning_rate": 1.968450363888073e-06, "loss": 0.323, "step": 5311 }, { "epoch": 3.6837725381414703, "grad_norm": 0.3852458491943254, "learning_rate": 1.966525704744016e-06, "loss": 0.3956, "step": 5312 }, { "epoch": 3.6844660194174756, "grad_norm": 0.5183324471904935, "learning_rate": 1.964601756624366e-06, "loss": 0.375, "step": 5313 }, { "epoch": 3.6851595006934814, "grad_norm": 0.42972473805123057, "learning_rate": 1.962678519980079e-06, "loss": 0.4329, "step": 5314 }, { "epoch": 3.6858529819694867, "grad_norm": 0.38771107037895286, "learning_rate": 1.9607559952619497e-06, "loss": 0.3433, "step": 5315 }, { "epoch": 3.6865464632454925, "grad_norm": 0.4479177429977054, "learning_rate": 1.9588341829206057e-06, "loss": 0.3611, "step": 5316 }, { "epoch": 3.687239944521498, "grad_norm": 0.4042270457950642, "learning_rate": 1.9569130834065025e-06, "loss": 0.3614, "step": 5317 }, { "epoch": 3.6879334257975036, "grad_norm": 0.44695599347825055, "learning_rate": 1.9549926971699334e-06, "loss": 0.4132, "step": 5318 }, { "epoch": 3.688626907073509, "grad_norm": 0.3745917664545771, "learning_rate": 1.953073024661023e-06, "loss": 0.3743, "step": 5319 }, { "epoch": 3.6893203883495147, "grad_norm": 0.40351364311898186, "learning_rate": 1.9511540663297284e-06, "loss": 0.3476, "step": 5320 }, { "epoch": 3.69001386962552, "grad_norm": 0.4441667258291502, "learning_rate": 1.949235822625842e-06, "loss": 0.3439, "step": 5321 }, { "epoch": 3.690707350901526, "grad_norm": 0.3978070659280194, "learning_rate": 1.9473182939989828e-06, "loss": 0.46, "step": 5322 }, { "epoch": 3.691400832177531, "grad_norm": 0.6674323596632495, "learning_rate": 1.945401480898606e-06, "loss": 0.3639, "step": 5323 }, { "epoch": 3.692094313453537, "grad_norm": 0.42511417504718546, "learning_rate": 1.943485383774002e-06, "loss": 0.3712, "step": 5324 }, { "epoch": 3.692787794729542, "grad_norm": 
0.45108591244684154, "learning_rate": 1.9415700030742855e-06, "loss": 0.3685, "step": 5325 }, { "epoch": 3.693481276005548, "grad_norm": 0.37479248618189814, "learning_rate": 1.9396553392484108e-06, "loss": 0.3845, "step": 5326 }, { "epoch": 3.6941747572815533, "grad_norm": 0.41130250834242243, "learning_rate": 1.9377413927451598e-06, "loss": 0.3206, "step": 5327 }, { "epoch": 3.694868238557559, "grad_norm": 0.3879707868506853, "learning_rate": 1.9358281640131488e-06, "loss": 0.3983, "step": 5328 }, { "epoch": 3.6955617198335644, "grad_norm": 0.40385556484309626, "learning_rate": 1.933915653500826e-06, "loss": 0.3823, "step": 5329 }, { "epoch": 3.69625520110957, "grad_norm": 0.39556605551863505, "learning_rate": 1.932003861656467e-06, "loss": 0.3723, "step": 5330 }, { "epoch": 3.6969486823855755, "grad_norm": 0.3764209278128449, "learning_rate": 1.930092788928183e-06, "loss": 0.3707, "step": 5331 }, { "epoch": 3.6976421636615813, "grad_norm": 0.38952718475498654, "learning_rate": 1.9281824357639178e-06, "loss": 0.361, "step": 5332 }, { "epoch": 3.6983356449375866, "grad_norm": 0.40343682984359014, "learning_rate": 1.926272802611441e-06, "loss": 0.3408, "step": 5333 }, { "epoch": 3.6990291262135924, "grad_norm": 1.1020813762226875, "learning_rate": 1.9243638899183577e-06, "loss": 0.3693, "step": 5334 }, { "epoch": 3.6997226074895977, "grad_norm": 0.4520957815713009, "learning_rate": 1.922455698132104e-06, "loss": 0.3831, "step": 5335 }, { "epoch": 3.7004160887656035, "grad_norm": 0.3865871728402252, "learning_rate": 1.920548227699946e-06, "loss": 0.3866, "step": 5336 }, { "epoch": 3.701109570041609, "grad_norm": 0.3999319677398349, "learning_rate": 1.918641479068983e-06, "loss": 0.3176, "step": 5337 }, { "epoch": 3.7018030513176146, "grad_norm": 0.3954960005167473, "learning_rate": 1.916735452686139e-06, "loss": 0.4194, "step": 5338 }, { "epoch": 3.70249653259362, "grad_norm": 0.39470818701966953, "learning_rate": 1.9148301489981753e-06, "loss": 0.3721, "step": 5339 }, { "epoch": 3.7031900138696257, "grad_norm": 0.410901626788164, "learning_rate": 1.9129255684516824e-06, "loss": 0.3701, "step": 5340 }, { "epoch": 3.703883495145631, "grad_norm": 0.4030037016470731, "learning_rate": 1.9110217114930766e-06, "loss": 0.3949, "step": 5341 }, { "epoch": 3.7045769764216367, "grad_norm": 0.37219093581630314, "learning_rate": 1.9091185785686106e-06, "loss": 0.3626, "step": 5342 }, { "epoch": 3.705270457697642, "grad_norm": 0.3826855962147528, "learning_rate": 1.9072161701243664e-06, "loss": 0.3355, "step": 5343 }, { "epoch": 3.705963938973648, "grad_norm": 0.3872617031204001, "learning_rate": 1.905314486606249e-06, "loss": 0.3948, "step": 5344 }, { "epoch": 3.706657420249653, "grad_norm": 0.37607961761091796, "learning_rate": 1.9034135284600064e-06, "loss": 0.408, "step": 5345 }, { "epoch": 3.707350901525659, "grad_norm": 0.541932364786649, "learning_rate": 1.9015132961312049e-06, "loss": 0.3459, "step": 5346 }, { "epoch": 3.7080443828016643, "grad_norm": 1.102306237579518, "learning_rate": 1.8996137900652468e-06, "loss": 0.3954, "step": 5347 }, { "epoch": 3.70873786407767, "grad_norm": 0.4654186258181047, "learning_rate": 1.8977150107073632e-06, "loss": 0.3868, "step": 5348 }, { "epoch": 3.7094313453536754, "grad_norm": 0.38346577307458257, "learning_rate": 1.895816958502612e-06, "loss": 0.3314, "step": 5349 }, { "epoch": 3.710124826629681, "grad_norm": 0.3998605753085461, "learning_rate": 1.893919633895886e-06, "loss": 0.4209, "step": 5350 }, { "epoch": 3.7108183079056865, "grad_norm": 
0.44792728653393216, "learning_rate": 1.892023037331901e-06, "loss": 0.4073, "step": 5351 }, { "epoch": 3.7115117891816922, "grad_norm": 0.3732161102516748, "learning_rate": 1.8901271692552065e-06, "loss": 0.3595, "step": 5352 }, { "epoch": 3.7122052704576975, "grad_norm": 0.4245500412639838, "learning_rate": 1.888232030110181e-06, "loss": 0.3955, "step": 5353 }, { "epoch": 3.7128987517337033, "grad_norm": 0.42570444772569693, "learning_rate": 1.886337620341031e-06, "loss": 0.4127, "step": 5354 }, { "epoch": 3.7135922330097086, "grad_norm": 0.38576977020656966, "learning_rate": 1.8844439403917947e-06, "loss": 0.3632, "step": 5355 }, { "epoch": 3.7142857142857144, "grad_norm": 0.43437060498772284, "learning_rate": 1.8825509907063328e-06, "loss": 0.4052, "step": 5356 }, { "epoch": 3.7149791955617197, "grad_norm": 0.4439072161217333, "learning_rate": 1.8806587717283415e-06, "loss": 0.3832, "step": 5357 }, { "epoch": 3.7156726768377255, "grad_norm": 0.39488729614130524, "learning_rate": 1.8787672839013438e-06, "loss": 0.4106, "step": 5358 }, { "epoch": 3.716366158113731, "grad_norm": 0.5259586927761669, "learning_rate": 1.8768765276686885e-06, "loss": 0.3571, "step": 5359 }, { "epoch": 3.7170596393897366, "grad_norm": 0.5127617716108047, "learning_rate": 1.874986503473556e-06, "loss": 0.3734, "step": 5360 }, { "epoch": 3.717753120665742, "grad_norm": 0.41067658295379306, "learning_rate": 1.8730972117589568e-06, "loss": 0.4116, "step": 5361 }, { "epoch": 3.7184466019417477, "grad_norm": 0.4330698754437948, "learning_rate": 1.8712086529677214e-06, "loss": 0.379, "step": 5362 }, { "epoch": 3.719140083217753, "grad_norm": 0.4855578889819878, "learning_rate": 1.8693208275425217e-06, "loss": 0.3608, "step": 5363 }, { "epoch": 3.719833564493759, "grad_norm": 0.3893809211511023, "learning_rate": 1.8674337359258443e-06, "loss": 0.3511, "step": 5364 }, { "epoch": 3.720527045769764, "grad_norm": 0.42694298428814276, "learning_rate": 1.8655473785600125e-06, "loss": 0.3513, "step": 5365 }, { "epoch": 3.72122052704577, "grad_norm": 0.4060773284838115, "learning_rate": 1.863661755887176e-06, "loss": 0.3863, "step": 5366 }, { "epoch": 3.721914008321775, "grad_norm": 0.3988787844042086, "learning_rate": 1.8617768683493082e-06, "loss": 0.3802, "step": 5367 }, { "epoch": 3.722607489597781, "grad_norm": 0.39907097823078685, "learning_rate": 1.8598927163882136e-06, "loss": 0.3977, "step": 5368 }, { "epoch": 3.7233009708737863, "grad_norm": 0.37537444469144365, "learning_rate": 1.858009300445527e-06, "loss": 0.3428, "step": 5369 }, { "epoch": 3.723994452149792, "grad_norm": 0.4179301049596186, "learning_rate": 1.8561266209627026e-06, "loss": 0.3368, "step": 5370 }, { "epoch": 3.7246879334257974, "grad_norm": 0.38146528935992374, "learning_rate": 1.8542446783810298e-06, "loss": 0.3806, "step": 5371 }, { "epoch": 3.725381414701803, "grad_norm": 0.3829881979556981, "learning_rate": 1.8523634731416218e-06, "loss": 0.3431, "step": 5372 }, { "epoch": 3.7260748959778085, "grad_norm": 0.4090834194179888, "learning_rate": 1.850483005685419e-06, "loss": 0.334, "step": 5373 }, { "epoch": 3.7267683772538143, "grad_norm": 0.3696294472764767, "learning_rate": 1.8486032764531918e-06, "loss": 0.3432, "step": 5374 }, { "epoch": 3.7274618585298196, "grad_norm": 0.39134696174347977, "learning_rate": 1.8467242858855312e-06, "loss": 0.3474, "step": 5375 }, { "epoch": 3.7281553398058254, "grad_norm": 0.42424056109601543, "learning_rate": 1.8448460344228609e-06, "loss": 0.3586, "step": 5376 }, { "epoch": 3.7288488210818307, "grad_norm": 
0.3806350558278461, "learning_rate": 1.842968522505431e-06, "loss": 0.379, "step": 5377 }, { "epoch": 3.7295423023578365, "grad_norm": 0.37868728143788455, "learning_rate": 1.841091750573314e-06, "loss": 0.361, "step": 5378 }, { "epoch": 3.730235783633842, "grad_norm": 0.39072976839261975, "learning_rate": 1.8392157190664123e-06, "loss": 0.3391, "step": 5379 }, { "epoch": 3.7309292649098476, "grad_norm": 0.3981970209721558, "learning_rate": 1.837340428424455e-06, "loss": 0.3593, "step": 5380 }, { "epoch": 3.731622746185853, "grad_norm": 0.42051350946993543, "learning_rate": 1.8354658790869956e-06, "loss": 0.413, "step": 5381 }, { "epoch": 3.7323162274618586, "grad_norm": 0.4117186830406726, "learning_rate": 1.833592071493418e-06, "loss": 0.332, "step": 5382 }, { "epoch": 3.733009708737864, "grad_norm": 0.38356762025880703, "learning_rate": 1.8317190060829242e-06, "loss": 0.3496, "step": 5383 }, { "epoch": 3.7337031900138697, "grad_norm": 0.41540321153530235, "learning_rate": 1.8298466832945499e-06, "loss": 0.396, "step": 5384 }, { "epoch": 3.734396671289875, "grad_norm": 0.4100911525683332, "learning_rate": 1.8279751035671556e-06, "loss": 0.4158, "step": 5385 }, { "epoch": 3.735090152565881, "grad_norm": 0.4076169553299872, "learning_rate": 1.8261042673394219e-06, "loss": 0.3619, "step": 5386 }, { "epoch": 3.735783633841886, "grad_norm": 0.4750286575047852, "learning_rate": 1.8242341750498638e-06, "loss": 0.3954, "step": 5387 }, { "epoch": 3.736477115117892, "grad_norm": 0.4507221022178403, "learning_rate": 1.8223648271368133e-06, "loss": 0.408, "step": 5388 }, { "epoch": 3.7371705963938973, "grad_norm": 0.37142770619311544, "learning_rate": 1.8204962240384316e-06, "loss": 0.3777, "step": 5389 }, { "epoch": 3.737864077669903, "grad_norm": 0.3827900357951355, "learning_rate": 1.8186283661927117e-06, "loss": 0.401, "step": 5390 }, { "epoch": 3.7385575589459084, "grad_norm": 0.4436942607606533, "learning_rate": 1.8167612540374606e-06, "loss": 0.3814, "step": 5391 }, { "epoch": 3.739251040221914, "grad_norm": 0.3974637766807117, "learning_rate": 1.8148948880103174e-06, "loss": 0.3768, "step": 5392 }, { "epoch": 3.7399445214979194, "grad_norm": 0.4554658431213549, "learning_rate": 1.8130292685487466e-06, "loss": 0.3855, "step": 5393 }, { "epoch": 3.740638002773925, "grad_norm": 0.41099824440573857, "learning_rate": 1.8111643960900321e-06, "loss": 0.3534, "step": 5394 }, { "epoch": 3.7413314840499305, "grad_norm": 0.43434492828653515, "learning_rate": 1.8093002710712903e-06, "loss": 0.3554, "step": 5395 }, { "epoch": 3.7420249653259363, "grad_norm": 0.3632681244633984, "learning_rate": 1.8074368939294555e-06, "loss": 0.3315, "step": 5396 }, { "epoch": 3.7427184466019416, "grad_norm": 0.39875603610965515, "learning_rate": 1.8055742651012908e-06, "loss": 0.4208, "step": 5397 }, { "epoch": 3.7434119278779474, "grad_norm": 0.44358660190842997, "learning_rate": 1.8037123850233833e-06, "loss": 0.3694, "step": 5398 }, { "epoch": 3.7441054091539527, "grad_norm": 0.3672623223520126, "learning_rate": 1.8018512541321442e-06, "loss": 0.3816, "step": 5399 }, { "epoch": 3.7447988904299585, "grad_norm": 0.41301627423255255, "learning_rate": 1.7999908728638104e-06, "loss": 0.406, "step": 5400 }, { "epoch": 3.745492371705964, "grad_norm": 0.3864285252967822, "learning_rate": 1.7981312416544394e-06, "loss": 0.3498, "step": 5401 }, { "epoch": 3.7461858529819696, "grad_norm": 0.379854908989512, "learning_rate": 1.7962723609399158e-06, "loss": 0.3751, "step": 5402 }, { "epoch": 3.746879334257975, "grad_norm": 
0.3810957492909113, "learning_rate": 1.7944142311559504e-06, "loss": 0.3735, "step": 5403 }, { "epoch": 3.7475728155339807, "grad_norm": 0.4075746678188746, "learning_rate": 1.7925568527380717e-06, "loss": 0.3557, "step": 5404 }, { "epoch": 3.748266296809986, "grad_norm": 0.41268868819624693, "learning_rate": 1.7907002261216367e-06, "loss": 0.41, "step": 5405 }, { "epoch": 3.748959778085992, "grad_norm": 0.4291613824698058, "learning_rate": 1.788844351741828e-06, "loss": 0.3818, "step": 5406 }, { "epoch": 3.749653259361997, "grad_norm": 0.42162892709676997, "learning_rate": 1.7869892300336434e-06, "loss": 0.3789, "step": 5407 }, { "epoch": 3.750346740638003, "grad_norm": 0.41320771168784265, "learning_rate": 1.785134861431917e-06, "loss": 0.3524, "step": 5408 }, { "epoch": 3.751040221914008, "grad_norm": 0.3823140088359749, "learning_rate": 1.783281246371294e-06, "loss": 0.3471, "step": 5409 }, { "epoch": 3.751733703190014, "grad_norm": 0.5566639626393827, "learning_rate": 1.7814283852862507e-06, "loss": 0.3626, "step": 5410 }, { "epoch": 3.7524271844660193, "grad_norm": 0.5675627780797136, "learning_rate": 1.7795762786110854e-06, "loss": 0.3395, "step": 5411 }, { "epoch": 3.753120665742025, "grad_norm": 0.4004817695691878, "learning_rate": 1.777724926779915e-06, "loss": 0.4007, "step": 5412 }, { "epoch": 3.7538141470180304, "grad_norm": 0.37930504637433377, "learning_rate": 1.7758743302266856e-06, "loss": 0.3445, "step": 5413 }, { "epoch": 3.754507628294036, "grad_norm": 0.7074186613546151, "learning_rate": 1.7740244893851644e-06, "loss": 0.3911, "step": 5414 }, { "epoch": 3.7552011095700415, "grad_norm": 0.3891171357387995, "learning_rate": 1.7721754046889373e-06, "loss": 0.3487, "step": 5415 }, { "epoch": 3.7558945908460473, "grad_norm": 0.6629439605906323, "learning_rate": 1.7703270765714186e-06, "loss": 0.3923, "step": 5416 }, { "epoch": 3.7565880721220526, "grad_norm": 0.39184537825893406, "learning_rate": 1.7684795054658427e-06, "loss": 0.3557, "step": 5417 }, { "epoch": 3.7572815533980584, "grad_norm": 0.42051575149751486, "learning_rate": 1.7666326918052667e-06, "loss": 0.3856, "step": 5418 }, { "epoch": 3.7579750346740637, "grad_norm": 0.41142882284450016, "learning_rate": 1.764786636022573e-06, "loss": 0.3997, "step": 5419 }, { "epoch": 3.7586685159500695, "grad_norm": 0.3811422041346554, "learning_rate": 1.762941338550459e-06, "loss": 0.3855, "step": 5420 }, { "epoch": 3.759361997226075, "grad_norm": 0.4180845691643529, "learning_rate": 1.7610967998214518e-06, "loss": 0.4111, "step": 5421 }, { "epoch": 3.7600554785020806, "grad_norm": 0.38518282689142475, "learning_rate": 1.7592530202678986e-06, "loss": 0.3812, "step": 5422 }, { "epoch": 3.760748959778086, "grad_norm": 0.3768274803511191, "learning_rate": 1.7574100003219657e-06, "loss": 0.3326, "step": 5423 }, { "epoch": 3.7614424410540916, "grad_norm": 0.405186290224963, "learning_rate": 1.7555677404156446e-06, "loss": 0.4337, "step": 5424 }, { "epoch": 3.762135922330097, "grad_norm": 0.3765713509544661, "learning_rate": 1.7537262409807476e-06, "loss": 0.3695, "step": 5425 }, { "epoch": 3.7628294036061027, "grad_norm": 0.37706643784277205, "learning_rate": 1.7518855024489095e-06, "loss": 0.3742, "step": 5426 }, { "epoch": 3.763522884882108, "grad_norm": 0.3951727712955745, "learning_rate": 1.7500455252515868e-06, "loss": 0.3982, "step": 5427 }, { "epoch": 3.764216366158114, "grad_norm": 0.4248050806632716, "learning_rate": 1.7482063098200547e-06, "loss": 0.361, "step": 5428 }, { "epoch": 3.764909847434119, "grad_norm": 
0.5283979933592003, "learning_rate": 1.7463678565854126e-06, "loss": 0.3808, "step": 5429 }, { "epoch": 3.765603328710125, "grad_norm": 0.3972254502668182, "learning_rate": 1.744530165978583e-06, "loss": 0.3809, "step": 5430 }, { "epoch": 3.7662968099861303, "grad_norm": 0.368240759054133, "learning_rate": 1.742693238430303e-06, "loss": 0.3829, "step": 5431 }, { "epoch": 3.766990291262136, "grad_norm": 0.4037095185818751, "learning_rate": 1.7408570743711394e-06, "loss": 0.405, "step": 5432 }, { "epoch": 3.7676837725381414, "grad_norm": 0.35559829706158025, "learning_rate": 1.739021674231472e-06, "loss": 0.3601, "step": 5433 }, { "epoch": 3.768377253814147, "grad_norm": 0.38383401348562074, "learning_rate": 1.7371870384415056e-06, "loss": 0.3845, "step": 5434 }, { "epoch": 3.7690707350901524, "grad_norm": 0.48961010708711433, "learning_rate": 1.7353531674312702e-06, "loss": 0.3369, "step": 5435 }, { "epoch": 3.769764216366158, "grad_norm": 0.5793593113058321, "learning_rate": 1.733520061630607e-06, "loss": 0.3644, "step": 5436 }, { "epoch": 3.7704576976421635, "grad_norm": 0.4257211794162759, "learning_rate": 1.7316877214691863e-06, "loss": 0.4077, "step": 5437 }, { "epoch": 3.7711511789181693, "grad_norm": 0.5665832728275388, "learning_rate": 1.7298561473764913e-06, "loss": 0.3676, "step": 5438 }, { "epoch": 3.7718446601941746, "grad_norm": 0.8191280650022053, "learning_rate": 1.7280253397818319e-06, "loss": 0.3684, "step": 5439 }, { "epoch": 3.7725381414701804, "grad_norm": 0.6408806565485978, "learning_rate": 1.7261952991143383e-06, "loss": 0.3855, "step": 5440 }, { "epoch": 3.7732316227461857, "grad_norm": 0.34447507846391334, "learning_rate": 1.7243660258029543e-06, "loss": 0.3373, "step": 5441 }, { "epoch": 3.7739251040221915, "grad_norm": 0.3752033900856243, "learning_rate": 1.722537520276451e-06, "loss": 0.3965, "step": 5442 }, { "epoch": 3.774618585298197, "grad_norm": 0.3775413093957538, "learning_rate": 1.7207097829634168e-06, "loss": 0.3541, "step": 5443 }, { "epoch": 3.7753120665742026, "grad_norm": 0.3486904471046564, "learning_rate": 1.7188828142922586e-06, "loss": 0.3514, "step": 5444 }, { "epoch": 3.776005547850208, "grad_norm": 0.37464889394532563, "learning_rate": 1.7170566146912083e-06, "loss": 0.3729, "step": 5445 }, { "epoch": 3.7766990291262137, "grad_norm": 0.3610622241819802, "learning_rate": 1.7152311845883096e-06, "loss": 0.3462, "step": 5446 }, { "epoch": 3.777392510402219, "grad_norm": 0.39130213083198795, "learning_rate": 1.7134065244114318e-06, "loss": 0.38, "step": 5447 }, { "epoch": 3.778085991678225, "grad_norm": 0.39809161010492583, "learning_rate": 1.7115826345882635e-06, "loss": 0.3747, "step": 5448 }, { "epoch": 3.77877947295423, "grad_norm": 0.40575456218582356, "learning_rate": 1.7097595155463082e-06, "loss": 0.3248, "step": 5449 }, { "epoch": 3.779472954230236, "grad_norm": 0.3737679094783724, "learning_rate": 1.7079371677128937e-06, "loss": 0.3832, "step": 5450 }, { "epoch": 3.780166435506241, "grad_norm": 0.4116349378860452, "learning_rate": 1.706115591515166e-06, "loss": 0.3867, "step": 5451 }, { "epoch": 3.780859916782247, "grad_norm": 0.400088369393815, "learning_rate": 1.7042947873800853e-06, "loss": 0.3699, "step": 5452 }, { "epoch": 3.7815533980582523, "grad_norm": 0.4285870819140972, "learning_rate": 1.7024747557344411e-06, "loss": 0.3582, "step": 5453 }, { "epoch": 3.782246879334258, "grad_norm": 0.46475979435347403, "learning_rate": 1.7006554970048305e-06, "loss": 0.3912, "step": 5454 }, { "epoch": 3.7829403606102634, "grad_norm": 
0.38994414406106287, "learning_rate": 1.6988370116176766e-06, "loss": 0.3443, "step": 5455 }, { "epoch": 3.783633841886269, "grad_norm": 0.3676251739064922, "learning_rate": 1.6970192999992209e-06, "loss": 0.376, "step": 5456 }, { "epoch": 3.7843273231622745, "grad_norm": 0.3766643128839153, "learning_rate": 1.6952023625755176e-06, "loss": 0.4045, "step": 5457 }, { "epoch": 3.7850208044382803, "grad_norm": 0.421874601290078, "learning_rate": 1.6933861997724466e-06, "loss": 0.3826, "step": 5458 }, { "epoch": 3.7857142857142856, "grad_norm": 0.3950155661205429, "learning_rate": 1.6915708120157042e-06, "loss": 0.4299, "step": 5459 }, { "epoch": 3.7864077669902914, "grad_norm": 0.3855825900703638, "learning_rate": 1.6897561997308015e-06, "loss": 0.4045, "step": 5460 }, { "epoch": 3.7871012482662967, "grad_norm": 0.41393549300354765, "learning_rate": 1.6879423633430708e-06, "loss": 0.3766, "step": 5461 }, { "epoch": 3.7877947295423025, "grad_norm": 0.41426592591495687, "learning_rate": 1.6861293032776637e-06, "loss": 0.3884, "step": 5462 }, { "epoch": 3.7884882108183078, "grad_norm": 0.3961452540539228, "learning_rate": 1.6843170199595476e-06, "loss": 0.396, "step": 5463 }, { "epoch": 3.7891816920943135, "grad_norm": 0.386811719574885, "learning_rate": 1.6825055138135105e-06, "loss": 0.3322, "step": 5464 }, { "epoch": 3.789875173370319, "grad_norm": 0.3992245548766229, "learning_rate": 1.680694785264153e-06, "loss": 0.4281, "step": 5465 }, { "epoch": 3.7905686546463246, "grad_norm": 0.45470336572811776, "learning_rate": 1.6788848347358977e-06, "loss": 0.3562, "step": 5466 }, { "epoch": 3.79126213592233, "grad_norm": 0.40341159288487516, "learning_rate": 1.6770756626529866e-06, "loss": 0.3561, "step": 5467 }, { "epoch": 3.7919556171983357, "grad_norm": 0.38860531381598035, "learning_rate": 1.675267269439473e-06, "loss": 0.3702, "step": 5468 }, { "epoch": 3.792649098474341, "grad_norm": 0.3830640348037389, "learning_rate": 1.6734596555192323e-06, "loss": 0.309, "step": 5469 }, { "epoch": 3.793342579750347, "grad_norm": 0.3981754230115332, "learning_rate": 1.671652821315956e-06, "loss": 0.4194, "step": 5470 }, { "epoch": 3.794036061026352, "grad_norm": 0.41845316099717067, "learning_rate": 1.6698467672531538e-06, "loss": 0.3983, "step": 5471 }, { "epoch": 3.794729542302358, "grad_norm": 0.3614566715835641, "learning_rate": 1.6680414937541528e-06, "loss": 0.3638, "step": 5472 }, { "epoch": 3.7954230235783633, "grad_norm": 0.3959607555342449, "learning_rate": 1.6662370012420931e-06, "loss": 0.3792, "step": 5473 }, { "epoch": 3.796116504854369, "grad_norm": 0.40082331373332036, "learning_rate": 1.6644332901399357e-06, "loss": 0.349, "step": 5474 }, { "epoch": 3.7968099861303743, "grad_norm": 0.4534061975835003, "learning_rate": 1.6626303608704597e-06, "loss": 0.4056, "step": 5475 }, { "epoch": 3.79750346740638, "grad_norm": 0.354771762862086, "learning_rate": 1.6608282138562554e-06, "loss": 0.3616, "step": 5476 }, { "epoch": 3.7981969486823854, "grad_norm": 0.5109905447615416, "learning_rate": 1.6590268495197354e-06, "loss": 0.3448, "step": 5477 }, { "epoch": 3.798890429958391, "grad_norm": 0.38350273614016517, "learning_rate": 1.6572262682831241e-06, "loss": 0.3924, "step": 5478 }, { "epoch": 3.7995839112343965, "grad_norm": 0.5148563619196473, "learning_rate": 1.655426470568464e-06, "loss": 0.3874, "step": 5479 }, { "epoch": 3.8002773925104023, "grad_norm": 0.4039022629715126, "learning_rate": 1.6536274567976202e-06, "loss": 0.3808, "step": 5480 }, { "epoch": 3.8009708737864076, "grad_norm": 
0.3649295170517451, "learning_rate": 1.6518292273922631e-06, "loss": 0.325, "step": 5481 }, { "epoch": 3.8016643550624134, "grad_norm": 0.460181294015958, "learning_rate": 1.6500317827738887e-06, "loss": 0.3713, "step": 5482 }, { "epoch": 3.8023578363384187, "grad_norm": 0.3983541745056531, "learning_rate": 1.6482351233638006e-06, "loss": 0.3985, "step": 5483 }, { "epoch": 3.8030513176144245, "grad_norm": 0.4264831632946882, "learning_rate": 1.6464392495831254e-06, "loss": 0.4347, "step": 5484 }, { "epoch": 3.80374479889043, "grad_norm": 0.35874030024608955, "learning_rate": 1.6446441618528037e-06, "loss": 0.351, "step": 5485 }, { "epoch": 3.8044382801664356, "grad_norm": 0.4704866595577384, "learning_rate": 1.6428498605935884e-06, "loss": 0.4184, "step": 5486 }, { "epoch": 3.805131761442441, "grad_norm": 0.37762854782032007, "learning_rate": 1.641056346226052e-06, "loss": 0.3565, "step": 5487 }, { "epoch": 3.8058252427184467, "grad_norm": 0.3791613377168742, "learning_rate": 1.6392636191705818e-06, "loss": 0.3844, "step": 5488 }, { "epoch": 3.806518723994452, "grad_norm": 0.5263251153146938, "learning_rate": 1.6374716798473795e-06, "loss": 0.379, "step": 5489 }, { "epoch": 3.807212205270458, "grad_norm": 0.396715833344976, "learning_rate": 1.6356805286764644e-06, "loss": 0.3649, "step": 5490 }, { "epoch": 3.807905686546463, "grad_norm": 0.39121884866498163, "learning_rate": 1.6338901660776662e-06, "loss": 0.3606, "step": 5491 }, { "epoch": 3.808599167822469, "grad_norm": 0.40171467739184663, "learning_rate": 1.6321005924706346e-06, "loss": 0.3598, "step": 5492 }, { "epoch": 3.809292649098474, "grad_norm": 0.38101576787076014, "learning_rate": 1.6303118082748342e-06, "loss": 0.3951, "step": 5493 }, { "epoch": 3.80998613037448, "grad_norm": 0.429169630095379, "learning_rate": 1.62852381390954e-06, "loss": 0.3907, "step": 5494 }, { "epoch": 3.8106796116504853, "grad_norm": 0.39039933858681025, "learning_rate": 1.6267366097938464e-06, "loss": 0.3363, "step": 5495 }, { "epoch": 3.811373092926491, "grad_norm": 0.4236816842037406, "learning_rate": 1.624950196346663e-06, "loss": 0.3463, "step": 5496 }, { "epoch": 3.8120665742024964, "grad_norm": 0.39379346995752024, "learning_rate": 1.6231645739867062e-06, "loss": 0.3396, "step": 5497 }, { "epoch": 3.812760055478502, "grad_norm": 0.39719999992746835, "learning_rate": 1.6213797431325212e-06, "loss": 0.3458, "step": 5498 }, { "epoch": 3.8134535367545075, "grad_norm": 0.3961760109207296, "learning_rate": 1.6195957042024536e-06, "loss": 0.4086, "step": 5499 }, { "epoch": 3.8141470180305133, "grad_norm": 0.37441061629038924, "learning_rate": 1.6178124576146708e-06, "loss": 0.3603, "step": 5500 }, { "epoch": 3.8148404993065186, "grad_norm": 0.3727463774789075, "learning_rate": 1.6160300037871547e-06, "loss": 0.3843, "step": 5501 }, { "epoch": 3.8155339805825244, "grad_norm": 0.38891618422764807, "learning_rate": 1.6142483431376959e-06, "loss": 0.333, "step": 5502 }, { "epoch": 3.8162274618585297, "grad_norm": 0.416956163142673, "learning_rate": 1.612467476083905e-06, "loss": 0.39, "step": 5503 }, { "epoch": 3.8169209431345354, "grad_norm": 0.3937672991999547, "learning_rate": 1.610687403043205e-06, "loss": 0.375, "step": 5504 }, { "epoch": 3.8176144244105408, "grad_norm": 0.3668701628736196, "learning_rate": 1.6089081244328285e-06, "loss": 0.3484, "step": 5505 }, { "epoch": 3.8183079056865465, "grad_norm": 0.38609486838441315, "learning_rate": 1.6071296406698317e-06, "loss": 0.4038, "step": 5506 }, { "epoch": 3.819001386962552, "grad_norm": 
0.4121516078389864, "learning_rate": 1.6053519521710726e-06, "loss": 0.385, "step": 5507 }, { "epoch": 3.8196948682385576, "grad_norm": 0.3967922793826194, "learning_rate": 1.6035750593532312e-06, "loss": 0.3684, "step": 5508 }, { "epoch": 3.820388349514563, "grad_norm": 0.46902752964958344, "learning_rate": 1.601798962632799e-06, "loss": 0.3707, "step": 5509 }, { "epoch": 3.8210818307905687, "grad_norm": 0.3973459652136806, "learning_rate": 1.600023662426078e-06, "loss": 0.3653, "step": 5510 }, { "epoch": 3.821775312066574, "grad_norm": 0.4770230619345637, "learning_rate": 1.5982491591491861e-06, "loss": 0.3606, "step": 5511 }, { "epoch": 3.82246879334258, "grad_norm": 0.4368077722486454, "learning_rate": 1.5964754532180564e-06, "loss": 0.3884, "step": 5512 }, { "epoch": 3.823162274618585, "grad_norm": 0.3862600988526136, "learning_rate": 1.59470254504843e-06, "loss": 0.3755, "step": 5513 }, { "epoch": 3.823855755894591, "grad_norm": 0.6897639021099272, "learning_rate": 1.592930435055864e-06, "loss": 0.3878, "step": 5514 }, { "epoch": 3.8245492371705962, "grad_norm": 0.39283478602548016, "learning_rate": 1.5911591236557288e-06, "loss": 0.3815, "step": 5515 }, { "epoch": 3.825242718446602, "grad_norm": 0.4040414188632428, "learning_rate": 1.589388611263208e-06, "loss": 0.4013, "step": 5516 }, { "epoch": 3.8259361997226073, "grad_norm": 0.6232878797513218, "learning_rate": 1.5876188982932966e-06, "loss": 0.3434, "step": 5517 }, { "epoch": 3.826629680998613, "grad_norm": 0.39002720526485685, "learning_rate": 1.5858499851608006e-06, "loss": 0.4065, "step": 5518 }, { "epoch": 3.8273231622746184, "grad_norm": 0.3841094765149276, "learning_rate": 1.5840818722803413e-06, "loss": 0.3786, "step": 5519 }, { "epoch": 3.828016643550624, "grad_norm": 0.3968586001557126, "learning_rate": 1.5823145600663536e-06, "loss": 0.3983, "step": 5520 }, { "epoch": 3.8287101248266295, "grad_norm": 0.40974053765869695, "learning_rate": 1.5805480489330798e-06, "loss": 0.4008, "step": 5521 }, { "epoch": 3.8294036061026353, "grad_norm": 0.41412119996826335, "learning_rate": 1.5787823392945794e-06, "loss": 0.3833, "step": 5522 }, { "epoch": 3.8300970873786406, "grad_norm": 0.3735504340365167, "learning_rate": 1.5770174315647185e-06, "loss": 0.361, "step": 5523 }, { "epoch": 3.8307905686546464, "grad_norm": 0.42026566451047853, "learning_rate": 1.575253326157183e-06, "loss": 0.3776, "step": 5524 }, { "epoch": 3.8314840499306517, "grad_norm": 0.4014227174885103, "learning_rate": 1.5734900234854655e-06, "loss": 0.4175, "step": 5525 }, { "epoch": 3.8321775312066575, "grad_norm": 0.5201046773001922, "learning_rate": 1.5717275239628693e-06, "loss": 0.413, "step": 5526 }, { "epoch": 3.832871012482663, "grad_norm": 0.38269945858021276, "learning_rate": 1.5699658280025143e-06, "loss": 0.3735, "step": 5527 }, { "epoch": 3.8335644937586686, "grad_norm": 0.40546504630034674, "learning_rate": 1.5682049360173263e-06, "loss": 0.3789, "step": 5528 }, { "epoch": 3.834257975034674, "grad_norm": 0.4188339497934117, "learning_rate": 1.5664448484200468e-06, "loss": 0.3659, "step": 5529 }, { "epoch": 3.8349514563106797, "grad_norm": 0.43039825713556573, "learning_rate": 1.5646855656232296e-06, "loss": 0.4021, "step": 5530 }, { "epoch": 3.835644937586685, "grad_norm": 0.4469033144459223, "learning_rate": 1.5629270880392345e-06, "loss": 0.4061, "step": 5531 }, { "epoch": 3.836338418862691, "grad_norm": 0.36411846985312857, "learning_rate": 1.5611694160802377e-06, "loss": 0.3798, "step": 5532 }, { "epoch": 3.837031900138696, "grad_norm": 
0.4165456791596007, "learning_rate": 1.5594125501582241e-06, "loss": 0.3771, "step": 5533 }, { "epoch": 3.837725381414702, "grad_norm": 0.4798051615631476, "learning_rate": 1.5576564906849918e-06, "loss": 0.4217, "step": 5534 }, { "epoch": 3.838418862690707, "grad_norm": 0.4654707514678164, "learning_rate": 1.5559012380721484e-06, "loss": 0.4034, "step": 5535 }, { "epoch": 3.839112343966713, "grad_norm": 0.43916135815319507, "learning_rate": 1.5541467927311093e-06, "loss": 0.3637, "step": 5536 }, { "epoch": 3.8398058252427183, "grad_norm": 0.40499701686065925, "learning_rate": 1.552393155073107e-06, "loss": 0.3487, "step": 5537 }, { "epoch": 3.840499306518724, "grad_norm": 0.5294692785153393, "learning_rate": 1.5506403255091812e-06, "loss": 0.335, "step": 5538 }, { "epoch": 3.8411927877947294, "grad_norm": 0.39026294615982876, "learning_rate": 1.5488883044501807e-06, "loss": 0.3956, "step": 5539 }, { "epoch": 3.841886269070735, "grad_norm": 0.368752034052551, "learning_rate": 1.5471370923067668e-06, "loss": 0.3897, "step": 5540 }, { "epoch": 3.8425797503467405, "grad_norm": 0.40896760470203153, "learning_rate": 1.5453866894894126e-06, "loss": 0.3944, "step": 5541 }, { "epoch": 3.8432732316227463, "grad_norm": 0.4001042563506018, "learning_rate": 1.543637096408398e-06, "loss": 0.3849, "step": 5542 }, { "epoch": 3.8439667128987516, "grad_norm": 0.4139012941974305, "learning_rate": 1.5418883134738178e-06, "loss": 0.3809, "step": 5543 }, { "epoch": 3.8446601941747574, "grad_norm": 0.3973421077339701, "learning_rate": 1.5401403410955707e-06, "loss": 0.4022, "step": 5544 }, { "epoch": 3.8453536754507627, "grad_norm": 0.37951204611751804, "learning_rate": 1.5383931796833702e-06, "loss": 0.3424, "step": 5545 }, { "epoch": 3.8460471567267684, "grad_norm": 0.4156068002622164, "learning_rate": 1.5366468296467397e-06, "loss": 0.4001, "step": 5546 }, { "epoch": 3.8467406380027738, "grad_norm": 0.37240364982023866, "learning_rate": 1.534901291395008e-06, "loss": 0.3422, "step": 5547 }, { "epoch": 3.8474341192787795, "grad_norm": 0.37031637090926967, "learning_rate": 1.5331565653373176e-06, "loss": 0.3451, "step": 5548 }, { "epoch": 3.848127600554785, "grad_norm": 0.4410069310255653, "learning_rate": 1.5314126518826222e-06, "loss": 0.3871, "step": 5549 }, { "epoch": 3.8488210818307906, "grad_norm": 0.4299440737340537, "learning_rate": 1.5296695514396776e-06, "loss": 0.4399, "step": 5550 }, { "epoch": 3.849514563106796, "grad_norm": 0.9455654023015365, "learning_rate": 1.52792726441706e-06, "loss": 0.4149, "step": 5551 }, { "epoch": 3.8502080443828017, "grad_norm": 0.3790694636307669, "learning_rate": 1.5261857912231438e-06, "loss": 0.3137, "step": 5552 }, { "epoch": 3.850901525658807, "grad_norm": 0.4147510509227642, "learning_rate": 1.5244451322661197e-06, "loss": 0.3942, "step": 5553 }, { "epoch": 3.851595006934813, "grad_norm": 0.43063851256519503, "learning_rate": 1.5227052879539872e-06, "loss": 0.3755, "step": 5554 }, { "epoch": 3.852288488210818, "grad_norm": 0.45412017914645153, "learning_rate": 1.5209662586945496e-06, "loss": 0.414, "step": 5555 }, { "epoch": 3.852981969486824, "grad_norm": 0.4675562645996263, "learning_rate": 1.5192280448954244e-06, "loss": 0.3895, "step": 5556 }, { "epoch": 3.8536754507628292, "grad_norm": 0.39443791189051286, "learning_rate": 1.5174906469640387e-06, "loss": 0.381, "step": 5557 }, { "epoch": 3.854368932038835, "grad_norm": 0.38723469162640956, "learning_rate": 1.515754065307622e-06, "loss": 0.3542, "step": 5558 }, { "epoch": 3.8550624133148403, "grad_norm": 
0.4032888749927211, "learning_rate": 1.5140183003332182e-06, "loss": 0.3902, "step": 5559 }, { "epoch": 3.855755894590846, "grad_norm": 0.43150375891932785, "learning_rate": 1.5122833524476782e-06, "loss": 0.3759, "step": 5560 }, { "epoch": 3.8564493758668514, "grad_norm": 0.37765765952643526, "learning_rate": 1.5105492220576612e-06, "loss": 0.4069, "step": 5561 }, { "epoch": 3.857142857142857, "grad_norm": 0.4376355760621776, "learning_rate": 1.5088159095696365e-06, "loss": 0.3218, "step": 5562 }, { "epoch": 3.8578363384188625, "grad_norm": 0.41721282862940745, "learning_rate": 1.5070834153898766e-06, "loss": 0.3419, "step": 5563 }, { "epoch": 3.8585298196948683, "grad_norm": 0.35353951227298963, "learning_rate": 1.5053517399244672e-06, "loss": 0.3654, "step": 5564 }, { "epoch": 3.8592233009708736, "grad_norm": 0.40098806127352365, "learning_rate": 1.503620883579302e-06, "loss": 0.3776, "step": 5565 }, { "epoch": 3.8599167822468794, "grad_norm": 0.4240605348191167, "learning_rate": 1.5018908467600778e-06, "loss": 0.3789, "step": 5566 }, { "epoch": 3.8606102635228847, "grad_norm": 0.3803201875493985, "learning_rate": 1.500161629872307e-06, "loss": 0.4193, "step": 5567 }, { "epoch": 3.8613037447988905, "grad_norm": 0.4062339254914316, "learning_rate": 1.4984332333212998e-06, "loss": 0.4119, "step": 5568 }, { "epoch": 3.861997226074896, "grad_norm": 0.4358161580514588, "learning_rate": 1.4967056575121842e-06, "loss": 0.4269, "step": 5569 }, { "epoch": 3.8626907073509016, "grad_norm": 0.5367904873711541, "learning_rate": 1.4949789028498923e-06, "loss": 0.3777, "step": 5570 }, { "epoch": 3.863384188626907, "grad_norm": 0.39838761560473596, "learning_rate": 1.4932529697391596e-06, "loss": 0.3914, "step": 5571 }, { "epoch": 3.8640776699029127, "grad_norm": 0.4300381966464757, "learning_rate": 1.491527858584535e-06, "loss": 0.3728, "step": 5572 }, { "epoch": 3.864771151178918, "grad_norm": 0.4246401021355444, "learning_rate": 1.4898035697903694e-06, "loss": 0.3806, "step": 5573 }, { "epoch": 3.8654646324549238, "grad_norm": 0.3955762135866378, "learning_rate": 1.488080103760825e-06, "loss": 0.378, "step": 5574 }, { "epoch": 3.866158113730929, "grad_norm": 0.37439770032132935, "learning_rate": 1.4863574608998716e-06, "loss": 0.3427, "step": 5575 }, { "epoch": 3.866851595006935, "grad_norm": 0.4127301950287792, "learning_rate": 1.4846356416112805e-06, "loss": 0.4105, "step": 5576 }, { "epoch": 3.86754507628294, "grad_norm": 0.4176112273482195, "learning_rate": 1.4829146462986354e-06, "loss": 0.3045, "step": 5577 }, { "epoch": 3.868238557558946, "grad_norm": 0.3920737918779999, "learning_rate": 1.4811944753653256e-06, "loss": 0.3682, "step": 5578 }, { "epoch": 3.8689320388349513, "grad_norm": 0.41651613206030125, "learning_rate": 1.4794751292145465e-06, "loss": 0.4184, "step": 5579 }, { "epoch": 3.869625520110957, "grad_norm": 0.3790228975308277, "learning_rate": 1.4777566082493017e-06, "loss": 0.3893, "step": 5580 }, { "epoch": 3.8703190013869624, "grad_norm": 0.43957142965703966, "learning_rate": 1.4760389128723968e-06, "loss": 0.4308, "step": 5581 }, { "epoch": 3.871012482662968, "grad_norm": 0.37992295707935186, "learning_rate": 1.4743220434864492e-06, "loss": 0.3832, "step": 5582 }, { "epoch": 3.8717059639389735, "grad_norm": 0.419379184564309, "learning_rate": 1.4726060004938819e-06, "loss": 0.4206, "step": 5583 }, { "epoch": 3.8723994452149793, "grad_norm": 0.42429959323969607, "learning_rate": 1.47089078429692e-06, "loss": 0.3823, "step": 5584 }, { "epoch": 3.8730929264909846, "grad_norm": 
0.36699942270029495, "learning_rate": 1.4691763952975996e-06, "loss": 0.3717, "step": 5585 }, { "epoch": 3.8737864077669903, "grad_norm": 0.3875732509319577, "learning_rate": 1.4674628338977604e-06, "loss": 0.3553, "step": 5586 }, { "epoch": 3.8744798890429957, "grad_norm": 0.3850202003941385, "learning_rate": 1.4657501004990488e-06, "loss": 0.364, "step": 5587 }, { "epoch": 3.8751733703190014, "grad_norm": 1.5284101176859395, "learning_rate": 1.4640381955029193e-06, "loss": 0.4049, "step": 5588 }, { "epoch": 3.875866851595007, "grad_norm": 0.38956167245449974, "learning_rate": 1.4623271193106264e-06, "loss": 0.3588, "step": 5589 }, { "epoch": 3.8765603328710125, "grad_norm": 0.40227087344775136, "learning_rate": 1.460616872323236e-06, "loss": 0.3976, "step": 5590 }, { "epoch": 3.877253814147018, "grad_norm": 0.3940537743117503, "learning_rate": 1.4589074549416188e-06, "loss": 0.3805, "step": 5591 }, { "epoch": 3.8779472954230236, "grad_norm": 0.41411441939648697, "learning_rate": 1.4571988675664467e-06, "loss": 0.4063, "step": 5592 }, { "epoch": 3.8786407766990294, "grad_norm": 0.5979411231554002, "learning_rate": 1.4554911105982022e-06, "loss": 0.3726, "step": 5593 }, { "epoch": 3.8793342579750347, "grad_norm": 0.36969918434948273, "learning_rate": 1.4537841844371719e-06, "loss": 0.3836, "step": 5594 }, { "epoch": 3.88002773925104, "grad_norm": 0.40293111629174605, "learning_rate": 1.452078089483443e-06, "loss": 0.3681, "step": 5595 }, { "epoch": 3.880721220527046, "grad_norm": 0.4069882307746121, "learning_rate": 1.4503728261369176e-06, "loss": 0.3766, "step": 5596 }, { "epoch": 3.8814147018030516, "grad_norm": 0.3714067860738447, "learning_rate": 1.448668394797293e-06, "loss": 0.3473, "step": 5597 }, { "epoch": 3.882108183079057, "grad_norm": 0.4829169868726462, "learning_rate": 1.4469647958640758e-06, "loss": 0.3978, "step": 5598 }, { "epoch": 3.8828016643550622, "grad_norm": 0.4325020462857424, "learning_rate": 1.4452620297365804e-06, "loss": 0.4641, "step": 5599 }, { "epoch": 3.883495145631068, "grad_norm": 0.3964269076699933, "learning_rate": 1.4435600968139192e-06, "loss": 0.4125, "step": 5600 }, { "epoch": 3.884188626907074, "grad_norm": 0.3801322339862502, "learning_rate": 1.4418589974950142e-06, "loss": 0.3813, "step": 5601 }, { "epoch": 3.884882108183079, "grad_norm": 1.120420651707417, "learning_rate": 1.4401587321785927e-06, "loss": 0.3464, "step": 5602 }, { "epoch": 3.8855755894590844, "grad_norm": 0.3850211652434562, "learning_rate": 1.438459301263181e-06, "loss": 0.3426, "step": 5603 }, { "epoch": 3.88626907073509, "grad_norm": 0.5067998945282288, "learning_rate": 1.436760705147115e-06, "loss": 0.4075, "step": 5604 }, { "epoch": 3.886962552011096, "grad_norm": 0.4173378607138257, "learning_rate": 1.4350629442285336e-06, "loss": 0.3854, "step": 5605 }, { "epoch": 3.8876560332871013, "grad_norm": 0.38862495150285326, "learning_rate": 1.4333660189053794e-06, "loss": 0.3792, "step": 5606 }, { "epoch": 3.8883495145631066, "grad_norm": 0.449690086866131, "learning_rate": 1.4316699295754016e-06, "loss": 0.38, "step": 5607 }, { "epoch": 3.8890429958391124, "grad_norm": 1.0670879888431246, "learning_rate": 1.4299746766361477e-06, "loss": 0.3595, "step": 5608 }, { "epoch": 3.889736477115118, "grad_norm": 0.5401108553541221, "learning_rate": 1.4282802604849754e-06, "loss": 0.4003, "step": 5609 }, { "epoch": 3.8904299583911235, "grad_norm": 0.3794215135031165, "learning_rate": 1.426586681519041e-06, "loss": 0.3628, "step": 5610 }, { "epoch": 3.891123439667129, "grad_norm": 
0.37930390905488165, "learning_rate": 1.424893940135309e-06, "loss": 0.3797, "step": 5611 }, { "epoch": 3.8918169209431346, "grad_norm": 0.48747810963585964, "learning_rate": 1.4232020367305466e-06, "loss": 0.3522, "step": 5612 }, { "epoch": 3.8925104022191404, "grad_norm": 0.3823598611920294, "learning_rate": 1.4215109717013193e-06, "loss": 0.3676, "step": 5613 }, { "epoch": 3.8932038834951457, "grad_norm": 0.3884920884811159, "learning_rate": 1.4198207454440048e-06, "loss": 0.3588, "step": 5614 }, { "epoch": 3.893897364771151, "grad_norm": 0.42249768676696087, "learning_rate": 1.4181313583547807e-06, "loss": 0.4018, "step": 5615 }, { "epoch": 3.8945908460471568, "grad_norm": 0.3855575000463757, "learning_rate": 1.416442810829623e-06, "loss": 0.3596, "step": 5616 }, { "epoch": 3.8952843273231625, "grad_norm": 0.435829374064413, "learning_rate": 1.4147551032643192e-06, "loss": 0.3645, "step": 5617 }, { "epoch": 3.895977808599168, "grad_norm": 0.6052173329254579, "learning_rate": 1.4130682360544518e-06, "loss": 0.4031, "step": 5618 }, { "epoch": 3.896671289875173, "grad_norm": 0.4072705047921821, "learning_rate": 1.4113822095954122e-06, "loss": 0.3953, "step": 5619 }, { "epoch": 3.897364771151179, "grad_norm": 0.429474477464326, "learning_rate": 1.4096970242823943e-06, "loss": 0.3595, "step": 5620 }, { "epoch": 3.8980582524271847, "grad_norm": 0.5287925548365876, "learning_rate": 1.40801268051039e-06, "loss": 0.3784, "step": 5621 }, { "epoch": 3.89875173370319, "grad_norm": 0.3807454717369341, "learning_rate": 1.406329178674199e-06, "loss": 0.3722, "step": 5622 }, { "epoch": 3.8994452149791954, "grad_norm": 0.38783373190927745, "learning_rate": 1.4046465191684223e-06, "loss": 0.3748, "step": 5623 }, { "epoch": 3.900138696255201, "grad_norm": 0.4601025319851407, "learning_rate": 1.4029647023874621e-06, "loss": 0.4235, "step": 5624 }, { "epoch": 3.900832177531207, "grad_norm": 0.38491422370673306, "learning_rate": 1.4012837287255266e-06, "loss": 0.3693, "step": 5625 }, { "epoch": 3.9015256588072122, "grad_norm": 0.3699367415628008, "learning_rate": 1.3996035985766205e-06, "loss": 0.3546, "step": 5626 }, { "epoch": 3.9022191400832176, "grad_norm": 0.3912934283652068, "learning_rate": 1.3979243123345554e-06, "loss": 0.3539, "step": 5627 }, { "epoch": 3.9029126213592233, "grad_norm": 0.41547712306912676, "learning_rate": 1.396245870392946e-06, "loss": 0.3968, "step": 5628 }, { "epoch": 3.903606102635229, "grad_norm": 0.41775988278850457, "learning_rate": 1.3945682731452032e-06, "loss": 0.3588, "step": 5629 }, { "epoch": 3.9042995839112344, "grad_norm": 0.4095847301634584, "learning_rate": 1.3928915209845451e-06, "loss": 0.3949, "step": 5630 }, { "epoch": 3.9049930651872398, "grad_norm": 0.3992895852152204, "learning_rate": 1.3912156143039906e-06, "loss": 0.4135, "step": 5631 }, { "epoch": 3.9056865464632455, "grad_norm": 0.3959167841660771, "learning_rate": 1.3895405534963607e-06, "loss": 0.3448, "step": 5632 }, { "epoch": 3.9063800277392513, "grad_norm": 0.40669169154979523, "learning_rate": 1.3878663389542779e-06, "loss": 0.3697, "step": 5633 }, { "epoch": 3.9070735090152566, "grad_norm": 0.40659905161168647, "learning_rate": 1.3861929710701633e-06, "loss": 0.3569, "step": 5634 }, { "epoch": 3.907766990291262, "grad_norm": 0.3980486492646797, "learning_rate": 1.3845204502362442e-06, "loss": 0.3224, "step": 5635 }, { "epoch": 3.9084604715672677, "grad_norm": 0.38492900940580815, "learning_rate": 1.3828487768445482e-06, "loss": 0.3685, "step": 5636 }, { "epoch": 3.9091539528432735, "grad_norm": 
0.38464183219509007, "learning_rate": 1.381177951286901e-06, "loss": 0.3892, "step": 5637 }, { "epoch": 3.909847434119279, "grad_norm": 0.38406230364296096, "learning_rate": 1.3795079739549332e-06, "loss": 0.3962, "step": 5638 }, { "epoch": 3.910540915395284, "grad_norm": 0.3655593241734113, "learning_rate": 1.377838845240077e-06, "loss": 0.3683, "step": 5639 }, { "epoch": 3.91123439667129, "grad_norm": 0.4028183338449733, "learning_rate": 1.3761705655335595e-06, "loss": 0.3823, "step": 5640 }, { "epoch": 3.9119278779472957, "grad_norm": 0.37947637095830566, "learning_rate": 1.37450313522642e-06, "loss": 0.339, "step": 5641 }, { "epoch": 3.912621359223301, "grad_norm": 0.41724287597837223, "learning_rate": 1.3728365547094863e-06, "loss": 0.3946, "step": 5642 }, { "epoch": 3.9133148404993063, "grad_norm": 0.417172806676931, "learning_rate": 1.3711708243733951e-06, "loss": 0.3921, "step": 5643 }, { "epoch": 3.914008321775312, "grad_norm": 0.35462495501465946, "learning_rate": 1.369505944608583e-06, "loss": 0.3398, "step": 5644 }, { "epoch": 3.914701803051318, "grad_norm": 0.4499823715764414, "learning_rate": 1.367841915805283e-06, "loss": 0.4093, "step": 5645 }, { "epoch": 3.915395284327323, "grad_norm": 0.4173737991050466, "learning_rate": 1.3661787383535324e-06, "loss": 0.411, "step": 5646 }, { "epoch": 3.9160887656033285, "grad_norm": 0.43895724692895477, "learning_rate": 1.3645164126431697e-06, "loss": 0.3407, "step": 5647 }, { "epoch": 3.9167822468793343, "grad_norm": 0.41000579747124954, "learning_rate": 1.362854939063829e-06, "loss": 0.3834, "step": 5648 }, { "epoch": 3.91747572815534, "grad_norm": 0.48735253813451135, "learning_rate": 1.3611943180049491e-06, "loss": 0.4073, "step": 5649 }, { "epoch": 3.9181692094313454, "grad_norm": 0.3858682781790578, "learning_rate": 1.3595345498557677e-06, "loss": 0.3388, "step": 5650 }, { "epoch": 3.9188626907073507, "grad_norm": 0.47724094074796924, "learning_rate": 1.3578756350053219e-06, "loss": 0.3776, "step": 5651 }, { "epoch": 3.9195561719833565, "grad_norm": 0.3862194568565286, "learning_rate": 1.3562175738424515e-06, "loss": 0.3993, "step": 5652 }, { "epoch": 3.9202496532593623, "grad_norm": 0.41274317453823883, "learning_rate": 1.3545603667557911e-06, "loss": 0.3737, "step": 5653 }, { "epoch": 3.9209431345353676, "grad_norm": 0.3887218217029318, "learning_rate": 1.3529040141337801e-06, "loss": 0.3912, "step": 5654 }, { "epoch": 3.921636615811373, "grad_norm": 0.5143223803741318, "learning_rate": 1.3512485163646537e-06, "loss": 0.3523, "step": 5655 }, { "epoch": 3.9223300970873787, "grad_norm": 0.43330889320819416, "learning_rate": 1.3495938738364496e-06, "loss": 0.3946, "step": 5656 }, { "epoch": 3.9230235783633844, "grad_norm": 0.4027388474860819, "learning_rate": 1.3479400869370052e-06, "loss": 0.3851, "step": 5657 }, { "epoch": 3.9237170596393898, "grad_norm": 0.42135575554887195, "learning_rate": 1.346287156053952e-06, "loss": 0.3982, "step": 5658 }, { "epoch": 3.924410540915395, "grad_norm": 0.43683122299483046, "learning_rate": 1.344635081574731e-06, "loss": 0.4228, "step": 5659 }, { "epoch": 3.925104022191401, "grad_norm": 0.45956794064458506, "learning_rate": 1.3429838638865721e-06, "loss": 0.3567, "step": 5660 }, { "epoch": 3.9257975034674066, "grad_norm": 0.4253665212883945, "learning_rate": 1.3413335033765102e-06, "loss": 0.3552, "step": 5661 }, { "epoch": 3.926490984743412, "grad_norm": 0.39429871151008744, "learning_rate": 1.3396840004313789e-06, "loss": 0.382, "step": 5662 }, { "epoch": 3.9271844660194173, "grad_norm": 
0.4259865437463772, "learning_rate": 1.3380353554378074e-06, "loss": 0.3401, "step": 5663 }, { "epoch": 3.927877947295423, "grad_norm": 0.40489142307187015, "learning_rate": 1.3363875687822276e-06, "loss": 0.3548, "step": 5664 }, { "epoch": 3.928571428571429, "grad_norm": 0.4493377903861885, "learning_rate": 1.3347406408508695e-06, "loss": 0.4119, "step": 5665 }, { "epoch": 3.929264909847434, "grad_norm": 0.9507820720744213, "learning_rate": 1.3330945720297594e-06, "loss": 0.4008, "step": 5666 }, { "epoch": 3.9299583911234395, "grad_norm": 0.5555360984295558, "learning_rate": 1.3314493627047242e-06, "loss": 0.4077, "step": 5667 }, { "epoch": 3.9306518723994452, "grad_norm": 0.38394572692282086, "learning_rate": 1.3298050132613893e-06, "loss": 0.374, "step": 5668 }, { "epoch": 3.931345353675451, "grad_norm": 0.4016585953908722, "learning_rate": 1.3281615240851787e-06, "loss": 0.335, "step": 5669 }, { "epoch": 3.9320388349514563, "grad_norm": 0.3521420652161348, "learning_rate": 1.3265188955613156e-06, "loss": 0.3518, "step": 5670 }, { "epoch": 3.9327323162274617, "grad_norm": 0.39605405300661, "learning_rate": 1.3248771280748174e-06, "loss": 0.3511, "step": 5671 }, { "epoch": 3.9334257975034674, "grad_norm": 0.4060404594464979, "learning_rate": 1.3232362220105038e-06, "loss": 0.3475, "step": 5672 }, { "epoch": 3.934119278779473, "grad_norm": 0.3945904332268029, "learning_rate": 1.3215961777529928e-06, "loss": 0.3639, "step": 5673 }, { "epoch": 3.9348127600554785, "grad_norm": 0.6167005425479277, "learning_rate": 1.3199569956866964e-06, "loss": 0.3306, "step": 5674 }, { "epoch": 3.935506241331484, "grad_norm": 0.4431107557237783, "learning_rate": 1.3183186761958278e-06, "loss": 0.3546, "step": 5675 }, { "epoch": 3.9361997226074896, "grad_norm": 0.4196952370798847, "learning_rate": 1.3166812196643974e-06, "loss": 0.3617, "step": 5676 }, { "epoch": 3.9368932038834954, "grad_norm": 0.5529102129775423, "learning_rate": 1.3150446264762134e-06, "loss": 0.3736, "step": 5677 }, { "epoch": 3.9375866851595007, "grad_norm": 0.36804803724176843, "learning_rate": 1.3134088970148828e-06, "loss": 0.3351, "step": 5678 }, { "epoch": 3.938280166435506, "grad_norm": 0.42122798419618446, "learning_rate": 1.3117740316638055e-06, "loss": 0.3655, "step": 5679 }, { "epoch": 3.938973647711512, "grad_norm": 0.3950545471418325, "learning_rate": 1.310140030806184e-06, "loss": 0.3039, "step": 5680 }, { "epoch": 3.9396671289875176, "grad_norm": 0.43208510261210614, "learning_rate": 1.3085068948250174e-06, "loss": 0.4075, "step": 5681 }, { "epoch": 3.940360610263523, "grad_norm": 0.4096528815786458, "learning_rate": 1.3068746241030983e-06, "loss": 0.3788, "step": 5682 }, { "epoch": 3.9410540915395282, "grad_norm": 0.39946228193954975, "learning_rate": 1.3052432190230202e-06, "loss": 0.4224, "step": 5683 }, { "epoch": 3.941747572815534, "grad_norm": 0.3741483253809102, "learning_rate": 1.3036126799671734e-06, "loss": 0.3911, "step": 5684 }, { "epoch": 3.9424410540915398, "grad_norm": 0.380714868716621, "learning_rate": 1.301983007317743e-06, "loss": 0.3652, "step": 5685 }, { "epoch": 3.943134535367545, "grad_norm": 0.386574857583611, "learning_rate": 1.3003542014567156e-06, "loss": 0.4137, "step": 5686 }, { "epoch": 3.9438280166435504, "grad_norm": 0.3730005133971035, "learning_rate": 1.2987262627658676e-06, "loss": 0.3908, "step": 5687 }, { "epoch": 3.944521497919556, "grad_norm": 0.38425886028701634, "learning_rate": 1.2970991916267779e-06, "loss": 0.3417, "step": 5688 }, { "epoch": 3.945214979195562, "grad_norm": 
0.4158896032084831, "learning_rate": 1.2954729884208212e-06, "loss": 0.431, "step": 5689 }, { "epoch": 3.9459084604715673, "grad_norm": 0.5780251321763046, "learning_rate": 1.293847653529165e-06, "loss": 0.4322, "step": 5690 }, { "epoch": 3.9466019417475726, "grad_norm": 0.4082134245329464, "learning_rate": 1.2922231873327779e-06, "loss": 0.3749, "step": 5691 }, { "epoch": 3.9472954230235784, "grad_norm": 0.39724644301906575, "learning_rate": 1.2905995902124242e-06, "loss": 0.3916, "step": 5692 }, { "epoch": 3.947988904299584, "grad_norm": 0.46077900169185376, "learning_rate": 1.2889768625486588e-06, "loss": 0.4387, "step": 5693 }, { "epoch": 3.9486823855755895, "grad_norm": 0.3837876641922954, "learning_rate": 1.287355004721843e-06, "loss": 0.3935, "step": 5694 }, { "epoch": 3.949375866851595, "grad_norm": 0.42749401495919626, "learning_rate": 1.2857340171121246e-06, "loss": 0.4038, "step": 5695 }, { "epoch": 3.9500693481276006, "grad_norm": 0.40319675594681176, "learning_rate": 1.2841139000994524e-06, "loss": 0.4052, "step": 5696 }, { "epoch": 3.9507628294036063, "grad_norm": 0.39906374254605154, "learning_rate": 1.2824946540635725e-06, "loss": 0.3877, "step": 5697 }, { "epoch": 3.9514563106796117, "grad_norm": 0.5190401484270997, "learning_rate": 1.28087627938402e-06, "loss": 0.3673, "step": 5698 }, { "epoch": 3.952149791955617, "grad_norm": 0.3896400631056696, "learning_rate": 1.2792587764401343e-06, "loss": 0.3508, "step": 5699 }, { "epoch": 3.9528432732316228, "grad_norm": 0.44636566145299666, "learning_rate": 1.2776421456110427e-06, "loss": 0.4022, "step": 5700 }, { "epoch": 3.9535367545076285, "grad_norm": 0.4054930476058675, "learning_rate": 1.276026387275674e-06, "loss": 0.3983, "step": 5701 }, { "epoch": 3.954230235783634, "grad_norm": 0.41704845024078757, "learning_rate": 1.2744115018127494e-06, "loss": 0.406, "step": 5702 }, { "epoch": 3.954923717059639, "grad_norm": 0.3559236138431055, "learning_rate": 1.2727974896007871e-06, "loss": 0.3662, "step": 5703 }, { "epoch": 3.955617198335645, "grad_norm": 0.3939122948268484, "learning_rate": 1.271184351018101e-06, "loss": 0.3952, "step": 5704 }, { "epoch": 3.9563106796116507, "grad_norm": 0.5070237755574002, "learning_rate": 1.2695720864427963e-06, "loss": 0.3815, "step": 5705 }, { "epoch": 3.957004160887656, "grad_norm": 0.37621573730475083, "learning_rate": 1.2679606962527774e-06, "loss": 0.3958, "step": 5706 }, { "epoch": 3.9576976421636614, "grad_norm": 0.3873877405344169, "learning_rate": 1.2663501808257444e-06, "loss": 0.3198, "step": 5707 }, { "epoch": 3.958391123439667, "grad_norm": 0.40384457113927813, "learning_rate": 1.2647405405391867e-06, "loss": 0.3937, "step": 5708 }, { "epoch": 3.959084604715673, "grad_norm": 0.5014507156443899, "learning_rate": 1.2631317757703942e-06, "loss": 0.4072, "step": 5709 }, { "epoch": 3.9597780859916782, "grad_norm": 0.37142258371238035, "learning_rate": 1.261523886896452e-06, "loss": 0.3422, "step": 5710 }, { "epoch": 3.9604715672676836, "grad_norm": 0.5402782594966292, "learning_rate": 1.259916874294232e-06, "loss": 0.4034, "step": 5711 }, { "epoch": 3.9611650485436893, "grad_norm": 0.4056859169048281, "learning_rate": 1.2583107383404125e-06, "loss": 0.3558, "step": 5712 }, { "epoch": 3.961858529819695, "grad_norm": 0.38955498094338076, "learning_rate": 1.2567054794114558e-06, "loss": 0.3587, "step": 5713 }, { "epoch": 3.9625520110957004, "grad_norm": 0.38933492535119524, "learning_rate": 1.2551010978836247e-06, "loss": 0.387, "step": 5714 }, { "epoch": 3.9632454923717058, "grad_norm": 
0.389806625283031, "learning_rate": 1.2534975941329758e-06, "loss": 0.3761, "step": 5715 }, { "epoch": 3.9639389736477115, "grad_norm": 3.454146150584736, "learning_rate": 1.251894968535356e-06, "loss": 0.3615, "step": 5716 }, { "epoch": 3.9646324549237173, "grad_norm": 0.3867506930870249, "learning_rate": 1.250293221466411e-06, "loss": 0.3645, "step": 5717 }, { "epoch": 3.9653259361997226, "grad_norm": 0.41817483163562436, "learning_rate": 1.2486923533015788e-06, "loss": 0.3917, "step": 5718 }, { "epoch": 3.966019417475728, "grad_norm": 0.4368784443128529, "learning_rate": 1.2470923644160898e-06, "loss": 0.3642, "step": 5719 }, { "epoch": 3.9667128987517337, "grad_norm": 0.4413408560547274, "learning_rate": 1.2454932551849708e-06, "loss": 0.3581, "step": 5720 }, { "epoch": 3.9674063800277395, "grad_norm": 0.4303083673464683, "learning_rate": 1.2438950259830412e-06, "loss": 0.3489, "step": 5721 }, { "epoch": 3.968099861303745, "grad_norm": 0.41365343058986725, "learning_rate": 1.2422976771849144e-06, "loss": 0.3979, "step": 5722 }, { "epoch": 3.96879334257975, "grad_norm": 0.4046215559080561, "learning_rate": 1.2407012091649996e-06, "loss": 0.3596, "step": 5723 }, { "epoch": 3.969486823855756, "grad_norm": 0.42790200349990853, "learning_rate": 1.2391056222974928e-06, "loss": 0.3709, "step": 5724 }, { "epoch": 3.9701803051317617, "grad_norm": 0.40952428765560905, "learning_rate": 1.2375109169563915e-06, "loss": 0.3867, "step": 5725 }, { "epoch": 3.970873786407767, "grad_norm": 0.44226633604353166, "learning_rate": 1.235917093515483e-06, "loss": 0.3773, "step": 5726 }, { "epoch": 3.9715672676837723, "grad_norm": 0.37553802642405126, "learning_rate": 1.2343241523483452e-06, "loss": 0.3348, "step": 5727 }, { "epoch": 3.972260748959778, "grad_norm": 0.39257206808193185, "learning_rate": 1.2327320938283543e-06, "loss": 0.3726, "step": 5728 }, { "epoch": 3.972954230235784, "grad_norm": 0.393711723909782, "learning_rate": 1.2311409183286765e-06, "loss": 0.3728, "step": 5729 }, { "epoch": 3.973647711511789, "grad_norm": 0.4296746345286358, "learning_rate": 1.2295506262222723e-06, "loss": 0.4195, "step": 5730 }, { "epoch": 3.9743411927877945, "grad_norm": 0.41507157274395867, "learning_rate": 1.2279612178818955e-06, "loss": 0.3618, "step": 5731 }, { "epoch": 3.9750346740638003, "grad_norm": 0.38761962885321694, "learning_rate": 1.2263726936800895e-06, "loss": 0.371, "step": 5732 }, { "epoch": 3.975728155339806, "grad_norm": 0.393850569894318, "learning_rate": 1.2247850539891947e-06, "loss": 0.3788, "step": 5733 }, { "epoch": 3.9764216366158114, "grad_norm": 0.6760980892034429, "learning_rate": 1.2231982991813428e-06, "loss": 0.3643, "step": 5734 }, { "epoch": 3.9771151178918167, "grad_norm": 0.4030177735429908, "learning_rate": 1.2216124296284554e-06, "loss": 0.3996, "step": 5735 }, { "epoch": 3.9778085991678225, "grad_norm": 0.5547147831236997, "learning_rate": 1.2200274457022503e-06, "loss": 0.3668, "step": 5736 }, { "epoch": 3.9785020804438282, "grad_norm": 0.39146584408378815, "learning_rate": 1.2184433477742375e-06, "loss": 0.3693, "step": 5737 }, { "epoch": 3.9791955617198336, "grad_norm": 0.3911153887592586, "learning_rate": 1.2168601362157134e-06, "loss": 0.38, "step": 5738 }, { "epoch": 3.979889042995839, "grad_norm": 0.41342194857756964, "learning_rate": 1.2152778113977776e-06, "loss": 0.4073, "step": 5739 }, { "epoch": 3.9805825242718447, "grad_norm": 0.38444966834365457, "learning_rate": 1.2136963736913117e-06, "loss": 0.3887, "step": 5740 }, { "epoch": 3.9812760055478504, "grad_norm": 
0.42775677441910975, "learning_rate": 1.2121158234669933e-06, "loss": 0.3941, "step": 5741 }, { "epoch": 3.9819694868238558, "grad_norm": 0.40064894573830545, "learning_rate": 1.210536161095295e-06, "loss": 0.4037, "step": 5742 }, { "epoch": 3.982662968099861, "grad_norm": 0.4604868136975249, "learning_rate": 1.2089573869464738e-06, "loss": 0.4009, "step": 5743 }, { "epoch": 3.983356449375867, "grad_norm": 0.38853712077021296, "learning_rate": 1.2073795013905865e-06, "loss": 0.3396, "step": 5744 }, { "epoch": 3.9840499306518726, "grad_norm": 0.4119203939330921, "learning_rate": 1.2058025047974753e-06, "loss": 0.3719, "step": 5745 }, { "epoch": 3.984743411927878, "grad_norm": 0.3683332161664147, "learning_rate": 1.2042263975367785e-06, "loss": 0.3709, "step": 5746 }, { "epoch": 3.9854368932038833, "grad_norm": 0.43071177651480674, "learning_rate": 1.2026511799779234e-06, "loss": 0.4027, "step": 5747 }, { "epoch": 3.986130374479889, "grad_norm": 0.41169056270401494, "learning_rate": 1.20107685249013e-06, "loss": 0.3624, "step": 5748 }, { "epoch": 3.986823855755895, "grad_norm": 0.4329112779834001, "learning_rate": 1.1995034154424111e-06, "loss": 0.3882, "step": 5749 }, { "epoch": 3.9875173370319, "grad_norm": 0.3797986135112781, "learning_rate": 1.1979308692035658e-06, "loss": 0.4042, "step": 5750 }, { "epoch": 3.9882108183079055, "grad_norm": 0.3960584676111404, "learning_rate": 1.1963592141421882e-06, "loss": 0.3884, "step": 5751 }, { "epoch": 3.9889042995839112, "grad_norm": 0.4180044032794779, "learning_rate": 1.1947884506266655e-06, "loss": 0.3754, "step": 5752 }, { "epoch": 3.989597780859917, "grad_norm": 0.3824105358910305, "learning_rate": 1.1932185790251698e-06, "loss": 0.3716, "step": 5753 }, { "epoch": 3.9902912621359223, "grad_norm": 0.5620414926778071, "learning_rate": 1.1916495997056693e-06, "loss": 0.4353, "step": 5754 }, { "epoch": 3.9909847434119277, "grad_norm": 0.41368148127812576, "learning_rate": 1.1900815130359223e-06, "loss": 0.4048, "step": 5755 }, { "epoch": 3.9916782246879334, "grad_norm": 0.4418519608109611, "learning_rate": 1.1885143193834735e-06, "loss": 0.3507, "step": 5756 }, { "epoch": 3.992371705963939, "grad_norm": 0.4403404777799451, "learning_rate": 1.1869480191156668e-06, "loss": 0.4034, "step": 5757 }, { "epoch": 3.9930651872399445, "grad_norm": 0.4105981298795642, "learning_rate": 1.1853826125996277e-06, "loss": 0.3473, "step": 5758 }, { "epoch": 3.99375866851595, "grad_norm": 0.4595860775755933, "learning_rate": 1.183818100202277e-06, "loss": 0.3586, "step": 5759 }, { "epoch": 3.9944521497919556, "grad_norm": 0.38993742758248, "learning_rate": 1.1822544822903275e-06, "loss": 0.358, "step": 5760 }, { "epoch": 3.9951456310679614, "grad_norm": 0.37988196429486354, "learning_rate": 1.1806917592302763e-06, "loss": 0.3809, "step": 5761 }, { "epoch": 3.9958391123439667, "grad_norm": 0.40003527774922076, "learning_rate": 1.1791299313884158e-06, "loss": 0.4013, "step": 5762 }, { "epoch": 3.996532593619972, "grad_norm": 0.362320198547181, "learning_rate": 1.1775689991308292e-06, "loss": 0.3186, "step": 5763 }, { "epoch": 3.997226074895978, "grad_norm": 0.4375356344905873, "learning_rate": 1.176008962823384e-06, "loss": 0.3893, "step": 5764 }, { "epoch": 3.9979195561719836, "grad_norm": 0.984875063636234, "learning_rate": 1.1744498228317436e-06, "loss": 0.3639, "step": 5765 }, { "epoch": 3.998613037447989, "grad_norm": 0.39050098862356036, "learning_rate": 1.1728915795213586e-06, "loss": 0.376, "step": 5766 }, { "epoch": 3.9993065187239942, "grad_norm": 
0.352767593457289, "learning_rate": 1.1713342332574702e-06, "loss": 0.3783, "step": 5767 }, { "epoch": 4.0, "grad_norm": 0.41523151464963964, "learning_rate": 1.1697777844051105e-06, "loss": 0.3621, "step": 5768 }, { "epoch": 4.000693481276006, "grad_norm": 0.44340135856638263, "learning_rate": 1.168222233329097e-06, "loss": 0.3281, "step": 5769 }, { "epoch": 4.0013869625520115, "grad_norm": 0.37844197093984355, "learning_rate": 1.166667580394041e-06, "loss": 0.35, "step": 5770 }, { "epoch": 4.002080443828016, "grad_norm": 0.3888541375827065, "learning_rate": 1.165113825964343e-06, "loss": 0.2985, "step": 5771 }, { "epoch": 4.002773925104022, "grad_norm": 0.4093681633758278, "learning_rate": 1.1635609704041896e-06, "loss": 0.3409, "step": 5772 }, { "epoch": 4.003467406380028, "grad_norm": 0.38915627815727133, "learning_rate": 1.1620090140775598e-06, "loss": 0.3878, "step": 5773 }, { "epoch": 4.004160887656034, "grad_norm": 0.36655796389465645, "learning_rate": 1.1604579573482205e-06, "loss": 0.3688, "step": 5774 }, { "epoch": 4.004854368932039, "grad_norm": 0.38891063951151855, "learning_rate": 1.1589078005797294e-06, "loss": 0.3708, "step": 5775 }, { "epoch": 4.005547850208044, "grad_norm": 0.37569681015935014, "learning_rate": 1.1573585441354324e-06, "loss": 0.3471, "step": 5776 }, { "epoch": 4.00624133148405, "grad_norm": 0.3830565705662529, "learning_rate": 1.1558101883784616e-06, "loss": 0.3802, "step": 5777 }, { "epoch": 4.006934812760056, "grad_norm": 0.40636187669279616, "learning_rate": 1.1542627336717422e-06, "loss": 0.3797, "step": 5778 }, { "epoch": 4.007628294036061, "grad_norm": 0.42411507237677165, "learning_rate": 1.1527161803779868e-06, "loss": 0.3936, "step": 5779 }, { "epoch": 4.008321775312067, "grad_norm": 0.38796922795575917, "learning_rate": 1.151170528859694e-06, "loss": 0.307, "step": 5780 }, { "epoch": 4.009015256588072, "grad_norm": 0.38224374417128243, "learning_rate": 1.149625779479156e-06, "loss": 0.3419, "step": 5781 }, { "epoch": 4.009708737864078, "grad_norm": 0.37525329652006423, "learning_rate": 1.1480819325984489e-06, "loss": 0.344, "step": 5782 }, { "epoch": 4.010402219140083, "grad_norm": 0.4250747270169149, "learning_rate": 1.1465389885794376e-06, "loss": 0.402, "step": 5783 }, { "epoch": 4.011095700416089, "grad_norm": 0.46700080359458773, "learning_rate": 1.1449969477837825e-06, "loss": 0.3944, "step": 5784 }, { "epoch": 4.0117891816920945, "grad_norm": 0.36829362749736344, "learning_rate": 1.143455810572922e-06, "loss": 0.3254, "step": 5785 }, { "epoch": 4.0124826629681, "grad_norm": 0.43814893513893954, "learning_rate": 1.1419155773080893e-06, "loss": 0.3457, "step": 5786 }, { "epoch": 4.013176144244105, "grad_norm": 0.38702579584264457, "learning_rate": 1.140376248350305e-06, "loss": 0.3641, "step": 5787 }, { "epoch": 4.013869625520111, "grad_norm": 0.38462620213273563, "learning_rate": 1.1388378240603742e-06, "loss": 0.3292, "step": 5788 }, { "epoch": 4.014563106796117, "grad_norm": 0.41151676921839425, "learning_rate": 1.1373003047988952e-06, "loss": 0.3326, "step": 5789 }, { "epoch": 4.0152565880721225, "grad_norm": 0.3810523645944822, "learning_rate": 1.1357636909262477e-06, "loss": 0.3398, "step": 5790 }, { "epoch": 4.015950069348127, "grad_norm": 0.4253137185116376, "learning_rate": 1.1342279828026054e-06, "loss": 0.3407, "step": 5791 }, { "epoch": 4.016643550624133, "grad_norm": 0.5676880788011671, "learning_rate": 1.1326931807879266e-06, "loss": 0.3715, "step": 5792 }, { "epoch": 4.017337031900139, "grad_norm": 0.42492385061892185, 
"learning_rate": 1.1311592852419574e-06, "loss": 0.3416, "step": 5793 }, { "epoch": 4.018030513176145, "grad_norm": 0.4274375983234086, "learning_rate": 1.1296262965242345e-06, "loss": 0.3459, "step": 5794 }, { "epoch": 4.01872399445215, "grad_norm": 0.425841892769472, "learning_rate": 1.128094214994075e-06, "loss": 0.3563, "step": 5795 }, { "epoch": 4.019417475728155, "grad_norm": 0.4009974210815654, "learning_rate": 1.1265630410105905e-06, "loss": 0.3872, "step": 5796 }, { "epoch": 4.020110957004161, "grad_norm": 0.37085478252614934, "learning_rate": 1.1250327749326772e-06, "loss": 0.3191, "step": 5797 }, { "epoch": 4.020804438280167, "grad_norm": 0.41412108196268654, "learning_rate": 1.1235034171190167e-06, "loss": 0.3312, "step": 5798 }, { "epoch": 4.021497919556172, "grad_norm": 0.3904627089081514, "learning_rate": 1.12197496792808e-06, "loss": 0.3492, "step": 5799 }, { "epoch": 4.0221914008321775, "grad_norm": 0.7116073319918248, "learning_rate": 1.1204474277181265e-06, "loss": 0.3642, "step": 5800 }, { "epoch": 4.022884882108183, "grad_norm": 0.38217680849421404, "learning_rate": 1.118920796847196e-06, "loss": 0.3412, "step": 5801 }, { "epoch": 4.023578363384189, "grad_norm": 0.39098852916806814, "learning_rate": 1.1173950756731256e-06, "loss": 0.3271, "step": 5802 }, { "epoch": 4.024271844660194, "grad_norm": 0.4464142485224417, "learning_rate": 1.1158702645535285e-06, "loss": 0.3637, "step": 5803 }, { "epoch": 4.0249653259362, "grad_norm": 0.4173176453800928, "learning_rate": 1.1143463638458113e-06, "loss": 0.3696, "step": 5804 }, { "epoch": 4.0256588072122055, "grad_norm": 0.4913713837177101, "learning_rate": 1.112823373907167e-06, "loss": 0.3638, "step": 5805 }, { "epoch": 4.026352288488211, "grad_norm": 0.42034173246720674, "learning_rate": 1.1113012950945695e-06, "loss": 0.3667, "step": 5806 }, { "epoch": 4.027045769764216, "grad_norm": 0.35948903960334144, "learning_rate": 1.1097801277647858e-06, "loss": 0.308, "step": 5807 }, { "epoch": 4.027739251040222, "grad_norm": 0.40789981807820574, "learning_rate": 1.108259872274367e-06, "loss": 0.3499, "step": 5808 }, { "epoch": 4.028432732316228, "grad_norm": 0.4066987525319391, "learning_rate": 1.1067405289796474e-06, "loss": 0.3539, "step": 5809 }, { "epoch": 4.029126213592233, "grad_norm": 0.39672304986182455, "learning_rate": 1.1052220982367518e-06, "loss": 0.3452, "step": 5810 }, { "epoch": 4.029819694868238, "grad_norm": 0.454528790508705, "learning_rate": 1.103704580401589e-06, "loss": 0.3611, "step": 5811 }, { "epoch": 4.030513176144244, "grad_norm": 0.39579841288328005, "learning_rate": 1.1021879758298538e-06, "loss": 0.3343, "step": 5812 }, { "epoch": 4.03120665742025, "grad_norm": 0.44487426916818584, "learning_rate": 1.1006722848770295e-06, "loss": 0.361, "step": 5813 }, { "epoch": 4.031900138696256, "grad_norm": 0.43506610552830605, "learning_rate": 1.0991575078983802e-06, "loss": 0.3671, "step": 5814 }, { "epoch": 4.0325936199722605, "grad_norm": 0.38249264145269624, "learning_rate": 1.0976436452489592e-06, "loss": 0.3084, "step": 5815 }, { "epoch": 4.033287101248266, "grad_norm": 0.41508076013418466, "learning_rate": 1.0961306972836079e-06, "loss": 0.336, "step": 5816 }, { "epoch": 4.033980582524272, "grad_norm": 0.38701935717543295, "learning_rate": 1.0946186643569456e-06, "loss": 0.3503, "step": 5817 }, { "epoch": 4.034674063800278, "grad_norm": 0.4519688878270693, "learning_rate": 1.0931075468233849e-06, "loss": 0.3409, "step": 5818 }, { "epoch": 4.035367545076283, "grad_norm": 0.41867670357078235, "learning_rate": 
1.0915973450371198e-06, "loss": 0.3737, "step": 5819 }, { "epoch": 4.0360610263522885, "grad_norm": 0.5001610437351757, "learning_rate": 1.0900880593521312e-06, "loss": 0.3505, "step": 5820 }, { "epoch": 4.036754507628294, "grad_norm": 0.40845155003360933, "learning_rate": 1.0885796901221863e-06, "loss": 0.3388, "step": 5821 }, { "epoch": 4.0374479889043, "grad_norm": 0.40410482736945486, "learning_rate": 1.0870722377008324e-06, "loss": 0.3463, "step": 5822 }, { "epoch": 4.038141470180305, "grad_norm": 0.42609479627173835, "learning_rate": 1.0855657024414074e-06, "loss": 0.3836, "step": 5823 }, { "epoch": 4.038834951456311, "grad_norm": 0.4221382488108252, "learning_rate": 1.0840600846970333e-06, "loss": 0.3815, "step": 5824 }, { "epoch": 4.039528432732316, "grad_norm": 0.4192382643715479, "learning_rate": 1.0825553848206133e-06, "loss": 0.3773, "step": 5825 }, { "epoch": 4.040221914008322, "grad_norm": 0.3643725487085566, "learning_rate": 1.0810516031648415e-06, "loss": 0.3345, "step": 5826 }, { "epoch": 4.040915395284327, "grad_norm": 0.4376391868602908, "learning_rate": 1.0795487400821897e-06, "loss": 0.3359, "step": 5827 }, { "epoch": 4.041608876560333, "grad_norm": 0.4169450078097028, "learning_rate": 1.078046795924919e-06, "loss": 0.3166, "step": 5828 }, { "epoch": 4.042302357836339, "grad_norm": 0.42587114592007663, "learning_rate": 1.0765457710450777e-06, "loss": 0.3008, "step": 5829 }, { "epoch": 4.042995839112344, "grad_norm": 0.38854199686484153, "learning_rate": 1.0750456657944913e-06, "loss": 0.3545, "step": 5830 }, { "epoch": 4.043689320388349, "grad_norm": 0.3964140826905079, "learning_rate": 1.0735464805247763e-06, "loss": 0.3201, "step": 5831 }, { "epoch": 4.044382801664355, "grad_norm": 0.38700042933700374, "learning_rate": 1.0720482155873286e-06, "loss": 0.3495, "step": 5832 }, { "epoch": 4.045076282940361, "grad_norm": 0.3654272643891456, "learning_rate": 1.0705508713333313e-06, "loss": 0.3343, "step": 5833 }, { "epoch": 4.045769764216367, "grad_norm": 0.42851269987260987, "learning_rate": 1.0690544481137527e-06, "loss": 0.3717, "step": 5834 }, { "epoch": 4.0464632454923715, "grad_norm": 0.4321138900128039, "learning_rate": 1.0675589462793406e-06, "loss": 0.3158, "step": 5835 }, { "epoch": 4.047156726768377, "grad_norm": 0.3930406818219885, "learning_rate": 1.0660643661806319e-06, "loss": 0.3426, "step": 5836 }, { "epoch": 4.047850208044383, "grad_norm": 0.44830600167565265, "learning_rate": 1.0645707081679446e-06, "loss": 0.339, "step": 5837 }, { "epoch": 4.048543689320389, "grad_norm": 0.4348642546430056, "learning_rate": 1.063077972591382e-06, "loss": 0.3435, "step": 5838 }, { "epoch": 4.049237170596394, "grad_norm": 0.4027083954062146, "learning_rate": 1.061586159800831e-06, "loss": 0.3635, "step": 5839 }, { "epoch": 4.049930651872399, "grad_norm": 0.41830438917877455, "learning_rate": 1.0600952701459595e-06, "loss": 0.4183, "step": 5840 }, { "epoch": 4.050624133148405, "grad_norm": 0.39023486938742546, "learning_rate": 1.0586053039762228e-06, "loss": 0.3425, "step": 5841 }, { "epoch": 4.051317614424411, "grad_norm": 0.4523300636524953, "learning_rate": 1.0571162616408586e-06, "loss": 0.3569, "step": 5842 }, { "epoch": 4.052011095700416, "grad_norm": 0.45787467512678826, "learning_rate": 1.0556281434888865e-06, "loss": 0.3735, "step": 5843 }, { "epoch": 4.052704576976422, "grad_norm": 0.3753388491079622, "learning_rate": 1.0541409498691109e-06, "loss": 0.3019, "step": 5844 }, { "epoch": 4.053398058252427, "grad_norm": 0.4100187041060469, "learning_rate": 
1.0526546811301203e-06, "loss": 0.3652, "step": 5845 }, { "epoch": 4.054091539528433, "grad_norm": 0.3889532168033599, "learning_rate": 1.051169337620282e-06, "loss": 0.3368, "step": 5846 }, { "epoch": 4.054785020804438, "grad_norm": 0.4226088476714647, "learning_rate": 1.0496849196877545e-06, "loss": 0.3572, "step": 5847 }, { "epoch": 4.055478502080444, "grad_norm": 0.42250312136606627, "learning_rate": 1.0482014276804713e-06, "loss": 0.3532, "step": 5848 }, { "epoch": 4.05617198335645, "grad_norm": 0.3852870358893622, "learning_rate": 1.0467188619461532e-06, "loss": 0.3477, "step": 5849 }, { "epoch": 4.056865464632455, "grad_norm": 0.39866227735554816, "learning_rate": 1.0452372228323044e-06, "loss": 0.3189, "step": 5850 }, { "epoch": 4.05755894590846, "grad_norm": 0.3843644290783127, "learning_rate": 1.0437565106862075e-06, "loss": 0.3868, "step": 5851 }, { "epoch": 4.058252427184466, "grad_norm": 0.3615829358056039, "learning_rate": 1.0422767258549317e-06, "loss": 0.323, "step": 5852 }, { "epoch": 4.058945908460472, "grad_norm": 0.4439796019335204, "learning_rate": 1.0407978686853299e-06, "loss": 0.3447, "step": 5853 }, { "epoch": 4.0596393897364775, "grad_norm": 0.3768287003413617, "learning_rate": 1.0393199395240317e-06, "loss": 0.3475, "step": 5854 }, { "epoch": 4.060332871012482, "grad_norm": 0.503640439688754, "learning_rate": 1.037842938717456e-06, "loss": 0.3474, "step": 5855 }, { "epoch": 4.061026352288488, "grad_norm": 0.49567142797013075, "learning_rate": 1.0363668666117992e-06, "loss": 0.3005, "step": 5856 }, { "epoch": 4.061719833564494, "grad_norm": 0.37315626340508873, "learning_rate": 1.0348917235530437e-06, "loss": 0.3179, "step": 5857 }, { "epoch": 4.0624133148405, "grad_norm": 0.4498268830274607, "learning_rate": 1.0334175098869526e-06, "loss": 0.3839, "step": 5858 }, { "epoch": 4.063106796116505, "grad_norm": 0.40218182598713376, "learning_rate": 1.0319442259590683e-06, "loss": 0.3357, "step": 5859 }, { "epoch": 4.06380027739251, "grad_norm": 0.4095507314444169, "learning_rate": 1.03047187211472e-06, "loss": 0.3736, "step": 5860 }, { "epoch": 4.064493758668516, "grad_norm": 0.37354967270115313, "learning_rate": 1.0290004486990169e-06, "loss": 0.3463, "step": 5861 }, { "epoch": 4.065187239944522, "grad_norm": 0.38827261263086965, "learning_rate": 1.0275299560568486e-06, "loss": 0.3524, "step": 5862 }, { "epoch": 4.065880721220527, "grad_norm": 0.39194774468101096, "learning_rate": 1.0260603945328878e-06, "loss": 0.3647, "step": 5863 }, { "epoch": 4.066574202496533, "grad_norm": 0.37407480909251034, "learning_rate": 1.0245917644715908e-06, "loss": 0.3454, "step": 5864 }, { "epoch": 4.067267683772538, "grad_norm": 0.3945946671705303, "learning_rate": 1.023124066217192e-06, "loss": 0.3709, "step": 5865 }, { "epoch": 4.067961165048544, "grad_norm": 0.40054137089556113, "learning_rate": 1.0216573001137125e-06, "loss": 0.3764, "step": 5866 }, { "epoch": 4.068654646324549, "grad_norm": 0.3874714076684071, "learning_rate": 1.0201914665049472e-06, "loss": 0.3226, "step": 5867 }, { "epoch": 4.069348127600555, "grad_norm": 0.4331782843327501, "learning_rate": 1.0187265657344796e-06, "loss": 0.4019, "step": 5868 }, { "epoch": 4.0700416088765605, "grad_norm": 0.4302859450348201, "learning_rate": 1.0172625981456723e-06, "loss": 0.3213, "step": 5869 }, { "epoch": 4.070735090152566, "grad_norm": 0.38244038205125636, "learning_rate": 1.0157995640816665e-06, "loss": 0.3849, "step": 5870 }, { "epoch": 4.071428571428571, "grad_norm": 0.38936163775891824, "learning_rate": 
1.0143374638853892e-06, "loss": 0.3651, "step": 5871 }, { "epoch": 4.072122052704577, "grad_norm": 0.40641703382552175, "learning_rate": 1.0128762978995422e-06, "loss": 0.3693, "step": 5872 }, { "epoch": 4.072815533980583, "grad_norm": 0.46322072472640285, "learning_rate": 1.0114160664666156e-06, "loss": 0.348, "step": 5873 }, { "epoch": 4.0735090152565885, "grad_norm": 0.41789040908115627, "learning_rate": 1.0099567699288786e-06, "loss": 0.3719, "step": 5874 }, { "epoch": 4.074202496532593, "grad_norm": 0.4272050712727168, "learning_rate": 1.0084984086283755e-06, "loss": 0.3652, "step": 5875 }, { "epoch": 4.074895977808599, "grad_norm": 0.37398271118769805, "learning_rate": 1.0070409829069394e-06, "loss": 0.3402, "step": 5876 }, { "epoch": 4.075589459084605, "grad_norm": 0.40278854472036707, "learning_rate": 1.005584493106177e-06, "loss": 0.3741, "step": 5877 }, { "epoch": 4.076282940360611, "grad_norm": 0.40458204526559793, "learning_rate": 1.0041289395674802e-06, "loss": 0.332, "step": 5878 }, { "epoch": 4.0769764216366156, "grad_norm": 0.4404472793691462, "learning_rate": 1.0026743226320223e-06, "loss": 0.3343, "step": 5879 }, { "epoch": 4.077669902912621, "grad_norm": 0.45093009069115725, "learning_rate": 1.0012206426407518e-06, "loss": 0.346, "step": 5880 }, { "epoch": 4.078363384188627, "grad_norm": 0.3908810592958925, "learning_rate": 9.99767899934402e-07, "loss": 0.3303, "step": 5881 }, { "epoch": 4.079056865464633, "grad_norm": 0.38634414914175186, "learning_rate": 9.983160948534854e-07, "loss": 0.35, "step": 5882 }, { "epoch": 4.079750346740638, "grad_norm": 0.4354320078855466, "learning_rate": 9.968652277382946e-07, "loss": 0.3374, "step": 5883 }, { "epoch": 4.0804438280166435, "grad_norm": 0.529310444422906, "learning_rate": 9.95415298928904e-07, "loss": 0.3336, "step": 5884 }, { "epoch": 4.081137309292649, "grad_norm": 0.40194326137499664, "learning_rate": 9.93966308765163e-07, "loss": 0.3417, "step": 5885 }, { "epoch": 4.081830790568655, "grad_norm": 0.6448066822501168, "learning_rate": 9.925182575867066e-07, "loss": 0.4051, "step": 5886 }, { "epoch": 4.08252427184466, "grad_norm": 0.9929883512578793, "learning_rate": 9.91071145732948e-07, "loss": 0.3598, "step": 5887 }, { "epoch": 4.083217753120666, "grad_norm": 0.41716032255389357, "learning_rate": 9.896249735430774e-07, "loss": 0.3477, "step": 5888 }, { "epoch": 4.0839112343966715, "grad_norm": 0.384175744172019, "learning_rate": 9.88179741356069e-07, "loss": 0.309, "step": 5889 }, { "epoch": 4.084604715672677, "grad_norm": 0.4018504510083021, "learning_rate": 9.86735449510674e-07, "loss": 0.3624, "step": 5890 }, { "epoch": 4.085298196948682, "grad_norm": 0.41067440319039933, "learning_rate": 9.852920983454239e-07, "loss": 0.3689, "step": 5891 }, { "epoch": 4.085991678224688, "grad_norm": 0.3958140720628448, "learning_rate": 9.83849688198632e-07, "loss": 0.328, "step": 5892 }, { "epoch": 4.086685159500694, "grad_norm": 0.36029107973716085, "learning_rate": 9.82408219408385e-07, "loss": 0.3096, "step": 5893 }, { "epoch": 4.087378640776699, "grad_norm": 0.4065999334232476, "learning_rate": 9.809676923125549e-07, "loss": 0.3268, "step": 5894 }, { "epoch": 4.088072122052704, "grad_norm": 0.41157403666730424, "learning_rate": 9.795281072487917e-07, "loss": 0.3241, "step": 5895 }, { "epoch": 4.08876560332871, "grad_norm": 0.5145356643195996, "learning_rate": 9.780894645545215e-07, "loss": 0.363, "step": 5896 }, { "epoch": 4.089459084604716, "grad_norm": 0.3803748193319084, "learning_rate": 9.76651764566952e-07, "loss": 0.3753, 
"step": 5897 }, { "epoch": 4.090152565880722, "grad_norm": 0.42946475021254504, "learning_rate": 9.752150076230727e-07, "loss": 0.3482, "step": 5898 }, { "epoch": 4.0908460471567265, "grad_norm": 0.39938763817123146, "learning_rate": 9.737791940596436e-07, "loss": 0.319, "step": 5899 }, { "epoch": 4.091539528432732, "grad_norm": 0.42568017656411583, "learning_rate": 9.723443242132152e-07, "loss": 0.3413, "step": 5900 }, { "epoch": 4.092233009708738, "grad_norm": 0.38800643997655987, "learning_rate": 9.709103984201058e-07, "loss": 0.36, "step": 5901 }, { "epoch": 4.092926490984744, "grad_norm": 0.3792710196069069, "learning_rate": 9.69477417016419e-07, "loss": 0.3158, "step": 5902 }, { "epoch": 4.093619972260749, "grad_norm": 0.4367420226668608, "learning_rate": 9.680453803380368e-07, "loss": 0.3745, "step": 5903 }, { "epoch": 4.0943134535367545, "grad_norm": 0.4136972954177396, "learning_rate": 9.666142887206153e-07, "loss": 0.3122, "step": 5904 }, { "epoch": 4.09500693481276, "grad_norm": 0.4449547954714318, "learning_rate": 9.651841424995933e-07, "loss": 0.3539, "step": 5905 }, { "epoch": 4.095700416088766, "grad_norm": 0.4268763192142163, "learning_rate": 9.637549420101877e-07, "loss": 0.3224, "step": 5906 }, { "epoch": 4.096393897364771, "grad_norm": 0.4298972184021089, "learning_rate": 9.62326687587391e-07, "loss": 0.3361, "step": 5907 }, { "epoch": 4.097087378640777, "grad_norm": 0.3866986165135999, "learning_rate": 9.608993795659766e-07, "loss": 0.3259, "step": 5908 }, { "epoch": 4.097780859916782, "grad_norm": 0.43035224091724156, "learning_rate": 9.59473018280495e-07, "loss": 0.3551, "step": 5909 }, { "epoch": 4.098474341192788, "grad_norm": 0.39659620986529964, "learning_rate": 9.580476040652748e-07, "loss": 0.3592, "step": 5910 }, { "epoch": 4.099167822468793, "grad_norm": 0.3942190192841053, "learning_rate": 9.566231372544244e-07, "loss": 0.3405, "step": 5911 }, { "epoch": 4.099861303744799, "grad_norm": 0.40724862990032207, "learning_rate": 9.551996181818263e-07, "loss": 0.3423, "step": 5912 }, { "epoch": 4.100554785020805, "grad_norm": 0.4696136676485921, "learning_rate": 9.53777047181143e-07, "loss": 0.3506, "step": 5913 }, { "epoch": 4.10124826629681, "grad_norm": 0.4474527847864227, "learning_rate": 9.52355424585818e-07, "loss": 0.3231, "step": 5914 }, { "epoch": 4.101941747572815, "grad_norm": 0.40233172511560633, "learning_rate": 9.50934750729065e-07, "loss": 0.3463, "step": 5915 }, { "epoch": 4.102635228848821, "grad_norm": 0.44204400797241866, "learning_rate": 9.495150259438835e-07, "loss": 0.3997, "step": 5916 }, { "epoch": 4.103328710124827, "grad_norm": 0.3905413372346672, "learning_rate": 9.48096250563042e-07, "loss": 0.3351, "step": 5917 }, { "epoch": 4.104022191400833, "grad_norm": 0.4214916449942419, "learning_rate": 9.466784249190952e-07, "loss": 0.3823, "step": 5918 }, { "epoch": 4.1047156726768375, "grad_norm": 0.4693407755650625, "learning_rate": 9.452615493443718e-07, "loss": 0.4101, "step": 5919 }, { "epoch": 4.105409153952843, "grad_norm": 0.4044415532348893, "learning_rate": 9.438456241709742e-07, "loss": 0.3501, "step": 5920 }, { "epoch": 4.106102635228849, "grad_norm": 0.38990588957861816, "learning_rate": 9.424306497307873e-07, "loss": 0.3529, "step": 5921 }, { "epoch": 4.106796116504855, "grad_norm": 0.3892388756390445, "learning_rate": 9.410166263554687e-07, "loss": 0.3555, "step": 5922 }, { "epoch": 4.10748959778086, "grad_norm": 0.39392931065563647, "learning_rate": 9.396035543764559e-07, "loss": 0.3293, "step": 5923 }, { "epoch": 
4.108183079056865, "grad_norm": 0.4369203656868996, "learning_rate": 9.381914341249648e-07, "loss": 0.3242, "step": 5924 }, { "epoch": 4.108876560332871, "grad_norm": 0.3794263858141677, "learning_rate": 9.367802659319835e-07, "loss": 0.3378, "step": 5925 }, { "epoch": 4.109570041608877, "grad_norm": 0.40245901640539045, "learning_rate": 9.353700501282803e-07, "loss": 0.3622, "step": 5926 }, { "epoch": 4.110263522884882, "grad_norm": 0.4093408644622792, "learning_rate": 9.339607870444001e-07, "loss": 0.3772, "step": 5927 }, { "epoch": 4.110957004160888, "grad_norm": 0.4384772975164476, "learning_rate": 9.325524770106637e-07, "loss": 0.3555, "step": 5928 }, { "epoch": 4.111650485436893, "grad_norm": 0.3961629506761391, "learning_rate": 9.311451203571697e-07, "loss": 0.3173, "step": 5929 }, { "epoch": 4.112343966712899, "grad_norm": 0.39463723101452375, "learning_rate": 9.297387174137912e-07, "loss": 0.3583, "step": 5930 }, { "epoch": 4.113037447988904, "grad_norm": 0.4836288365850371, "learning_rate": 9.283332685101782e-07, "loss": 0.3444, "step": 5931 }, { "epoch": 4.11373092926491, "grad_norm": 0.40424304754832785, "learning_rate": 9.269287739757604e-07, "loss": 0.3742, "step": 5932 }, { "epoch": 4.114424410540916, "grad_norm": 0.39021018133113516, "learning_rate": 9.25525234139738e-07, "loss": 0.3604, "step": 5933 }, { "epoch": 4.115117891816921, "grad_norm": 0.4221221170168946, "learning_rate": 9.241226493310917e-07, "loss": 0.3373, "step": 5934 }, { "epoch": 4.115811373092926, "grad_norm": 0.39570986963436744, "learning_rate": 9.22721019878578e-07, "loss": 0.3286, "step": 5935 }, { "epoch": 4.116504854368932, "grad_norm": 0.7735755872938727, "learning_rate": 9.213203461107278e-07, "loss": 0.3301, "step": 5936 }, { "epoch": 4.117198335644938, "grad_norm": 0.5641395627446331, "learning_rate": 9.19920628355851e-07, "loss": 0.3625, "step": 5937 }, { "epoch": 4.1178918169209435, "grad_norm": 0.41126362889400475, "learning_rate": 9.185218669420282e-07, "loss": 0.3114, "step": 5938 }, { "epoch": 4.118585298196948, "grad_norm": 0.44893621491319846, "learning_rate": 9.1712406219712e-07, "loss": 0.3674, "step": 5939 }, { "epoch": 4.119278779472954, "grad_norm": 0.3922788482957072, "learning_rate": 9.157272144487634e-07, "loss": 0.3372, "step": 5940 }, { "epoch": 4.11997226074896, "grad_norm": 0.42533536484553314, "learning_rate": 9.143313240243668e-07, "loss": 0.3729, "step": 5941 }, { "epoch": 4.120665742024966, "grad_norm": 0.39574415186274814, "learning_rate": 9.129363912511185e-07, "loss": 0.3431, "step": 5942 }, { "epoch": 4.121359223300971, "grad_norm": 0.37377146082736573, "learning_rate": 9.11542416455981e-07, "loss": 0.312, "step": 5943 }, { "epoch": 4.122052704576976, "grad_norm": 0.38868937143851934, "learning_rate": 9.101493999656885e-07, "loss": 0.3245, "step": 5944 }, { "epoch": 4.122746185852982, "grad_norm": 0.43829911951592504, "learning_rate": 9.087573421067591e-07, "loss": 0.3338, "step": 5945 }, { "epoch": 4.123439667128988, "grad_norm": 0.406545811090325, "learning_rate": 9.073662432054775e-07, "loss": 0.3589, "step": 5946 }, { "epoch": 4.124133148404993, "grad_norm": 0.42980007882605714, "learning_rate": 9.059761035879083e-07, "loss": 0.3624, "step": 5947 }, { "epoch": 4.124826629680999, "grad_norm": 0.4208110418250585, "learning_rate": 9.04586923579891e-07, "loss": 0.3287, "step": 5948 }, { "epoch": 4.125520110957004, "grad_norm": 0.4490583891032571, "learning_rate": 9.031987035070378e-07, "loss": 0.3536, "step": 5949 }, { "epoch": 4.12621359223301, "grad_norm": 
0.40240423539407566, "learning_rate": 9.018114436947373e-07, "loss": 0.3243, "step": 5950 }, { "epoch": 4.126907073509015, "grad_norm": 0.39077472347062414, "learning_rate": 9.004251444681556e-07, "loss": 0.3186, "step": 5951 }, { "epoch": 4.127600554785021, "grad_norm": 0.40872134210578254, "learning_rate": 8.990398061522282e-07, "loss": 0.3755, "step": 5952 }, { "epoch": 4.1282940360610265, "grad_norm": 0.40280782359654577, "learning_rate": 8.976554290716699e-07, "loss": 0.3417, "step": 5953 }, { "epoch": 4.128987517337032, "grad_norm": 0.4519412795671945, "learning_rate": 8.962720135509678e-07, "loss": 0.4052, "step": 5954 }, { "epoch": 4.129680998613037, "grad_norm": 0.4498345766489382, "learning_rate": 8.948895599143859e-07, "loss": 0.3573, "step": 5955 }, { "epoch": 4.130374479889043, "grad_norm": 0.8581724152914949, "learning_rate": 8.935080684859615e-07, "loss": 0.3591, "step": 5956 }, { "epoch": 4.131067961165049, "grad_norm": 0.408925491212508, "learning_rate": 8.921275395895041e-07, "loss": 0.3293, "step": 5957 }, { "epoch": 4.1317614424410545, "grad_norm": 0.4218063585602398, "learning_rate": 8.907479735486002e-07, "loss": 0.3174, "step": 5958 }, { "epoch": 4.132454923717059, "grad_norm": 0.4434643511461139, "learning_rate": 8.893693706866124e-07, "loss": 0.3907, "step": 5959 }, { "epoch": 4.133148404993065, "grad_norm": 0.4006570102175896, "learning_rate": 8.879917313266728e-07, "loss": 0.3735, "step": 5960 }, { "epoch": 4.133841886269071, "grad_norm": 0.40069527693707424, "learning_rate": 8.866150557916914e-07, "loss": 0.3619, "step": 5961 }, { "epoch": 4.134535367545077, "grad_norm": 0.4299383824455946, "learning_rate": 8.852393444043478e-07, "loss": 0.3409, "step": 5962 }, { "epoch": 4.1352288488210815, "grad_norm": 0.3962675644371142, "learning_rate": 8.838645974871029e-07, "loss": 0.3119, "step": 5963 }, { "epoch": 4.135922330097087, "grad_norm": 0.4933003927011079, "learning_rate": 8.824908153621875e-07, "loss": 0.4305, "step": 5964 }, { "epoch": 4.136615811373093, "grad_norm": 0.4183634729188962, "learning_rate": 8.811179983516027e-07, "loss": 0.3732, "step": 5965 }, { "epoch": 4.137309292649099, "grad_norm": 0.40206622991495006, "learning_rate": 8.797461467771301e-07, "loss": 0.3261, "step": 5966 }, { "epoch": 4.138002773925104, "grad_norm": 0.377053128798786, "learning_rate": 8.783752609603197e-07, "loss": 0.338, "step": 5967 }, { "epoch": 4.1386962552011095, "grad_norm": 0.40261347438723244, "learning_rate": 8.770053412224972e-07, "loss": 0.3026, "step": 5968 }, { "epoch": 4.139389736477115, "grad_norm": 0.39967669849322496, "learning_rate": 8.756363878847646e-07, "loss": 0.3854, "step": 5969 }, { "epoch": 4.140083217753121, "grad_norm": 0.43750729509161024, "learning_rate": 8.742684012679908e-07, "loss": 0.3753, "step": 5970 }, { "epoch": 4.140776699029126, "grad_norm": 0.36648013514674793, "learning_rate": 8.729013816928239e-07, "loss": 0.3214, "step": 5971 }, { "epoch": 4.141470180305132, "grad_norm": 0.46374835610045173, "learning_rate": 8.715353294796835e-07, "loss": 0.4039, "step": 5972 }, { "epoch": 4.1421636615811375, "grad_norm": 0.41435647361787886, "learning_rate": 8.70170244948762e-07, "loss": 0.3908, "step": 5973 }, { "epoch": 4.142857142857143, "grad_norm": 0.38278049151288557, "learning_rate": 8.688061284200266e-07, "loss": 0.3379, "step": 5974 }, { "epoch": 4.143550624133148, "grad_norm": 0.4432340706873847, "learning_rate": 8.67442980213214e-07, "loss": 0.3513, "step": 5975 }, { "epoch": 4.144244105409154, "grad_norm": 0.38333512090241434, 
"learning_rate": 8.660808006478371e-07, "loss": 0.3252, "step": 5976 }, { "epoch": 4.14493758668516, "grad_norm": 0.4037543696388098, "learning_rate": 8.647195900431832e-07, "loss": 0.3137, "step": 5977 }, { "epoch": 4.145631067961165, "grad_norm": 0.44960096169288244, "learning_rate": 8.633593487183067e-07, "loss": 0.4103, "step": 5978 }, { "epoch": 4.14632454923717, "grad_norm": 0.40445276088021065, "learning_rate": 8.6200007699204e-07, "loss": 0.3763, "step": 5979 }, { "epoch": 4.147018030513176, "grad_norm": 1.0045933412764532, "learning_rate": 8.60641775182986e-07, "loss": 0.3595, "step": 5980 }, { "epoch": 4.147711511789182, "grad_norm": 0.7626982371670392, "learning_rate": 8.592844436095216e-07, "loss": 0.3185, "step": 5981 }, { "epoch": 4.148404993065188, "grad_norm": 0.43361266885090044, "learning_rate": 8.579280825897968e-07, "loss": 0.3635, "step": 5982 }, { "epoch": 4.1490984743411925, "grad_norm": 0.4164723941003652, "learning_rate": 8.565726924417295e-07, "loss": 0.3923, "step": 5983 }, { "epoch": 4.149791955617198, "grad_norm": 0.3900446498113785, "learning_rate": 8.55218273483015e-07, "loss": 0.3261, "step": 5984 }, { "epoch": 4.150485436893204, "grad_norm": 0.36950207591264156, "learning_rate": 8.538648260311205e-07, "loss": 0.3114, "step": 5985 }, { "epoch": 4.15117891816921, "grad_norm": 0.4048665697589546, "learning_rate": 8.525123504032817e-07, "loss": 0.3273, "step": 5986 }, { "epoch": 4.151872399445215, "grad_norm": 0.36152353071455035, "learning_rate": 8.511608469165106e-07, "loss": 0.3212, "step": 5987 }, { "epoch": 4.1525658807212205, "grad_norm": 0.44967533876443433, "learning_rate": 8.498103158875909e-07, "loss": 0.3581, "step": 5988 }, { "epoch": 4.153259361997226, "grad_norm": 0.3985874198622114, "learning_rate": 8.484607576330733e-07, "loss": 0.3388, "step": 5989 }, { "epoch": 4.153952843273232, "grad_norm": 0.5023222614355668, "learning_rate": 8.471121724692905e-07, "loss": 0.3592, "step": 5990 }, { "epoch": 4.154646324549237, "grad_norm": 0.4497067355037071, "learning_rate": 8.457645607123361e-07, "loss": 0.3461, "step": 5991 }, { "epoch": 4.155339805825243, "grad_norm": 0.3565151091482395, "learning_rate": 8.444179226780824e-07, "loss": 0.3374, "step": 5992 }, { "epoch": 4.156033287101248, "grad_norm": 0.4077691814134206, "learning_rate": 8.430722586821721e-07, "loss": 0.3254, "step": 5993 }, { "epoch": 4.156726768377254, "grad_norm": 0.43058583104823883, "learning_rate": 8.417275690400178e-07, "loss": 0.3292, "step": 5994 }, { "epoch": 4.157420249653259, "grad_norm": 0.40196706942006355, "learning_rate": 8.403838540668058e-07, "loss": 0.3636, "step": 5995 }, { "epoch": 4.158113730929265, "grad_norm": 0.41010432217836973, "learning_rate": 8.390411140774945e-07, "loss": 0.3366, "step": 5996 }, { "epoch": 4.158807212205271, "grad_norm": 0.3737938462591656, "learning_rate": 8.37699349386809e-07, "loss": 0.3386, "step": 5997 }, { "epoch": 4.159500693481276, "grad_norm": 0.4315241239685576, "learning_rate": 8.363585603092517e-07, "loss": 0.3938, "step": 5998 }, { "epoch": 4.160194174757281, "grad_norm": 0.46110209008123965, "learning_rate": 8.350187471590937e-07, "loss": 0.4185, "step": 5999 }, { "epoch": 4.160887656033287, "grad_norm": 0.39658353146422376, "learning_rate": 8.336799102503762e-07, "loss": 0.376, "step": 6000 }, { "epoch": 4.161581137309293, "grad_norm": 0.39401883680631355, "learning_rate": 8.323420498969159e-07, "loss": 0.3656, "step": 6001 }, { "epoch": 4.162274618585299, "grad_norm": 0.4290292016727245, "learning_rate": 8.310051664122937e-07, 
"loss": 0.3373, "step": 6002 }, { "epoch": 4.1629680998613035, "grad_norm": 0.4184393192229492, "learning_rate": 8.296692601098688e-07, "loss": 0.3525, "step": 6003 }, { "epoch": 4.163661581137309, "grad_norm": 0.42324902300214917, "learning_rate": 8.283343313027654e-07, "loss": 0.3027, "step": 6004 }, { "epoch": 4.164355062413315, "grad_norm": 0.4434263665341341, "learning_rate": 8.270003803038817e-07, "loss": 0.4057, "step": 6005 }, { "epoch": 4.165048543689321, "grad_norm": 0.3853786799031601, "learning_rate": 8.25667407425888e-07, "loss": 0.3338, "step": 6006 }, { "epoch": 4.165742024965326, "grad_norm": 0.40260227349286065, "learning_rate": 8.243354129812192e-07, "loss": 0.3522, "step": 6007 }, { "epoch": 4.166435506241331, "grad_norm": 0.37600486500593155, "learning_rate": 8.230043972820895e-07, "loss": 0.3298, "step": 6008 }, { "epoch": 4.167128987517337, "grad_norm": 0.4463058140550583, "learning_rate": 8.216743606404793e-07, "loss": 0.3317, "step": 6009 }, { "epoch": 4.167822468793343, "grad_norm": 0.377188534850174, "learning_rate": 8.203453033681368e-07, "loss": 0.3285, "step": 6010 }, { "epoch": 4.168515950069348, "grad_norm": 0.3889852476800571, "learning_rate": 8.190172257765855e-07, "loss": 0.3523, "step": 6011 }, { "epoch": 4.169209431345354, "grad_norm": 0.38463457226080766, "learning_rate": 8.176901281771154e-07, "loss": 0.3563, "step": 6012 }, { "epoch": 4.169902912621359, "grad_norm": 0.40416817629559865, "learning_rate": 8.163640108807897e-07, "loss": 0.3811, "step": 6013 }, { "epoch": 4.170596393897365, "grad_norm": 0.43964283176161695, "learning_rate": 8.150388741984416e-07, "loss": 0.3552, "step": 6014 }, { "epoch": 4.17128987517337, "grad_norm": 0.4314773291786829, "learning_rate": 8.137147184406718e-07, "loss": 0.3339, "step": 6015 }, { "epoch": 4.171983356449376, "grad_norm": 0.3670685364278709, "learning_rate": 8.123915439178531e-07, "loss": 0.3034, "step": 6016 }, { "epoch": 4.172676837725382, "grad_norm": 0.41926676658602996, "learning_rate": 8.11069350940128e-07, "loss": 0.3383, "step": 6017 }, { "epoch": 4.173370319001387, "grad_norm": 0.4872134877953159, "learning_rate": 8.097481398174101e-07, "loss": 0.3494, "step": 6018 }, { "epoch": 4.174063800277392, "grad_norm": 0.39761396661231746, "learning_rate": 8.084279108593818e-07, "loss": 0.3512, "step": 6019 }, { "epoch": 4.174757281553398, "grad_norm": 0.4256801841918406, "learning_rate": 8.071086643754933e-07, "loss": 0.3691, "step": 6020 }, { "epoch": 4.175450762829404, "grad_norm": 0.4234607184847094, "learning_rate": 8.057904006749673e-07, "loss": 0.3333, "step": 6021 }, { "epoch": 4.1761442441054095, "grad_norm": 0.4429490869933367, "learning_rate": 8.044731200667966e-07, "loss": 0.3713, "step": 6022 }, { "epoch": 4.176837725381414, "grad_norm": 0.4475007897306103, "learning_rate": 8.031568228597403e-07, "loss": 0.2945, "step": 6023 }, { "epoch": 4.17753120665742, "grad_norm": 0.4321793958307334, "learning_rate": 8.0184150936233e-07, "loss": 0.415, "step": 6024 }, { "epoch": 4.178224687933426, "grad_norm": 0.5080867078775074, "learning_rate": 8.005271798828646e-07, "loss": 0.3486, "step": 6025 }, { "epoch": 4.178918169209432, "grad_norm": 0.5286780112924887, "learning_rate": 7.992138347294148e-07, "loss": 0.38, "step": 6026 }, { "epoch": 4.179611650485437, "grad_norm": 0.39056671330678505, "learning_rate": 7.979014742098196e-07, "loss": 0.3593, "step": 6027 }, { "epoch": 4.180305131761442, "grad_norm": 0.4141165486544236, "learning_rate": 7.965900986316849e-07, "loss": 0.3288, "step": 6028 }, { "epoch": 
4.180998613037448, "grad_norm": 0.4363692200440037, "learning_rate": 7.952797083023883e-07, "loss": 0.3561, "step": 6029 }, { "epoch": 4.181692094313454, "grad_norm": 0.3898890656791092, "learning_rate": 7.939703035290774e-07, "loss": 0.3271, "step": 6030 }, { "epoch": 4.182385575589459, "grad_norm": 0.4176585231952008, "learning_rate": 7.926618846186646e-07, "loss": 0.3816, "step": 6031 }, { "epoch": 4.1830790568654646, "grad_norm": 0.4494198167361945, "learning_rate": 7.913544518778349e-07, "loss": 0.3794, "step": 6032 }, { "epoch": 4.18377253814147, "grad_norm": 0.37593638717670064, "learning_rate": 7.900480056130428e-07, "loss": 0.3497, "step": 6033 }, { "epoch": 4.184466019417476, "grad_norm": 0.5669971707711042, "learning_rate": 7.887425461305059e-07, "loss": 0.3476, "step": 6034 }, { "epoch": 4.185159500693481, "grad_norm": 0.4724069941554161, "learning_rate": 7.874380737362186e-07, "loss": 0.3518, "step": 6035 }, { "epoch": 4.185852981969487, "grad_norm": 0.3758445148826483, "learning_rate": 7.861345887359372e-07, "loss": 0.3323, "step": 6036 }, { "epoch": 4.1865464632454925, "grad_norm": 0.512148301918811, "learning_rate": 7.848320914351903e-07, "loss": 0.3346, "step": 6037 }, { "epoch": 4.187239944521498, "grad_norm": 0.45113931383264844, "learning_rate": 7.835305821392741e-07, "loss": 0.3678, "step": 6038 }, { "epoch": 4.187933425797503, "grad_norm": 0.39604189770432496, "learning_rate": 7.822300611532513e-07, "loss": 0.2974, "step": 6039 }, { "epoch": 4.188626907073509, "grad_norm": 0.4020553967608756, "learning_rate": 7.809305287819557e-07, "loss": 0.3334, "step": 6040 }, { "epoch": 4.189320388349515, "grad_norm": 0.5780842483561379, "learning_rate": 7.7963198532999e-07, "loss": 0.3646, "step": 6041 }, { "epoch": 4.1900138696255205, "grad_norm": 0.36381579064490266, "learning_rate": 7.783344311017183e-07, "loss": 0.3384, "step": 6042 }, { "epoch": 4.190707350901525, "grad_norm": 0.36730056127974264, "learning_rate": 7.770378664012839e-07, "loss": 0.3294, "step": 6043 }, { "epoch": 4.191400832177531, "grad_norm": 0.43717286925053483, "learning_rate": 7.757422915325885e-07, "loss": 0.3287, "step": 6044 }, { "epoch": 4.192094313453537, "grad_norm": 0.40476188782189276, "learning_rate": 7.744477067993061e-07, "loss": 0.367, "step": 6045 }, { "epoch": 4.192787794729543, "grad_norm": 0.45175950463074016, "learning_rate": 7.731541125048798e-07, "loss": 0.3167, "step": 6046 }, { "epoch": 4.1934812760055475, "grad_norm": 0.5092667367776222, "learning_rate": 7.718615089525161e-07, "loss": 0.3613, "step": 6047 }, { "epoch": 4.194174757281553, "grad_norm": 0.8766702068560327, "learning_rate": 7.705698964451941e-07, "loss": 0.3363, "step": 6048 }, { "epoch": 4.194868238557559, "grad_norm": 0.41008619122123174, "learning_rate": 7.692792752856564e-07, "loss": 0.291, "step": 6049 }, { "epoch": 4.195561719833565, "grad_norm": 0.41500164653505817, "learning_rate": 7.679896457764164e-07, "loss": 0.3666, "step": 6050 }, { "epoch": 4.19625520110957, "grad_norm": 0.42725730863798383, "learning_rate": 7.667010082197534e-07, "loss": 0.3482, "step": 6051 }, { "epoch": 4.1969486823855755, "grad_norm": 0.4148846355993886, "learning_rate": 7.654133629177152e-07, "loss": 0.3782, "step": 6052 }, { "epoch": 4.197642163661581, "grad_norm": 0.39087434176662567, "learning_rate": 7.641267101721179e-07, "loss": 0.3146, "step": 6053 }, { "epoch": 4.198335644937587, "grad_norm": 0.4047503038136691, "learning_rate": 7.628410502845401e-07, "loss": 0.3263, "step": 6054 }, { "epoch": 4.199029126213592, "grad_norm": 
0.3849298003850758, "learning_rate": 7.615563835563339e-07, "loss": 0.333, "step": 6055 }, { "epoch": 4.199722607489598, "grad_norm": 0.4547638404162615, "learning_rate": 7.602727102886165e-07, "loss": 0.352, "step": 6056 }, { "epoch": 4.2004160887656035, "grad_norm": 0.3783034975932897, "learning_rate": 7.589900307822684e-07, "loss": 0.3541, "step": 6057 }, { "epoch": 4.201109570041609, "grad_norm": 0.39462513301232205, "learning_rate": 7.577083453379425e-07, "loss": 0.3497, "step": 6058 }, { "epoch": 4.201803051317614, "grad_norm": 0.5958447544116373, "learning_rate": 7.564276542560578e-07, "loss": 0.3593, "step": 6059 }, { "epoch": 4.20249653259362, "grad_norm": 0.3887050554245725, "learning_rate": 7.551479578367948e-07, "loss": 0.342, "step": 6060 }, { "epoch": 4.203190013869626, "grad_norm": 0.40702570868356264, "learning_rate": 7.538692563801103e-07, "loss": 0.3351, "step": 6061 }, { "epoch": 4.203883495145631, "grad_norm": 0.3722924768078192, "learning_rate": 7.525915501857189e-07, "loss": 0.313, "step": 6062 }, { "epoch": 4.204576976421636, "grad_norm": 0.40094754439997143, "learning_rate": 7.513148395531073e-07, "loss": 0.3444, "step": 6063 }, { "epoch": 4.205270457697642, "grad_norm": 0.40667576909304676, "learning_rate": 7.50039124781528e-07, "loss": 0.3824, "step": 6064 }, { "epoch": 4.205963938973648, "grad_norm": 0.5936452773907123, "learning_rate": 7.487644061699966e-07, "loss": 0.3355, "step": 6065 }, { "epoch": 4.206657420249654, "grad_norm": 0.46290525806142524, "learning_rate": 7.474906840173001e-07, "loss": 0.3634, "step": 6066 }, { "epoch": 4.2073509015256585, "grad_norm": 0.4565555658930163, "learning_rate": 7.462179586219897e-07, "loss": 0.3311, "step": 6067 }, { "epoch": 4.208044382801664, "grad_norm": 0.46827195103233166, "learning_rate": 7.449462302823818e-07, "loss": 0.3484, "step": 6068 }, { "epoch": 4.20873786407767, "grad_norm": 0.3895267045665284, "learning_rate": 7.436754992965606e-07, "loss": 0.3438, "step": 6069 }, { "epoch": 4.209431345353676, "grad_norm": 0.47370338584444854, "learning_rate": 7.424057659623767e-07, "loss": 0.4025, "step": 6070 }, { "epoch": 4.210124826629681, "grad_norm": 0.39832369121029065, "learning_rate": 7.411370305774468e-07, "loss": 0.3472, "step": 6071 }, { "epoch": 4.2108183079056865, "grad_norm": 0.4029940029859894, "learning_rate": 7.398692934391532e-07, "loss": 0.3454, "step": 6072 }, { "epoch": 4.211511789181692, "grad_norm": 0.4834483036173078, "learning_rate": 7.386025548446435e-07, "loss": 0.3544, "step": 6073 }, { "epoch": 4.212205270457698, "grad_norm": 0.4857192886392201, "learning_rate": 7.373368150908316e-07, "loss": 0.3783, "step": 6074 }, { "epoch": 4.212898751733703, "grad_norm": 0.39459037231814265, "learning_rate": 7.360720744744004e-07, "loss": 0.3306, "step": 6075 }, { "epoch": 4.213592233009709, "grad_norm": 0.3910367689324389, "learning_rate": 7.348083332917927e-07, "loss": 0.3484, "step": 6076 }, { "epoch": 4.214285714285714, "grad_norm": 0.43108697736448265, "learning_rate": 7.33545591839222e-07, "loss": 0.3341, "step": 6077 }, { "epoch": 4.21497919556172, "grad_norm": 0.4354529586504891, "learning_rate": 7.322838504126651e-07, "loss": 0.3752, "step": 6078 }, { "epoch": 4.215672676837725, "grad_norm": 0.4277510314031144, "learning_rate": 7.310231093078657e-07, "loss": 0.3444, "step": 6079 }, { "epoch": 4.216366158113731, "grad_norm": 0.4203538970356623, "learning_rate": 7.297633688203332e-07, "loss": 0.3083, "step": 6080 }, { "epoch": 4.217059639389737, "grad_norm": 0.438697512780434, "learning_rate": 
7.28504629245339e-07, "loss": 0.3546, "step": 6081 }, { "epoch": 4.217753120665742, "grad_norm": 0.4618963687939622, "learning_rate": 7.272468908779245e-07, "loss": 0.2975, "step": 6082 }, { "epoch": 4.218446601941747, "grad_norm": 0.393360744750457, "learning_rate": 7.25990154012895e-07, "loss": 0.3558, "step": 6083 }, { "epoch": 4.219140083217753, "grad_norm": 0.40685227710677674, "learning_rate": 7.247344189448186e-07, "loss": 0.3241, "step": 6084 }, { "epoch": 4.219833564493759, "grad_norm": 0.9771687624258758, "learning_rate": 7.23479685968031e-07, "loss": 0.3544, "step": 6085 }, { "epoch": 4.220527045769765, "grad_norm": 0.3881488861077096, "learning_rate": 7.222259553766348e-07, "loss": 0.3293, "step": 6086 }, { "epoch": 4.221220527045769, "grad_norm": 0.38743087425628003, "learning_rate": 7.20973227464491e-07, "loss": 0.3428, "step": 6087 }, { "epoch": 4.221914008321775, "grad_norm": 0.4626890137947432, "learning_rate": 7.197215025252347e-07, "loss": 0.3759, "step": 6088 }, { "epoch": 4.222607489597781, "grad_norm": 0.8928292265606828, "learning_rate": 7.184707808522578e-07, "loss": 0.3556, "step": 6089 }, { "epoch": 4.223300970873787, "grad_norm": 0.37934929811524004, "learning_rate": 7.172210627387216e-07, "loss": 0.3569, "step": 6090 }, { "epoch": 4.223994452149792, "grad_norm": 0.4136645989588592, "learning_rate": 7.159723484775522e-07, "loss": 0.3466, "step": 6091 }, { "epoch": 4.224687933425797, "grad_norm": 0.39614707350283174, "learning_rate": 7.14724638361437e-07, "loss": 0.3327, "step": 6092 }, { "epoch": 4.225381414701803, "grad_norm": 0.4049604409686824, "learning_rate": 7.134779326828317e-07, "loss": 0.3859, "step": 6093 }, { "epoch": 4.226074895977809, "grad_norm": 2.7061576392777082, "learning_rate": 7.122322317339542e-07, "loss": 0.3291, "step": 6094 }, { "epoch": 4.226768377253814, "grad_norm": 0.45460105757616354, "learning_rate": 7.109875358067875e-07, "loss": 0.352, "step": 6095 }, { "epoch": 4.22746185852982, "grad_norm": 0.382801251984125, "learning_rate": 7.0974384519308e-07, "loss": 0.369, "step": 6096 }, { "epoch": 4.228155339805825, "grad_norm": 0.5070626489124913, "learning_rate": 7.085011601843439e-07, "loss": 0.3465, "step": 6097 }, { "epoch": 4.228848821081831, "grad_norm": 0.4075644593585711, "learning_rate": 7.072594810718564e-07, "loss": 0.3506, "step": 6098 }, { "epoch": 4.229542302357836, "grad_norm": 0.4223945750640261, "learning_rate": 7.060188081466556e-07, "loss": 0.3869, "step": 6099 }, { "epoch": 4.230235783633842, "grad_norm": 0.4251773787927419, "learning_rate": 7.04779141699548e-07, "loss": 0.3489, "step": 6100 }, { "epoch": 4.2309292649098476, "grad_norm": 0.40065375915387225, "learning_rate": 7.035404820211034e-07, "loss": 0.3374, "step": 6101 }, { "epoch": 4.231622746185853, "grad_norm": 0.5339284059432036, "learning_rate": 7.02302829401652e-07, "loss": 0.3644, "step": 6102 }, { "epoch": 4.232316227461858, "grad_norm": 0.38640013398988654, "learning_rate": 7.010661841312921e-07, "loss": 0.3145, "step": 6103 }, { "epoch": 4.233009708737864, "grad_norm": 0.42984289939723686, "learning_rate": 6.998305464998856e-07, "loss": 0.3982, "step": 6104 }, { "epoch": 4.23370319001387, "grad_norm": 0.41997515202628516, "learning_rate": 6.98595916797053e-07, "loss": 0.366, "step": 6105 }, { "epoch": 4.2343966712898755, "grad_norm": 0.42410292693418583, "learning_rate": 6.973622953121878e-07, "loss": 0.3858, "step": 6106 }, { "epoch": 4.23509015256588, "grad_norm": 1.6808598567672512, "learning_rate": 6.961296823344388e-07, "loss": 0.3145, "step": 
6107 }, { "epoch": 4.235783633841886, "grad_norm": 0.4062479700908595, "learning_rate": 6.948980781527214e-07, "loss": 0.3542, "step": 6108 }, { "epoch": 4.236477115117892, "grad_norm": 0.42045071797037775, "learning_rate": 6.936674830557167e-07, "loss": 0.3669, "step": 6109 }, { "epoch": 4.237170596393898, "grad_norm": 0.4016978652511189, "learning_rate": 6.924378973318651e-07, "loss": 0.3565, "step": 6110 }, { "epoch": 4.237864077669903, "grad_norm": 0.3964139730815441, "learning_rate": 6.912093212693738e-07, "loss": 0.3306, "step": 6111 }, { "epoch": 4.238557558945908, "grad_norm": 0.4616711831972742, "learning_rate": 6.899817551562127e-07, "loss": 0.3354, "step": 6112 }, { "epoch": 4.239251040221914, "grad_norm": 0.4031277226578537, "learning_rate": 6.887551992801123e-07, "loss": 0.3124, "step": 6113 }, { "epoch": 4.23994452149792, "grad_norm": 0.3825425788061083, "learning_rate": 6.875296539285697e-07, "loss": 0.3284, "step": 6114 }, { "epoch": 4.240638002773925, "grad_norm": 0.5139002770828404, "learning_rate": 6.863051193888443e-07, "loss": 0.3795, "step": 6115 }, { "epoch": 4.2413314840499305, "grad_norm": 0.4457720003365879, "learning_rate": 6.850815959479573e-07, "loss": 0.345, "step": 6116 }, { "epoch": 4.242024965325936, "grad_norm": 0.41310283107303697, "learning_rate": 6.838590838926951e-07, "loss": 0.3024, "step": 6117 }, { "epoch": 4.242718446601942, "grad_norm": 0.4375598526906385, "learning_rate": 6.826375835096038e-07, "loss": 0.3356, "step": 6118 }, { "epoch": 4.243411927877947, "grad_norm": 0.5014727873065993, "learning_rate": 6.814170950849952e-07, "loss": 0.3702, "step": 6119 }, { "epoch": 4.244105409153953, "grad_norm": 0.46374848345406067, "learning_rate": 6.801976189049436e-07, "loss": 0.3669, "step": 6120 }, { "epoch": 4.2447988904299585, "grad_norm": 0.3987952691796923, "learning_rate": 6.789791552552838e-07, "loss": 0.3382, "step": 6121 }, { "epoch": 4.245492371705964, "grad_norm": 0.4623724769354124, "learning_rate": 6.777617044216161e-07, "loss": 0.3275, "step": 6122 }, { "epoch": 4.246185852981969, "grad_norm": 0.47179020358321583, "learning_rate": 6.765452666893013e-07, "loss": 0.3714, "step": 6123 }, { "epoch": 4.246879334257975, "grad_norm": 0.3920320920597318, "learning_rate": 6.75329842343464e-07, "loss": 0.3267, "step": 6124 }, { "epoch": 4.247572815533981, "grad_norm": 0.4637309647230182, "learning_rate": 6.741154316689918e-07, "loss": 0.2894, "step": 6125 }, { "epoch": 4.2482662968099865, "grad_norm": 0.38636002550708953, "learning_rate": 6.729020349505322e-07, "loss": 0.339, "step": 6126 }, { "epoch": 4.248959778085991, "grad_norm": 0.40719786163839855, "learning_rate": 6.716896524724975e-07, "loss": 0.3415, "step": 6127 }, { "epoch": 4.249653259361997, "grad_norm": 0.4352294958256507, "learning_rate": 6.704782845190622e-07, "loss": 0.3587, "step": 6128 }, { "epoch": 4.250346740638003, "grad_norm": 0.4077453097774442, "learning_rate": 6.692679313741596e-07, "loss": 0.3563, "step": 6129 }, { "epoch": 4.251040221914009, "grad_norm": 0.4146069695584132, "learning_rate": 6.680585933214895e-07, "loss": 0.3169, "step": 6130 }, { "epoch": 4.2517337031900135, "grad_norm": 0.4942546675884171, "learning_rate": 6.668502706445129e-07, "loss": 0.3472, "step": 6131 }, { "epoch": 4.252427184466019, "grad_norm": 0.47257267893951527, "learning_rate": 6.656429636264483e-07, "loss": 0.3582, "step": 6132 }, { "epoch": 4.253120665742025, "grad_norm": 0.4030679496912566, "learning_rate": 6.644366725502844e-07, "loss": 0.3536, "step": 6133 }, { "epoch": 4.253814147018031, 
"grad_norm": 0.420845710532777, "learning_rate": 6.632313976987637e-07, "loss": 0.3116, "step": 6134 }, { "epoch": 4.254507628294036, "grad_norm": 0.4339162212423455, "learning_rate": 6.620271393543954e-07, "loss": 0.3534, "step": 6135 }, { "epoch": 4.2552011095700415, "grad_norm": 0.43094117989243275, "learning_rate": 6.608238977994491e-07, "loss": 0.3845, "step": 6136 }, { "epoch": 4.255894590846047, "grad_norm": 0.4114484366764265, "learning_rate": 6.596216733159544e-07, "loss": 0.3377, "step": 6137 }, { "epoch": 4.256588072122053, "grad_norm": 0.42784565493387683, "learning_rate": 6.584204661857063e-07, "loss": 0.3363, "step": 6138 }, { "epoch": 4.257281553398058, "grad_norm": 0.4453671767248613, "learning_rate": 6.572202766902569e-07, "loss": 0.3485, "step": 6139 }, { "epoch": 4.257975034674064, "grad_norm": 0.6829864492937774, "learning_rate": 6.560211051109222e-07, "loss": 0.3758, "step": 6140 }, { "epoch": 4.2586685159500695, "grad_norm": 0.41882460832398527, "learning_rate": 6.548229517287802e-07, "loss": 0.3665, "step": 6141 }, { "epoch": 4.259361997226075, "grad_norm": 0.4046853028028085, "learning_rate": 6.53625816824669e-07, "loss": 0.3371, "step": 6142 }, { "epoch": 4.26005547850208, "grad_norm": 0.4067265391069463, "learning_rate": 6.524297006791891e-07, "loss": 0.3405, "step": 6143 }, { "epoch": 4.260748959778086, "grad_norm": 0.43429700655270675, "learning_rate": 6.512346035727002e-07, "loss": 0.3675, "step": 6144 }, { "epoch": 4.261442441054092, "grad_norm": 0.4036417739625904, "learning_rate": 6.500405257853249e-07, "loss": 0.3486, "step": 6145 }, { "epoch": 4.262135922330097, "grad_norm": 0.42098798131586823, "learning_rate": 6.488474675969475e-07, "loss": 0.3478, "step": 6146 }, { "epoch": 4.262829403606102, "grad_norm": 0.3916550706558104, "learning_rate": 6.476554292872101e-07, "loss": 0.3865, "step": 6147 }, { "epoch": 4.263522884882108, "grad_norm": 0.3901404966600708, "learning_rate": 6.46464411135519e-07, "loss": 0.3003, "step": 6148 }, { "epoch": 4.264216366158114, "grad_norm": 0.44135706046747253, "learning_rate": 6.452744134210409e-07, "loss": 0.3262, "step": 6149 }, { "epoch": 4.26490984743412, "grad_norm": 0.393616806805249, "learning_rate": 6.440854364227e-07, "loss": 0.3376, "step": 6150 }, { "epoch": 4.2656033287101245, "grad_norm": 0.49525311947866135, "learning_rate": 6.428974804191879e-07, "loss": 0.3261, "step": 6151 }, { "epoch": 4.26629680998613, "grad_norm": 0.47144963602483675, "learning_rate": 6.4171054568895e-07, "loss": 0.3503, "step": 6152 }, { "epoch": 4.266990291262136, "grad_norm": 0.6374627627921992, "learning_rate": 6.405246325101955e-07, "loss": 0.3802, "step": 6153 }, { "epoch": 4.267683772538142, "grad_norm": 0.383849705197869, "learning_rate": 6.393397411608954e-07, "loss": 0.3505, "step": 6154 }, { "epoch": 4.268377253814147, "grad_norm": 0.5479555072255522, "learning_rate": 6.38155871918778e-07, "loss": 0.3897, "step": 6155 }, { "epoch": 4.2690707350901524, "grad_norm": 0.4261171194009753, "learning_rate": 6.369730250613337e-07, "loss": 0.3601, "step": 6156 }, { "epoch": 4.269764216366158, "grad_norm": 0.4239048841822734, "learning_rate": 6.357912008658151e-07, "loss": 0.3419, "step": 6157 }, { "epoch": 4.270457697642164, "grad_norm": 0.4657155620165313, "learning_rate": 6.346103996092313e-07, "loss": 0.3165, "step": 6158 }, { "epoch": 4.271151178918169, "grad_norm": 0.5491868563638476, "learning_rate": 6.334306215683533e-07, "loss": 0.3789, "step": 6159 }, { "epoch": 4.271844660194175, "grad_norm": 0.4319250838472781, 
"learning_rate": 6.322518670197142e-07, "loss": 0.392, "step": 6160 }, { "epoch": 4.27253814147018, "grad_norm": 0.4328432241317883, "learning_rate": 6.310741362396044e-07, "loss": 0.319, "step": 6161 }, { "epoch": 4.273231622746186, "grad_norm": 0.3736659692890957, "learning_rate": 6.298974295040771e-07, "loss": 0.3671, "step": 6162 }, { "epoch": 4.273925104022191, "grad_norm": 0.40190227149262936, "learning_rate": 6.287217470889412e-07, "loss": 0.3638, "step": 6163 }, { "epoch": 4.274618585298197, "grad_norm": 0.713328387990018, "learning_rate": 6.275470892697699e-07, "loss": 0.3158, "step": 6164 }, { "epoch": 4.275312066574203, "grad_norm": 0.46495555001833455, "learning_rate": 6.263734563218949e-07, "loss": 0.3586, "step": 6165 }, { "epoch": 4.276005547850208, "grad_norm": 0.43175522940112815, "learning_rate": 6.252008485204053e-07, "loss": 0.386, "step": 6166 }, { "epoch": 4.276699029126213, "grad_norm": 1.1876253593297061, "learning_rate": 6.240292661401531e-07, "loss": 0.3104, "step": 6167 }, { "epoch": 4.277392510402219, "grad_norm": 0.37604448193583445, "learning_rate": 6.228587094557487e-07, "loss": 0.328, "step": 6168 }, { "epoch": 4.278085991678225, "grad_norm": 0.4312877507264945, "learning_rate": 6.216891787415618e-07, "loss": 0.3199, "step": 6169 }, { "epoch": 4.278779472954231, "grad_norm": 0.46327010950639513, "learning_rate": 6.205206742717235e-07, "loss": 0.3204, "step": 6170 }, { "epoch": 4.279472954230235, "grad_norm": 0.4300735295759126, "learning_rate": 6.193531963201204e-07, "loss": 0.3708, "step": 6171 }, { "epoch": 4.280166435506241, "grad_norm": 0.406823148007132, "learning_rate": 6.181867451604017e-07, "loss": 0.4043, "step": 6172 }, { "epoch": 4.280859916782247, "grad_norm": 0.5703469794329353, "learning_rate": 6.17021321065977e-07, "loss": 0.3539, "step": 6173 }, { "epoch": 4.281553398058253, "grad_norm": 0.4194635742416221, "learning_rate": 6.158569243100098e-07, "loss": 0.345, "step": 6174 }, { "epoch": 4.282246879334258, "grad_norm": 0.39675649447701894, "learning_rate": 6.146935551654298e-07, "loss": 0.3916, "step": 6175 }, { "epoch": 4.282940360610263, "grad_norm": 0.3940883541329325, "learning_rate": 6.135312139049194e-07, "loss": 0.3705, "step": 6176 }, { "epoch": 4.283633841886269, "grad_norm": 0.3779188880551679, "learning_rate": 6.123699008009226e-07, "loss": 0.3421, "step": 6177 }, { "epoch": 4.284327323162275, "grad_norm": 0.4424265070556396, "learning_rate": 6.11209616125647e-07, "loss": 0.3658, "step": 6178 }, { "epoch": 4.28502080443828, "grad_norm": 0.4250513685627804, "learning_rate": 6.10050360151051e-07, "loss": 0.3291, "step": 6179 }, { "epoch": 4.285714285714286, "grad_norm": 0.45555341494865514, "learning_rate": 6.088921331488568e-07, "loss": 0.359, "step": 6180 }, { "epoch": 4.286407766990291, "grad_norm": 0.43125831610772375, "learning_rate": 6.077349353905465e-07, "loss": 0.3107, "step": 6181 }, { "epoch": 4.287101248266297, "grad_norm": 0.4039363152679757, "learning_rate": 6.065787671473556e-07, "loss": 0.3776, "step": 6182 }, { "epoch": 4.287794729542302, "grad_norm": 0.38540240716953844, "learning_rate": 6.054236286902837e-07, "loss": 0.3467, "step": 6183 }, { "epoch": 4.288488210818308, "grad_norm": 0.4579479097403802, "learning_rate": 6.042695202900855e-07, "loss": 0.3558, "step": 6184 }, { "epoch": 4.2891816920943135, "grad_norm": 0.423682803441983, "learning_rate": 6.031164422172764e-07, "loss": 0.3732, "step": 6185 }, { "epoch": 4.289875173370319, "grad_norm": 0.3877837283274081, "learning_rate": 6.019643947421294e-07, "loss": 
0.327, "step": 6186 }, { "epoch": 4.290568654646324, "grad_norm": 0.4520411830361061, "learning_rate": 6.008133781346764e-07, "loss": 0.3852, "step": 6187 }, { "epoch": 4.29126213592233, "grad_norm": 0.5937281099550518, "learning_rate": 5.996633926647083e-07, "loss": 0.3452, "step": 6188 }, { "epoch": 4.291955617198336, "grad_norm": 0.417706808891538, "learning_rate": 5.985144386017711e-07, "loss": 0.3901, "step": 6189 }, { "epoch": 4.2926490984743415, "grad_norm": 0.41980789291666265, "learning_rate": 5.973665162151721e-07, "loss": 0.3793, "step": 6190 }, { "epoch": 4.293342579750346, "grad_norm": 0.4472645091665417, "learning_rate": 5.962196257739778e-07, "loss": 0.3072, "step": 6191 }, { "epoch": 4.294036061026352, "grad_norm": 0.6167083905787968, "learning_rate": 5.950737675470081e-07, "loss": 0.3215, "step": 6192 }, { "epoch": 4.294729542302358, "grad_norm": 0.42346397021702487, "learning_rate": 5.939289418028455e-07, "loss": 0.3411, "step": 6193 }, { "epoch": 4.295423023578364, "grad_norm": 0.4414484901294475, "learning_rate": 5.927851488098297e-07, "loss": 0.3803, "step": 6194 }, { "epoch": 4.296116504854369, "grad_norm": 0.5654918787529339, "learning_rate": 5.916423888360546e-07, "loss": 0.3475, "step": 6195 }, { "epoch": 4.296809986130374, "grad_norm": 0.40382204330186966, "learning_rate": 5.905006621493787e-07, "loss": 0.345, "step": 6196 }, { "epoch": 4.29750346740638, "grad_norm": 0.6680270898026206, "learning_rate": 5.893599690174113e-07, "loss": 0.3631, "step": 6197 }, { "epoch": 4.298196948682386, "grad_norm": 0.4477827265195121, "learning_rate": 5.88220309707524e-07, "loss": 0.37, "step": 6198 }, { "epoch": 4.298890429958391, "grad_norm": 0.9396659341260173, "learning_rate": 5.870816844868454e-07, "loss": 0.3488, "step": 6199 }, { "epoch": 4.2995839112343965, "grad_norm": 0.42597375744288096, "learning_rate": 5.859440936222588e-07, "loss": 0.3244, "step": 6200 }, { "epoch": 4.300277392510402, "grad_norm": 0.40344659290859214, "learning_rate": 5.848075373804091e-07, "loss": 0.3465, "step": 6201 }, { "epoch": 4.300970873786408, "grad_norm": 0.38904474351836377, "learning_rate": 5.836720160276971e-07, "loss": 0.3514, "step": 6202 }, { "epoch": 4.301664355062413, "grad_norm": 0.4250164064598548, "learning_rate": 5.825375298302788e-07, "loss": 0.3493, "step": 6203 }, { "epoch": 4.302357836338419, "grad_norm": 0.3879367816339768, "learning_rate": 5.814040790540709e-07, "loss": 0.3584, "step": 6204 }, { "epoch": 4.3030513176144245, "grad_norm": 0.4094681652813443, "learning_rate": 5.80271663964746e-07, "loss": 0.3432, "step": 6205 }, { "epoch": 4.30374479889043, "grad_norm": 0.4356177516472818, "learning_rate": 5.791402848277338e-07, "loss": 0.3526, "step": 6206 }, { "epoch": 4.304438280166435, "grad_norm": 0.42138383489532405, "learning_rate": 5.780099419082225e-07, "loss": 0.371, "step": 6207 }, { "epoch": 4.305131761442441, "grad_norm": 0.41835172082853456, "learning_rate": 5.768806354711542e-07, "loss": 0.3453, "step": 6208 }, { "epoch": 4.305825242718447, "grad_norm": 0.41561602078900267, "learning_rate": 5.757523657812314e-07, "loss": 0.374, "step": 6209 }, { "epoch": 4.3065187239944525, "grad_norm": 0.45871631927087475, "learning_rate": 5.746251331029129e-07, "loss": 0.4013, "step": 6210 }, { "epoch": 4.307212205270457, "grad_norm": 0.3994641431752328, "learning_rate": 5.734989377004119e-07, "loss": 0.3125, "step": 6211 }, { "epoch": 4.307905686546463, "grad_norm": 0.40984984446095474, "learning_rate": 5.723737798377021e-07, "loss": 0.3997, "step": 6212 }, { "epoch": 
4.308599167822469, "grad_norm": 0.41280406942887543, "learning_rate": 5.712496597785122e-07, "loss": 0.3075, "step": 6213 }, { "epoch": 4.309292649098475, "grad_norm": 0.37925251180171826, "learning_rate": 5.701265777863268e-07, "loss": 0.3312, "step": 6214 }, { "epoch": 4.3099861303744795, "grad_norm": 0.38193550940595977, "learning_rate": 5.690045341243905e-07, "loss": 0.3328, "step": 6215 }, { "epoch": 4.310679611650485, "grad_norm": 0.397144039490869, "learning_rate": 5.678835290556995e-07, "loss": 0.335, "step": 6216 }, { "epoch": 4.311373092926491, "grad_norm": 0.37757178720838747, "learning_rate": 5.667635628430102e-07, "loss": 0.3237, "step": 6217 }, { "epoch": 4.312066574202497, "grad_norm": 0.4136908646046677, "learning_rate": 5.65644635748836e-07, "loss": 0.3471, "step": 6218 }, { "epoch": 4.312760055478502, "grad_norm": 0.3860292890394113, "learning_rate": 5.645267480354427e-07, "loss": 0.3291, "step": 6219 }, { "epoch": 4.3134535367545075, "grad_norm": 0.3939865909723036, "learning_rate": 5.63409899964858e-07, "loss": 0.361, "step": 6220 }, { "epoch": 4.314147018030513, "grad_norm": 0.3810758534510466, "learning_rate": 5.62294091798859e-07, "loss": 0.3643, "step": 6221 }, { "epoch": 4.314840499306519, "grad_norm": 0.42709070042461467, "learning_rate": 5.611793237989866e-07, "loss": 0.362, "step": 6222 }, { "epoch": 4.315533980582524, "grad_norm": 0.42060904025699275, "learning_rate": 5.600655962265345e-07, "loss": 0.3858, "step": 6223 }, { "epoch": 4.31622746185853, "grad_norm": 0.37044351820730226, "learning_rate": 5.589529093425495e-07, "loss": 0.2944, "step": 6224 }, { "epoch": 4.3169209431345354, "grad_norm": 0.4018705583531944, "learning_rate": 5.578412634078406e-07, "loss": 0.3219, "step": 6225 }, { "epoch": 4.317614424410541, "grad_norm": 0.4552219393699127, "learning_rate": 5.567306586829668e-07, "loss": 0.3141, "step": 6226 }, { "epoch": 4.318307905686546, "grad_norm": 0.43884630038911576, "learning_rate": 5.556210954282465e-07, "loss": 0.3614, "step": 6227 }, { "epoch": 4.319001386962552, "grad_norm": 0.41863593852180997, "learning_rate": 5.545125739037555e-07, "loss": 0.4291, "step": 6228 }, { "epoch": 4.319694868238558, "grad_norm": 0.7679789964136878, "learning_rate": 5.534050943693197e-07, "loss": 0.3392, "step": 6229 }, { "epoch": 4.320388349514563, "grad_norm": 0.38067444619224, "learning_rate": 5.522986570845257e-07, "loss": 0.3032, "step": 6230 }, { "epoch": 4.321081830790568, "grad_norm": 0.4142321804738146, "learning_rate": 5.511932623087163e-07, "loss": 0.3256, "step": 6231 }, { "epoch": 4.321775312066574, "grad_norm": 0.38731523768753373, "learning_rate": 5.500889103009855e-07, "loss": 0.3356, "step": 6232 }, { "epoch": 4.32246879334258, "grad_norm": 0.4440993313107543, "learning_rate": 5.489856013201872e-07, "loss": 0.4083, "step": 6233 }, { "epoch": 4.323162274618586, "grad_norm": 0.40240024547632747, "learning_rate": 5.478833356249274e-07, "loss": 0.3223, "step": 6234 }, { "epoch": 4.3238557558945905, "grad_norm": 0.39262817711177267, "learning_rate": 5.467821134735701e-07, "loss": 0.3742, "step": 6235 }, { "epoch": 4.324549237170596, "grad_norm": 0.408222550882554, "learning_rate": 5.456819351242349e-07, "loss": 0.3413, "step": 6236 }, { "epoch": 4.325242718446602, "grad_norm": 0.456644835750475, "learning_rate": 5.445828008347925e-07, "loss": 0.3547, "step": 6237 }, { "epoch": 4.325936199722608, "grad_norm": 0.9994355490037153, "learning_rate": 5.434847108628749e-07, "loss": 0.3583, "step": 6238 }, { "epoch": 4.326629680998613, "grad_norm": 
0.4090478902096986, "learning_rate": 5.42387665465865e-07, "loss": 0.3516, "step": 6239 }, { "epoch": 4.327323162274618, "grad_norm": 0.3891216014127711, "learning_rate": 5.412916649009026e-07, "loss": 0.3685, "step": 6240 }, { "epoch": 4.328016643550624, "grad_norm": 0.4146122313921773, "learning_rate": 5.401967094248839e-07, "loss": 0.3381, "step": 6241 }, { "epoch": 4.32871012482663, "grad_norm": 0.4328714296396735, "learning_rate": 5.391027992944559e-07, "loss": 0.3898, "step": 6242 }, { "epoch": 4.329403606102635, "grad_norm": 0.428828804359781, "learning_rate": 5.380099347660245e-07, "loss": 0.3412, "step": 6243 }, { "epoch": 4.330097087378641, "grad_norm": 0.4038367750406883, "learning_rate": 5.369181160957498e-07, "loss": 0.3394, "step": 6244 }, { "epoch": 4.330790568654646, "grad_norm": 0.4331393800229408, "learning_rate": 5.358273435395451e-07, "loss": 0.3666, "step": 6245 }, { "epoch": 4.331484049930652, "grad_norm": 0.41032139653484206, "learning_rate": 5.3473761735308e-07, "loss": 0.3314, "step": 6246 }, { "epoch": 4.332177531206657, "grad_norm": 0.4241927325262719, "learning_rate": 5.336489377917786e-07, "loss": 0.3606, "step": 6247 }, { "epoch": 4.332871012482663, "grad_norm": 0.42256931019101407, "learning_rate": 5.325613051108181e-07, "loss": 0.3651, "step": 6248 }, { "epoch": 4.333564493758669, "grad_norm": 0.4423421216341831, "learning_rate": 5.314747195651349e-07, "loss": 0.3193, "step": 6249 }, { "epoch": 4.334257975034674, "grad_norm": 0.7089264750675331, "learning_rate": 5.303891814094137e-07, "loss": 0.3419, "step": 6250 }, { "epoch": 4.334951456310679, "grad_norm": 0.3979645535749209, "learning_rate": 5.293046908980982e-07, "loss": 0.3716, "step": 6251 }, { "epoch": 4.335644937586685, "grad_norm": 0.4140966129946514, "learning_rate": 5.282212482853855e-07, "loss": 0.349, "step": 6252 }, { "epoch": 4.336338418862691, "grad_norm": 0.4322214711878763, "learning_rate": 5.271388538252254e-07, "loss": 0.3773, "step": 6253 }, { "epoch": 4.3370319001386965, "grad_norm": 0.46375529184788306, "learning_rate": 5.260575077713237e-07, "loss": 0.3685, "step": 6254 }, { "epoch": 4.337725381414701, "grad_norm": 0.5093682290616093, "learning_rate": 5.249772103771411e-07, "loss": 0.3539, "step": 6255 }, { "epoch": 4.338418862690707, "grad_norm": 0.4151991185971279, "learning_rate": 5.2389796189589e-07, "loss": 0.3256, "step": 6256 }, { "epoch": 4.339112343966713, "grad_norm": 0.4876112463290477, "learning_rate": 5.228197625805392e-07, "loss": 0.3727, "step": 6257 }, { "epoch": 4.339805825242719, "grad_norm": 0.44803030415871914, "learning_rate": 5.21742612683811e-07, "loss": 0.3829, "step": 6258 }, { "epoch": 4.340499306518724, "grad_norm": 0.4020862925962174, "learning_rate": 5.206665124581811e-07, "loss": 0.3804, "step": 6259 }, { "epoch": 4.341192787794729, "grad_norm": 0.4247183562690849, "learning_rate": 5.195914621558812e-07, "loss": 0.3645, "step": 6260 }, { "epoch": 4.341886269070735, "grad_norm": 0.45851218777202424, "learning_rate": 5.185174620288924e-07, "loss": 0.3938, "step": 6261 }, { "epoch": 4.342579750346741, "grad_norm": 0.43178059180305894, "learning_rate": 5.174445123289546e-07, "loss": 0.3288, "step": 6262 }, { "epoch": 4.343273231622746, "grad_norm": 0.3941526023398659, "learning_rate": 5.163726133075597e-07, "loss": 0.3337, "step": 6263 }, { "epoch": 4.343966712898752, "grad_norm": 1.1298319852933496, "learning_rate": 5.153017652159509e-07, "loss": 0.3462, "step": 6264 }, { "epoch": 4.344660194174757, "grad_norm": 0.3863566658702163, "learning_rate": 
5.1423196830513e-07, "loss": 0.3548, "step": 6265 }, { "epoch": 4.345353675450763, "grad_norm": 0.389988149443278, "learning_rate": 5.131632228258459e-07, "loss": 0.3556, "step": 6266 }, { "epoch": 4.346047156726768, "grad_norm": 0.4067820454890897, "learning_rate": 5.120955290286089e-07, "loss": 0.362, "step": 6267 }, { "epoch": 4.346740638002774, "grad_norm": 0.4314836365529451, "learning_rate": 5.110288871636776e-07, "loss": 0.3869, "step": 6268 }, { "epoch": 4.3474341192787795, "grad_norm": 0.4523265486060939, "learning_rate": 5.099632974810631e-07, "loss": 0.3537, "step": 6269 }, { "epoch": 4.348127600554785, "grad_norm": 0.4452057505688693, "learning_rate": 5.088987602305351e-07, "loss": 0.3421, "step": 6270 }, { "epoch": 4.34882108183079, "grad_norm": 0.41812913581316336, "learning_rate": 5.0783527566161e-07, "loss": 0.3743, "step": 6271 }, { "epoch": 4.349514563106796, "grad_norm": 0.36653016088480334, "learning_rate": 5.067728440235626e-07, "loss": 0.3179, "step": 6272 }, { "epoch": 4.350208044382802, "grad_norm": 0.4147025993998611, "learning_rate": 5.0571146556542e-07, "loss": 0.3377, "step": 6273 }, { "epoch": 4.3509015256588075, "grad_norm": 0.4269001973458045, "learning_rate": 5.0465114053596e-07, "loss": 0.35, "step": 6274 }, { "epoch": 4.351595006934812, "grad_norm": 0.39427856589738997, "learning_rate": 5.035918691837155e-07, "loss": 0.376, "step": 6275 }, { "epoch": 4.352288488210818, "grad_norm": 0.3995736149075299, "learning_rate": 5.025336517569723e-07, "loss": 0.3358, "step": 6276 }, { "epoch": 4.352981969486824, "grad_norm": 0.45975735789614397, "learning_rate": 5.014764885037693e-07, "loss": 0.3249, "step": 6277 }, { "epoch": 4.35367545076283, "grad_norm": 0.3900761533925085, "learning_rate": 5.004203796718987e-07, "loss": 0.3369, "step": 6278 }, { "epoch": 4.354368932038835, "grad_norm": 0.41847197456481755, "learning_rate": 4.993653255089021e-07, "loss": 0.328, "step": 6279 }, { "epoch": 4.35506241331484, "grad_norm": 0.46397746855439015, "learning_rate": 4.983113262620781e-07, "loss": 0.3395, "step": 6280 }, { "epoch": 4.355755894590846, "grad_norm": 0.4085685443900029, "learning_rate": 4.972583821784777e-07, "loss": 0.3839, "step": 6281 }, { "epoch": 4.356449375866852, "grad_norm": 0.4094690357682738, "learning_rate": 4.962064935049016e-07, "loss": 0.3914, "step": 6282 }, { "epoch": 4.357142857142857, "grad_norm": 0.4166281529519862, "learning_rate": 4.951556604879049e-07, "loss": 0.3212, "step": 6283 }, { "epoch": 4.3578363384188625, "grad_norm": 0.4490450622483123, "learning_rate": 4.941058833737956e-07, "loss": 0.3723, "step": 6284 }, { "epoch": 4.358529819694868, "grad_norm": 0.41668814494773443, "learning_rate": 4.930571624086339e-07, "loss": 0.3753, "step": 6285 }, { "epoch": 4.359223300970874, "grad_norm": 0.3950256038203281, "learning_rate": 4.920094978382339e-07, "loss": 0.3274, "step": 6286 }, { "epoch": 4.359916782246879, "grad_norm": 0.3805846131438291, "learning_rate": 4.909628899081581e-07, "loss": 0.3214, "step": 6287 }, { "epoch": 4.360610263522885, "grad_norm": 0.38900389826563797, "learning_rate": 4.899173388637252e-07, "loss": 0.3505, "step": 6288 }, { "epoch": 4.3613037447988905, "grad_norm": 0.39999103900945704, "learning_rate": 4.888728449500052e-07, "loss": 0.3641, "step": 6289 }, { "epoch": 4.361997226074896, "grad_norm": 0.40785582825012384, "learning_rate": 4.878294084118185e-07, "loss": 0.3523, "step": 6290 }, { "epoch": 4.362690707350901, "grad_norm": 0.428957856667886, "learning_rate": 4.867870294937393e-07, "loss": 0.362, "step": 
6291 }, { "epoch": 4.363384188626907, "grad_norm": 0.39429097731269275, "learning_rate": 4.857457084400957e-07, "loss": 0.3558, "step": 6292 }, { "epoch": 4.364077669902913, "grad_norm": 0.43027300052080414, "learning_rate": 4.847054454949617e-07, "loss": 0.3165, "step": 6293 }, { "epoch": 4.3647711511789185, "grad_norm": 0.4001717709838788, "learning_rate": 4.836662409021725e-07, "loss": 0.3794, "step": 6294 }, { "epoch": 4.365464632454923, "grad_norm": 0.38825690155737724, "learning_rate": 4.826280949053064e-07, "loss": 0.3565, "step": 6295 }, { "epoch": 4.366158113730929, "grad_norm": 0.41986818401117687, "learning_rate": 4.815910077476987e-07, "loss": 0.403, "step": 6296 }, { "epoch": 4.366851595006935, "grad_norm": 0.40056475962480215, "learning_rate": 4.805549796724357e-07, "loss": 0.338, "step": 6297 }, { "epoch": 4.367545076282941, "grad_norm": 0.41571163317843407, "learning_rate": 4.79520010922353e-07, "loss": 0.3623, "step": 6298 }, { "epoch": 4.3682385575589455, "grad_norm": 0.3724373986559259, "learning_rate": 4.784861017400411e-07, "loss": 0.3194, "step": 6299 }, { "epoch": 4.368932038834951, "grad_norm": 0.5453313923600107, "learning_rate": 4.774532523678415e-07, "loss": 0.3037, "step": 6300 }, { "epoch": 4.369625520110957, "grad_norm": 0.43137188826371686, "learning_rate": 4.7642146304784475e-07, "loss": 0.3859, "step": 6301 }, { "epoch": 4.370319001386963, "grad_norm": 0.38940894932145365, "learning_rate": 4.7539073402189605e-07, "loss": 0.3519, "step": 6302 }, { "epoch": 4.371012482662968, "grad_norm": 0.390519751931627, "learning_rate": 4.7436106553159e-07, "loss": 0.3633, "step": 6303 }, { "epoch": 4.3717059639389735, "grad_norm": 0.428762268328225, "learning_rate": 4.7333245781827463e-07, "loss": 0.388, "step": 6304 }, { "epoch": 4.372399445214979, "grad_norm": 0.4331875010957346, "learning_rate": 4.7230491112304767e-07, "loss": 0.3477, "step": 6305 }, { "epoch": 4.373092926490985, "grad_norm": 0.39582647322625164, "learning_rate": 4.712784256867581e-07, "loss": 0.3278, "step": 6306 }, { "epoch": 4.37378640776699, "grad_norm": 0.63486828029795, "learning_rate": 4.7025300175000675e-07, "loss": 0.3886, "step": 6307 }, { "epoch": 4.374479889042996, "grad_norm": 0.36852223068564477, "learning_rate": 4.69228639553147e-07, "loss": 0.3154, "step": 6308 }, { "epoch": 4.375173370319001, "grad_norm": 0.39740566447596537, "learning_rate": 4.6820533933627956e-07, "loss": 0.2965, "step": 6309 }, { "epoch": 4.375866851595007, "grad_norm": 0.4155349731984529, "learning_rate": 4.6718310133926084e-07, "loss": 0.3323, "step": 6310 }, { "epoch": 4.376560332871012, "grad_norm": 0.46088054896957437, "learning_rate": 4.6616192580169306e-07, "loss": 0.3774, "step": 6311 }, { "epoch": 4.377253814147018, "grad_norm": 0.3980571868353865, "learning_rate": 4.651418129629348e-07, "loss": 0.3279, "step": 6312 }, { "epoch": 4.377947295423024, "grad_norm": 0.4400787696951905, "learning_rate": 4.6412276306209426e-07, "loss": 0.3894, "step": 6313 }, { "epoch": 4.378640776699029, "grad_norm": 0.4315819008936016, "learning_rate": 4.631047763380264e-07, "loss": 0.3232, "step": 6314 }, { "epoch": 4.379334257975034, "grad_norm": 0.4061097626261044, "learning_rate": 4.620878530293421e-07, "loss": 0.3594, "step": 6315 }, { "epoch": 4.38002773925104, "grad_norm": 0.4617086676641591, "learning_rate": 4.61071993374399e-07, "loss": 0.3613, "step": 6316 }, { "epoch": 4.380721220527046, "grad_norm": 0.4837472209010971, "learning_rate": 4.600571976113083e-07, "loss": 0.3306, "step": 6317 }, { "epoch": 
4.381414701803052, "grad_norm": 0.4266507892564871, "learning_rate": 4.590434659779314e-07, "loss": 0.3364, "step": 6318 }, { "epoch": 4.3821081830790565, "grad_norm": 0.4145526426917289, "learning_rate": 4.5803079871187816e-07, "loss": 0.3561, "step": 6319 }, { "epoch": 4.382801664355062, "grad_norm": 0.585601135435787, "learning_rate": 4.570191960505116e-07, "loss": 0.3438, "step": 6320 }, { "epoch": 4.383495145631068, "grad_norm": 0.5158600867218267, "learning_rate": 4.560086582309431e-07, "loss": 0.3316, "step": 6321 }, { "epoch": 4.384188626907074, "grad_norm": 0.3885637940829561, "learning_rate": 4.54999185490036e-07, "loss": 0.3029, "step": 6322 }, { "epoch": 4.384882108183079, "grad_norm": 0.40677268320065696, "learning_rate": 4.5399077806440486e-07, "loss": 0.2962, "step": 6323 }, { "epoch": 4.385575589459084, "grad_norm": 0.4795919028533219, "learning_rate": 4.529834361904101e-07, "loss": 0.3882, "step": 6324 }, { "epoch": 4.38626907073509, "grad_norm": 0.4015692080918974, "learning_rate": 4.5197716010416723e-07, "loss": 0.3519, "step": 6325 }, { "epoch": 4.386962552011096, "grad_norm": 0.4019314645164274, "learning_rate": 4.509719500415405e-07, "loss": 0.3665, "step": 6326 }, { "epoch": 4.387656033287101, "grad_norm": 0.4176820631419, "learning_rate": 4.4996780623814186e-07, "loss": 0.3473, "step": 6327 }, { "epoch": 4.388349514563107, "grad_norm": 0.43013904897295463, "learning_rate": 4.4896472892933693e-07, "loss": 0.3386, "step": 6328 }, { "epoch": 4.389042995839112, "grad_norm": 0.3700295995202613, "learning_rate": 4.479627183502394e-07, "loss": 0.3298, "step": 6329 }, { "epoch": 4.389736477115118, "grad_norm": 0.3864971972567878, "learning_rate": 4.46961774735713e-07, "loss": 0.3558, "step": 6330 }, { "epoch": 4.390429958391123, "grad_norm": 0.4215785042564987, "learning_rate": 4.4596189832037286e-07, "loss": 0.394, "step": 6331 }, { "epoch": 4.391123439667129, "grad_norm": 0.41938505377725205, "learning_rate": 4.449630893385809e-07, "loss": 0.3625, "step": 6332 }, { "epoch": 4.391816920943135, "grad_norm": 0.3759323573464562, "learning_rate": 4.4396534802445213e-07, "loss": 0.3392, "step": 6333 }, { "epoch": 4.39251040221914, "grad_norm": 0.7048532444104139, "learning_rate": 4.4296867461185e-07, "loss": 0.3742, "step": 6334 }, { "epoch": 4.393203883495145, "grad_norm": 0.5408104347465381, "learning_rate": 4.41973069334386e-07, "loss": 0.3372, "step": 6335 }, { "epoch": 4.393897364771151, "grad_norm": 0.46993520360254293, "learning_rate": 4.40978532425424e-07, "loss": 0.3582, "step": 6336 }, { "epoch": 4.394590846047157, "grad_norm": 0.4428729197009111, "learning_rate": 4.39985064118077e-07, "loss": 0.3481, "step": 6337 }, { "epoch": 4.3952843273231625, "grad_norm": 0.41780682419165016, "learning_rate": 4.3899266464520365e-07, "loss": 0.3481, "step": 6338 }, { "epoch": 4.395977808599167, "grad_norm": 0.43262509885222333, "learning_rate": 4.380013342394196e-07, "loss": 0.3639, "step": 6339 }, { "epoch": 4.396671289875173, "grad_norm": 0.4595878985194084, "learning_rate": 4.370110731330818e-07, "loss": 0.3621, "step": 6340 }, { "epoch": 4.397364771151179, "grad_norm": 0.45021919353226675, "learning_rate": 4.360218815583023e-07, "loss": 0.3852, "step": 6341 }, { "epoch": 4.398058252427185, "grad_norm": 0.5048590140254069, "learning_rate": 4.3503375974694063e-07, "loss": 0.354, "step": 6342 }, { "epoch": 4.39875173370319, "grad_norm": 0.41822513041651366, "learning_rate": 4.340467079306032e-07, "loss": 0.363, "step": 6343 }, { "epoch": 4.399445214979195, "grad_norm": 
0.43747501097360025, "learning_rate": 4.3306072634065e-07, "loss": 0.3561, "step": 6344 }, { "epoch": 4.400138696255201, "grad_norm": 0.4075033166791627, "learning_rate": 4.3207581520818773e-07, "loss": 0.35, "step": 6345 }, { "epoch": 4.400832177531207, "grad_norm": 0.41544014884749425, "learning_rate": 4.310919747640707e-07, "loss": 0.3249, "step": 6346 }, { "epoch": 4.401525658807212, "grad_norm": 0.40260693254121643, "learning_rate": 4.3010920523890554e-07, "loss": 0.4032, "step": 6347 }, { "epoch": 4.402219140083218, "grad_norm": 0.372218437013559, "learning_rate": 4.2912750686304625e-07, "loss": 0.3415, "step": 6348 }, { "epoch": 4.402912621359223, "grad_norm": 0.3931426152441145, "learning_rate": 4.28146879866595e-07, "loss": 0.3326, "step": 6349 }, { "epoch": 4.403606102635229, "grad_norm": 0.38945490066830396, "learning_rate": 4.271673244794056e-07, "loss": 0.3189, "step": 6350 }, { "epoch": 4.404299583911234, "grad_norm": 0.3827032181801738, "learning_rate": 4.2618884093107604e-07, "loss": 0.3451, "step": 6351 }, { "epoch": 4.40499306518724, "grad_norm": 0.39567281835561535, "learning_rate": 4.252114294509574e-07, "loss": 0.3671, "step": 6352 }, { "epoch": 4.4056865464632455, "grad_norm": 0.4114214376501773, "learning_rate": 4.24235090268148e-07, "loss": 0.3467, "step": 6353 }, { "epoch": 4.406380027739251, "grad_norm": 0.4301141111183747, "learning_rate": 4.2325982361149377e-07, "loss": 0.4031, "step": 6354 }, { "epoch": 4.407073509015256, "grad_norm": 0.4556807307871837, "learning_rate": 4.222856297095912e-07, "loss": 0.3503, "step": 6355 }, { "epoch": 4.407766990291262, "grad_norm": 0.3722555998423781, "learning_rate": 4.213125087907821e-07, "loss": 0.3239, "step": 6356 }, { "epoch": 4.408460471567268, "grad_norm": 0.41884999361999586, "learning_rate": 4.2034046108316127e-07, "loss": 0.3665, "step": 6357 }, { "epoch": 4.4091539528432735, "grad_norm": 0.42194226562584725, "learning_rate": 4.193694868145698e-07, "loss": 0.3146, "step": 6358 }, { "epoch": 4.409847434119278, "grad_norm": 0.4169510229784901, "learning_rate": 4.18399586212595e-07, "loss": 0.3199, "step": 6359 }, { "epoch": 4.410540915395284, "grad_norm": 0.4398902848685013, "learning_rate": 4.174307595045768e-07, "loss": 0.386, "step": 6360 }, { "epoch": 4.41123439667129, "grad_norm": 0.6713588084844746, "learning_rate": 4.16463006917599e-07, "loss": 0.3217, "step": 6361 }, { "epoch": 4.411927877947296, "grad_norm": 0.49245107242596364, "learning_rate": 4.154963286784969e-07, "loss": 0.3238, "step": 6362 }, { "epoch": 4.412621359223301, "grad_norm": 0.41633481101546227, "learning_rate": 4.1453072501385415e-07, "loss": 0.3444, "step": 6363 }, { "epoch": 4.413314840499306, "grad_norm": 0.41190021325085757, "learning_rate": 4.135661961499987e-07, "loss": 0.3446, "step": 6364 }, { "epoch": 4.414008321775312, "grad_norm": 0.41880141955352584, "learning_rate": 4.1260274231301025e-07, "loss": 0.339, "step": 6365 }, { "epoch": 4.414701803051318, "grad_norm": 0.4247633183013577, "learning_rate": 4.116403637287153e-07, "loss": 0.3349, "step": 6366 }, { "epoch": 4.415395284327323, "grad_norm": 0.4101174101082708, "learning_rate": 4.10679060622689e-07, "loss": 0.3779, "step": 6367 }, { "epoch": 4.4160887656033285, "grad_norm": 0.4493975809531821, "learning_rate": 4.097188332202545e-07, "loss": 0.3405, "step": 6368 }, { "epoch": 4.416782246879334, "grad_norm": 0.6886349458825491, "learning_rate": 4.0875968174648005e-07, "loss": 0.3873, "step": 6369 }, { "epoch": 4.41747572815534, "grad_norm": 0.5516427166935733, "learning_rate": 
4.078016064261847e-07, "loss": 0.3898, "step": 6370 }, { "epoch": 4.418169209431345, "grad_norm": 0.51104124248911, "learning_rate": 4.068446074839355e-07, "loss": 0.3698, "step": 6371 }, { "epoch": 4.418862690707351, "grad_norm": 0.37054836093861715, "learning_rate": 4.0588868514404466e-07, "loss": 0.3271, "step": 6372 }, { "epoch": 4.4195561719833565, "grad_norm": 0.43217439156912235, "learning_rate": 4.0493383963057354e-07, "loss": 0.359, "step": 6373 }, { "epoch": 4.420249653259362, "grad_norm": 0.41123167200583505, "learning_rate": 4.039800711673314e-07, "loss": 0.3375, "step": 6374 }, { "epoch": 4.420943134535367, "grad_norm": 0.4092117836816686, "learning_rate": 4.0302737997787444e-07, "loss": 0.3707, "step": 6375 }, { "epoch": 4.421636615811373, "grad_norm": 0.3971762370452769, "learning_rate": 4.020757662855079e-07, "loss": 0.3241, "step": 6376 }, { "epoch": 4.422330097087379, "grad_norm": 0.4402934915902368, "learning_rate": 4.011252303132812e-07, "loss": 0.3781, "step": 6377 }, { "epoch": 4.4230235783633844, "grad_norm": 0.4416151158821577, "learning_rate": 4.0017577228399383e-07, "loss": 0.3152, "step": 6378 }, { "epoch": 4.423717059639389, "grad_norm": 0.4063817727030474, "learning_rate": 3.992273924201928e-07, "loss": 0.3431, "step": 6379 }, { "epoch": 4.424410540915395, "grad_norm": 0.4167749691745393, "learning_rate": 3.9828009094416973e-07, "loss": 0.4004, "step": 6380 }, { "epoch": 4.425104022191401, "grad_norm": 0.42586220669925484, "learning_rate": 3.973338680779659e-07, "loss": 0.3854, "step": 6381 }, { "epoch": 4.425797503467407, "grad_norm": 0.36676096467382063, "learning_rate": 3.9638872404337057e-07, "loss": 0.2959, "step": 6382 }, { "epoch": 4.4264909847434115, "grad_norm": 0.4305992075376699, "learning_rate": 3.954446590619154e-07, "loss": 0.4079, "step": 6383 }, { "epoch": 4.427184466019417, "grad_norm": 0.381184772365284, "learning_rate": 3.945016733548862e-07, "loss": 0.3176, "step": 6384 }, { "epoch": 4.427877947295423, "grad_norm": 0.40309557690038145, "learning_rate": 3.9355976714330944e-07, "loss": 0.3663, "step": 6385 }, { "epoch": 4.428571428571429, "grad_norm": 0.3802601095632871, "learning_rate": 3.9261894064796136e-07, "loss": 0.321, "step": 6386 }, { "epoch": 4.429264909847434, "grad_norm": 0.4167808411994532, "learning_rate": 3.916791940893666e-07, "loss": 0.3352, "step": 6387 }, { "epoch": 4.4299583911234395, "grad_norm": 0.3984340665888154, "learning_rate": 3.907405276877929e-07, "loss": 0.3503, "step": 6388 }, { "epoch": 4.430651872399445, "grad_norm": 0.4483459886217029, "learning_rate": 3.898029416632582e-07, "loss": 0.331, "step": 6389 }, { "epoch": 4.431345353675451, "grad_norm": 0.5590900519385273, "learning_rate": 3.8886643623552545e-07, "loss": 0.3229, "step": 6390 }, { "epoch": 4.432038834951456, "grad_norm": 0.41040463424095275, "learning_rate": 3.8793101162410417e-07, "loss": 0.3417, "step": 6391 }, { "epoch": 4.432732316227462, "grad_norm": 0.41344522315356363, "learning_rate": 3.869966680482512e-07, "loss": 0.3499, "step": 6392 }, { "epoch": 4.433425797503467, "grad_norm": 0.3809315116967289, "learning_rate": 3.8606340572697076e-07, "loss": 0.3489, "step": 6393 }, { "epoch": 4.434119278779473, "grad_norm": 0.40744983573471544, "learning_rate": 3.851312248790118e-07, "loss": 0.3704, "step": 6394 }, { "epoch": 4.434812760055478, "grad_norm": 0.4324191308332644, "learning_rate": 3.842001257228728e-07, "loss": 0.3616, "step": 6395 }, { "epoch": 4.435506241331484, "grad_norm": 0.6043370531779009, "learning_rate": 3.8327010847679367e-07, 
"loss": 0.3609, "step": 6396 }, { "epoch": 4.43619972260749, "grad_norm": 0.42549559985897084, "learning_rate": 3.823411733587662e-07, "loss": 0.3429, "step": 6397 }, { "epoch": 4.436893203883495, "grad_norm": 0.3899977164637308, "learning_rate": 3.8141332058652447e-07, "loss": 0.3439, "step": 6398 }, { "epoch": 4.4375866851595, "grad_norm": 0.39979565484471935, "learning_rate": 3.8048655037755066e-07, "loss": 0.3417, "step": 6399 }, { "epoch": 4.438280166435506, "grad_norm": 0.4039727325465911, "learning_rate": 3.795608629490738e-07, "loss": 0.3725, "step": 6400 }, { "epoch": 4.438973647711512, "grad_norm": 0.3902090034425511, "learning_rate": 3.786362585180675e-07, "loss": 0.3321, "step": 6401 }, { "epoch": 4.439667128987518, "grad_norm": 0.4355666390335971, "learning_rate": 3.777127373012529e-07, "loss": 0.322, "step": 6402 }, { "epoch": 4.440360610263523, "grad_norm": 0.4304007853870166, "learning_rate": 3.7679029951509736e-07, "loss": 0.3883, "step": 6403 }, { "epoch": 4.441054091539528, "grad_norm": 0.40025749412819384, "learning_rate": 3.7586894537581187e-07, "loss": 0.3787, "step": 6404 }, { "epoch": 4.441747572815534, "grad_norm": 0.3838135413625623, "learning_rate": 3.749486750993564e-07, "loss": 0.336, "step": 6405 }, { "epoch": 4.44244105409154, "grad_norm": 0.4402419987263163, "learning_rate": 3.740294889014351e-07, "loss": 0.3689, "step": 6406 }, { "epoch": 4.443134535367545, "grad_norm": 0.4712223960422006, "learning_rate": 3.731113869974984e-07, "loss": 0.3576, "step": 6407 }, { "epoch": 4.44382801664355, "grad_norm": 0.37961848140922916, "learning_rate": 3.721943696027441e-07, "loss": 0.343, "step": 6408 }, { "epoch": 4.444521497919556, "grad_norm": 0.4261017222835085, "learning_rate": 3.712784369321121e-07, "loss": 0.3271, "step": 6409 }, { "epoch": 4.445214979195562, "grad_norm": 0.3955025959090337, "learning_rate": 3.703635892002927e-07, "loss": 0.3629, "step": 6410 }, { "epoch": 4.445908460471568, "grad_norm": 0.4303594210927355, "learning_rate": 3.694498266217178e-07, "loss": 0.3559, "step": 6411 }, { "epoch": 4.446601941747573, "grad_norm": 0.3922912554410651, "learning_rate": 3.685371494105683e-07, "loss": 0.3514, "step": 6412 }, { "epoch": 4.447295423023578, "grad_norm": 0.39983174854623693, "learning_rate": 3.676255577807686e-07, "loss": 0.3511, "step": 6413 }, { "epoch": 4.447988904299584, "grad_norm": 0.4365924653744624, "learning_rate": 3.6671505194598777e-07, "loss": 0.3367, "step": 6414 }, { "epoch": 4.448682385575589, "grad_norm": 0.4256680423167934, "learning_rate": 3.6580563211964346e-07, "loss": 0.3551, "step": 6415 }, { "epoch": 4.449375866851595, "grad_norm": 0.41180963080396754, "learning_rate": 3.648972985148974e-07, "loss": 0.3616, "step": 6416 }, { "epoch": 4.450069348127601, "grad_norm": 0.40246046167541016, "learning_rate": 3.6399005134465426e-07, "loss": 0.3099, "step": 6417 }, { "epoch": 4.450762829403606, "grad_norm": 0.39309268381047585, "learning_rate": 3.6308389082156835e-07, "loss": 0.3318, "step": 6418 }, { "epoch": 4.451456310679612, "grad_norm": 1.3650711117186602, "learning_rate": 3.6217881715803536e-07, "loss": 0.3614, "step": 6419 }, { "epoch": 4.452149791955617, "grad_norm": 0.3953208802560284, "learning_rate": 3.612748305661995e-07, "loss": 0.3515, "step": 6420 }, { "epoch": 4.452843273231623, "grad_norm": 0.4014098797996639, "learning_rate": 3.60371931257949e-07, "loss": 0.3498, "step": 6421 }, { "epoch": 4.4535367545076285, "grad_norm": 0.43947384328906647, "learning_rate": 3.5947011944491516e-07, "loss": 0.3052, "step": 6422 }, { 
"epoch": 4.454230235783633, "grad_norm": 0.5383784922701661, "learning_rate": 3.585693953384767e-07, "loss": 0.3467, "step": 6423 }, { "epoch": 4.454923717059639, "grad_norm": 0.3988991932539132, "learning_rate": 3.576697591497585e-07, "loss": 0.3618, "step": 6424 }, { "epoch": 4.455617198335645, "grad_norm": 0.3606107553496361, "learning_rate": 3.5677121108962655e-07, "loss": 0.2923, "step": 6425 }, { "epoch": 4.456310679611651, "grad_norm": 0.4249631383075413, "learning_rate": 3.558737513686944e-07, "loss": 0.3456, "step": 6426 }, { "epoch": 4.4570041608876565, "grad_norm": 0.429837817751984, "learning_rate": 3.549773801973211e-07, "loss": 0.3941, "step": 6427 }, { "epoch": 4.457697642163661, "grad_norm": 0.48755067157371196, "learning_rate": 3.5408209778560854e-07, "loss": 0.3559, "step": 6428 }, { "epoch": 4.458391123439667, "grad_norm": 0.37875143764677704, "learning_rate": 3.5318790434340613e-07, "loss": 0.3185, "step": 6429 }, { "epoch": 4.459084604715673, "grad_norm": 0.41411150489536086, "learning_rate": 3.5229480008030395e-07, "loss": 0.3629, "step": 6430 }, { "epoch": 4.459778085991678, "grad_norm": 0.4039513018740434, "learning_rate": 3.514027852056406e-07, "loss": 0.3518, "step": 6431 }, { "epoch": 4.460471567267684, "grad_norm": 0.4165462965731385, "learning_rate": 3.50511859928499e-07, "loss": 0.3861, "step": 6432 }, { "epoch": 4.461165048543689, "grad_norm": 0.3965463802401738, "learning_rate": 3.496220244577025e-07, "loss": 0.3054, "step": 6433 }, { "epoch": 4.461858529819695, "grad_norm": 0.4688123280454221, "learning_rate": 3.487332790018244e-07, "loss": 0.357, "step": 6434 }, { "epoch": 4.462552011095701, "grad_norm": 0.40119468223791616, "learning_rate": 3.4784562376918076e-07, "loss": 0.3409, "step": 6435 }, { "epoch": 4.463245492371706, "grad_norm": 0.46370427307719886, "learning_rate": 3.469590589678284e-07, "loss": 0.3669, "step": 6436 }, { "epoch": 4.4639389736477115, "grad_norm": 0.4033241157687948, "learning_rate": 3.460735848055752e-07, "loss": 0.3143, "step": 6437 }, { "epoch": 4.464632454923717, "grad_norm": 0.40705754510496184, "learning_rate": 3.451892014899677e-07, "loss": 0.3403, "step": 6438 }, { "epoch": 4.465325936199722, "grad_norm": 0.41357876947975447, "learning_rate": 3.4430590922829965e-07, "loss": 0.3331, "step": 6439 }, { "epoch": 4.466019417475728, "grad_norm": 0.3954692268157122, "learning_rate": 3.434237082276093e-07, "loss": 0.3614, "step": 6440 }, { "epoch": 4.466712898751734, "grad_norm": 0.3760812585813724, "learning_rate": 3.4254259869467623e-07, "loss": 0.3235, "step": 6441 }, { "epoch": 4.4674063800277395, "grad_norm": 0.40449653513926387, "learning_rate": 3.4166258083602797e-07, "loss": 0.3849, "step": 6442 }, { "epoch": 4.468099861303745, "grad_norm": 0.40914052503140225, "learning_rate": 3.4078365485793297e-07, "loss": 0.3773, "step": 6443 }, { "epoch": 4.46879334257975, "grad_norm": 0.4419044690559538, "learning_rate": 3.3990582096640526e-07, "loss": 0.3464, "step": 6444 }, { "epoch": 4.469486823855756, "grad_norm": 0.4085625947232388, "learning_rate": 3.3902907936720353e-07, "loss": 0.3663, "step": 6445 }, { "epoch": 4.470180305131762, "grad_norm": 0.3936127333601739, "learning_rate": 3.38153430265829e-07, "loss": 0.3406, "step": 6446 }, { "epoch": 4.470873786407767, "grad_norm": 0.40031214701874424, "learning_rate": 3.3727887386752866e-07, "loss": 0.3489, "step": 6447 }, { "epoch": 4.471567267683772, "grad_norm": 0.4553388947830274, "learning_rate": 3.3640541037729013e-07, "loss": 0.3509, "step": 6448 }, { "epoch": 
4.472260748959778, "grad_norm": 0.44325214353034015, "learning_rate": 3.3553303999984854e-07, "loss": 0.3548, "step": 6449 }, { "epoch": 4.472954230235784, "grad_norm": 0.4393914871143648, "learning_rate": 3.3466176293968146e-07, "loss": 0.3728, "step": 6450 }, { "epoch": 4.47364771151179, "grad_norm": 0.4603045183143016, "learning_rate": 3.3379157940100825e-07, "loss": 0.3774, "step": 6451 }, { "epoch": 4.4743411927877945, "grad_norm": 0.5124282463772938, "learning_rate": 3.3292248958779414e-07, "loss": 0.3278, "step": 6452 }, { "epoch": 4.4750346740638, "grad_norm": 0.4107030441536405, "learning_rate": 3.3205449370374955e-07, "loss": 0.3506, "step": 6453 }, { "epoch": 4.475728155339806, "grad_norm": 0.3894232902370426, "learning_rate": 3.3118759195232273e-07, "loss": 0.3618, "step": 6454 }, { "epoch": 4.476421636615811, "grad_norm": 0.43683930384381864, "learning_rate": 3.303217845367124e-07, "loss": 0.3219, "step": 6455 }, { "epoch": 4.477115117891817, "grad_norm": 0.39801986042785004, "learning_rate": 3.294570716598561e-07, "loss": 0.3405, "step": 6456 }, { "epoch": 4.4778085991678225, "grad_norm": 0.3746218809626706, "learning_rate": 3.2859345352443673e-07, "loss": 0.3257, "step": 6457 }, { "epoch": 4.478502080443828, "grad_norm": 0.4054198427002251, "learning_rate": 3.2773093033288016e-07, "loss": 0.3844, "step": 6458 }, { "epoch": 4.479195561719834, "grad_norm": 0.4059728409327418, "learning_rate": 3.2686950228735525e-07, "loss": 0.3304, "step": 6459 }, { "epoch": 4.479889042995839, "grad_norm": 0.4251068297741258, "learning_rate": 3.2600916958977437e-07, "loss": 0.3519, "step": 6460 }, { "epoch": 4.480582524271845, "grad_norm": 0.4240131950128313, "learning_rate": 3.2514993244179395e-07, "loss": 0.3275, "step": 6461 }, { "epoch": 4.48127600554785, "grad_norm": 0.6330401176140308, "learning_rate": 3.242917910448118e-07, "loss": 0.3408, "step": 6462 }, { "epoch": 4.481969486823855, "grad_norm": 0.3881132677643724, "learning_rate": 3.234347455999709e-07, "loss": 0.3624, "step": 6463 }, { "epoch": 4.482662968099861, "grad_norm": 0.36505614923981594, "learning_rate": 3.2257879630815614e-07, "loss": 0.3254, "step": 6464 }, { "epoch": 4.483356449375867, "grad_norm": 0.5751774300457781, "learning_rate": 3.2172394336999644e-07, "loss": 0.3533, "step": 6465 }, { "epoch": 4.484049930651873, "grad_norm": 1.4789054122146275, "learning_rate": 3.2087018698586326e-07, "loss": 0.3409, "step": 6466 }, { "epoch": 4.484743411927878, "grad_norm": 0.42218696405004064, "learning_rate": 3.200175273558698e-07, "loss": 0.3794, "step": 6467 }, { "epoch": 4.485436893203883, "grad_norm": 0.44298189214979666, "learning_rate": 3.1916596467987395e-07, "loss": 0.3632, "step": 6468 }, { "epoch": 4.486130374479889, "grad_norm": 0.42006300798758667, "learning_rate": 3.183154991574766e-07, "loss": 0.3413, "step": 6469 }, { "epoch": 4.486823855755895, "grad_norm": 0.38498342351820186, "learning_rate": 3.174661309880189e-07, "loss": 0.3429, "step": 6470 }, { "epoch": 4.4875173370319, "grad_norm": 0.4217689623941785, "learning_rate": 3.166178603705872e-07, "loss": 0.3565, "step": 6471 }, { "epoch": 4.4882108183079055, "grad_norm": 0.3774929025318759, "learning_rate": 3.157706875040112e-07, "loss": 0.3283, "step": 6472 }, { "epoch": 4.488904299583911, "grad_norm": 0.532292283351115, "learning_rate": 3.1492461258686044e-07, "loss": 0.3479, "step": 6473 }, { "epoch": 4.489597780859917, "grad_norm": 0.3983099552400063, "learning_rate": 3.140796358174508e-07, "loss": 0.3574, "step": 6474 }, { "epoch": 4.490291262135923, 
"grad_norm": 0.408053838958733, "learning_rate": 3.1323575739383716e-07, "loss": 0.3367, "step": 6475 }, { "epoch": 4.490984743411928, "grad_norm": 0.3979044170543787, "learning_rate": 3.1239297751381845e-07, "loss": 0.3448, "step": 6476 }, { "epoch": 4.491678224687933, "grad_norm": 0.3911801713811084, "learning_rate": 3.1155129637493733e-07, "loss": 0.3465, "step": 6477 }, { "epoch": 4.492371705963939, "grad_norm": 0.40382648272801225, "learning_rate": 3.1071071417447587e-07, "loss": 0.3447, "step": 6478 }, { "epoch": 4.493065187239944, "grad_norm": 0.3910398820348786, "learning_rate": 3.0987123110946204e-07, "loss": 0.3576, "step": 6479 }, { "epoch": 4.49375866851595, "grad_norm": 0.4587393510940895, "learning_rate": 3.090328473766646e-07, "loss": 0.3157, "step": 6480 }, { "epoch": 4.494452149791956, "grad_norm": 0.43309360839548056, "learning_rate": 3.0819556317259304e-07, "loss": 0.3673, "step": 6481 }, { "epoch": 4.495145631067961, "grad_norm": 0.4333632180477413, "learning_rate": 3.073593786935031e-07, "loss": 0.3578, "step": 6482 }, { "epoch": 4.495839112343967, "grad_norm": 0.4287510088150104, "learning_rate": 3.06524294135388e-07, "loss": 0.3971, "step": 6483 }, { "epoch": 4.496532593619972, "grad_norm": 0.428508304993015, "learning_rate": 3.0569030969398726e-07, "loss": 0.3538, "step": 6484 }, { "epoch": 4.497226074895978, "grad_norm": 0.4713320414272633, "learning_rate": 3.0485742556478073e-07, "loss": 0.3815, "step": 6485 }, { "epoch": 4.497919556171984, "grad_norm": 0.43818288531586524, "learning_rate": 3.040256419429888e-07, "loss": 0.3896, "step": 6486 }, { "epoch": 4.4986130374479885, "grad_norm": 0.4094588046686549, "learning_rate": 3.031949590235772e-07, "loss": 0.3575, "step": 6487 }, { "epoch": 4.499306518723994, "grad_norm": 0.4537010217842934, "learning_rate": 3.023653770012508e-07, "loss": 0.3778, "step": 6488 }, { "epoch": 4.5, "grad_norm": 0.41914491753733574, "learning_rate": 3.015368960704584e-07, "loss": 0.3517, "step": 6489 }, { "epoch": 4.500693481276006, "grad_norm": 0.4030602571777503, "learning_rate": 3.0070951642538925e-07, "loss": 0.3767, "step": 6490 }, { "epoch": 4.5013869625520115, "grad_norm": 0.4218914131164033, "learning_rate": 2.998832382599759e-07, "loss": 0.3659, "step": 6491 }, { "epoch": 4.502080443828016, "grad_norm": 0.40665886625530623, "learning_rate": 2.990580617678923e-07, "loss": 0.3709, "step": 6492 }, { "epoch": 4.502773925104022, "grad_norm": 0.4221749105124615, "learning_rate": 2.982339871425527e-07, "loss": 0.3401, "step": 6493 }, { "epoch": 4.503467406380028, "grad_norm": 0.42294670887561653, "learning_rate": 2.974110145771142e-07, "loss": 0.4071, "step": 6494 }, { "epoch": 4.504160887656033, "grad_norm": 0.521710788793327, "learning_rate": 2.965891442644775e-07, "loss": 0.3478, "step": 6495 }, { "epoch": 4.504854368932039, "grad_norm": 0.3807171060977504, "learning_rate": 2.9576837639728073e-07, "loss": 0.3382, "step": 6496 }, { "epoch": 4.505547850208044, "grad_norm": 0.39005771180130033, "learning_rate": 2.9494871116790667e-07, "loss": 0.3724, "step": 6497 }, { "epoch": 4.50624133148405, "grad_norm": 0.39533793319339844, "learning_rate": 2.9413014876848e-07, "loss": 0.3297, "step": 6498 }, { "epoch": 4.506934812760056, "grad_norm": 0.4473857911757184, "learning_rate": 2.9331268939086334e-07, "loss": 0.3262, "step": 6499 }, { "epoch": 4.507628294036061, "grad_norm": 0.3871276758241381, "learning_rate": 2.924963332266667e-07, "loss": 0.3793, "step": 6500 }, { "epoch": 4.508321775312067, "grad_norm": 0.4422729774052628, 
"learning_rate": 2.916810804672349e-07, "loss": 0.3718, "step": 6501 }, { "epoch": 4.509015256588072, "grad_norm": 0.39973229638445923, "learning_rate": 2.908669313036588e-07, "loss": 0.4085, "step": 6502 }, { "epoch": 4.509708737864077, "grad_norm": 0.4118083586733117, "learning_rate": 2.9005388592676987e-07, "loss": 0.3277, "step": 6503 }, { "epoch": 4.510402219140083, "grad_norm": 0.4167204896286219, "learning_rate": 2.892419445271383e-07, "loss": 0.3163, "step": 6504 }, { "epoch": 4.511095700416089, "grad_norm": 0.4135384801167932, "learning_rate": 2.8843110729507794e-07, "loss": 0.3837, "step": 6505 }, { "epoch": 4.5117891816920945, "grad_norm": 0.3758678191361516, "learning_rate": 2.8762137442064353e-07, "loss": 0.32, "step": 6506 }, { "epoch": 4.5124826629681, "grad_norm": 0.4284753020111622, "learning_rate": 2.868127460936304e-07, "loss": 0.3666, "step": 6507 }, { "epoch": 4.513176144244105, "grad_norm": 0.3606113740843206, "learning_rate": 2.860052225035742e-07, "loss": 0.2953, "step": 6508 }, { "epoch": 4.513869625520111, "grad_norm": 0.4469565264864672, "learning_rate": 2.8519880383975406e-07, "loss": 0.3571, "step": 6509 }, { "epoch": 4.514563106796117, "grad_norm": 0.3668758159511137, "learning_rate": 2.8439349029118825e-07, "loss": 0.3213, "step": 6510 }, { "epoch": 4.515256588072122, "grad_norm": 0.4240413519433988, "learning_rate": 2.8358928204663684e-07, "loss": 0.3593, "step": 6511 }, { "epoch": 4.515950069348127, "grad_norm": 0.44069596706920966, "learning_rate": 2.827861792945991e-07, "loss": 0.385, "step": 6512 }, { "epoch": 4.516643550624133, "grad_norm": 0.7423024391069702, "learning_rate": 2.8198418222331713e-07, "loss": 0.3289, "step": 6513 }, { "epoch": 4.517337031900139, "grad_norm": 0.41623711687393744, "learning_rate": 2.81183291020774e-07, "loss": 0.3513, "step": 6514 }, { "epoch": 4.518030513176145, "grad_norm": 0.4517311584869714, "learning_rate": 2.803835058746918e-07, "loss": 0.3575, "step": 6515 }, { "epoch": 4.51872399445215, "grad_norm": 0.39809335931351525, "learning_rate": 2.7958482697253433e-07, "loss": 0.3522, "step": 6516 }, { "epoch": 4.519417475728155, "grad_norm": 0.4291542228527471, "learning_rate": 2.787872545015069e-07, "loss": 0.3589, "step": 6517 }, { "epoch": 4.520110957004161, "grad_norm": 0.4150943789778223, "learning_rate": 2.7799078864855446e-07, "loss": 0.3654, "step": 6518 }, { "epoch": 4.520804438280166, "grad_norm": 0.44335141616014906, "learning_rate": 2.7719542960036315e-07, "loss": 0.3644, "step": 6519 }, { "epoch": 4.521497919556172, "grad_norm": 0.4348871010347055, "learning_rate": 2.764011775433584e-07, "loss": 0.3655, "step": 6520 }, { "epoch": 4.5221914008321775, "grad_norm": 0.41287644806382484, "learning_rate": 2.7560803266370783e-07, "loss": 0.3192, "step": 6521 }, { "epoch": 4.522884882108183, "grad_norm": 0.39974817155933356, "learning_rate": 2.748159951473195e-07, "loss": 0.3597, "step": 6522 }, { "epoch": 4.523578363384189, "grad_norm": 0.4541361314750261, "learning_rate": 2.7402506517983983e-07, "loss": 0.3593, "step": 6523 }, { "epoch": 4.524271844660194, "grad_norm": 0.40599335654716, "learning_rate": 2.732352429466573e-07, "loss": 0.3649, "step": 6524 }, { "epoch": 4.5249653259362, "grad_norm": 0.6400772691369392, "learning_rate": 2.72446528632902e-07, "loss": 0.4258, "step": 6525 }, { "epoch": 4.5256588072122055, "grad_norm": 0.38880545523469967, "learning_rate": 2.716589224234406e-07, "loss": 0.3443, "step": 6526 }, { "epoch": 4.52635228848821, "grad_norm": 0.40296157361064605, "learning_rate": 
2.708724245028849e-07, "loss": 0.3193, "step": 6527 }, { "epoch": 4.527045769764216, "grad_norm": 0.6351020494405178, "learning_rate": 2.700870350555823e-07, "loss": 0.3589, "step": 6528 }, { "epoch": 4.527739251040222, "grad_norm": 0.3661771491123025, "learning_rate": 2.693027542656229e-07, "loss": 0.3158, "step": 6529 }, { "epoch": 4.528432732316228, "grad_norm": 0.4313492272871311, "learning_rate": 2.6851958231683685e-07, "loss": 0.3442, "step": 6530 }, { "epoch": 4.529126213592233, "grad_norm": 0.43572220403651096, "learning_rate": 2.677375193927939e-07, "loss": 0.3544, "step": 6531 }, { "epoch": 4.529819694868238, "grad_norm": 0.42791169698498455, "learning_rate": 2.669565656768036e-07, "loss": 0.3386, "step": 6532 }, { "epoch": 4.530513176144244, "grad_norm": 0.38519609286662515, "learning_rate": 2.6617672135191565e-07, "loss": 0.3683, "step": 6533 }, { "epoch": 4.53120665742025, "grad_norm": 0.4280170359083549, "learning_rate": 2.653979866009204e-07, "loss": 0.3529, "step": 6534 }, { "epoch": 4.531900138696255, "grad_norm": 0.4068672641372711, "learning_rate": 2.646203616063475e-07, "loss": 0.3641, "step": 6535 }, { "epoch": 4.5325936199722605, "grad_norm": 0.4271099957472437, "learning_rate": 2.638438465504667e-07, "loss": 0.3437, "step": 6536 }, { "epoch": 4.533287101248266, "grad_norm": 0.4737638227447026, "learning_rate": 2.630684416152879e-07, "loss": 0.3624, "step": 6537 }, { "epoch": 4.533980582524272, "grad_norm": 0.39051439935558413, "learning_rate": 2.6229414698255907e-07, "loss": 0.3164, "step": 6538 }, { "epoch": 4.534674063800278, "grad_norm": 0.4568092945082574, "learning_rate": 2.6152096283377e-07, "loss": 0.3664, "step": 6539 }, { "epoch": 4.535367545076283, "grad_norm": 0.3843181895475802, "learning_rate": 2.6074888935015087e-07, "loss": 0.3594, "step": 6540 }, { "epoch": 4.5360610263522885, "grad_norm": 0.5258146706913907, "learning_rate": 2.5997792671266787e-07, "loss": 0.2818, "step": 6541 }, { "epoch": 4.536754507628294, "grad_norm": 0.45660257694532436, "learning_rate": 2.5920807510202984e-07, "loss": 0.3425, "step": 6542 }, { "epoch": 4.537447988904299, "grad_norm": 0.37549771371111773, "learning_rate": 2.584393346986852e-07, "loss": 0.3725, "step": 6543 }, { "epoch": 4.538141470180305, "grad_norm": 0.4330040274361384, "learning_rate": 2.576717056828193e-07, "loss": 0.3778, "step": 6544 }, { "epoch": 4.538834951456311, "grad_norm": 0.44130154909130553, "learning_rate": 2.569051882343615e-07, "loss": 0.3657, "step": 6545 }, { "epoch": 4.539528432732316, "grad_norm": 0.39400618194672443, "learning_rate": 2.5613978253297533e-07, "loss": 0.3584, "step": 6546 }, { "epoch": 4.540221914008322, "grad_norm": 0.4069553441613309, "learning_rate": 2.5537548875806785e-07, "loss": 0.3495, "step": 6547 }, { "epoch": 4.540915395284327, "grad_norm": 0.424531468262278, "learning_rate": 2.546123070887846e-07, "loss": 0.3497, "step": 6548 }, { "epoch": 4.541608876560333, "grad_norm": 0.4577295112311656, "learning_rate": 2.5385023770400754e-07, "loss": 0.4103, "step": 6549 }, { "epoch": 4.542302357836339, "grad_norm": 0.4027887701619919, "learning_rate": 2.5308928078236207e-07, "loss": 0.3572, "step": 6550 }, { "epoch": 4.5429958391123435, "grad_norm": 0.4721085563830496, "learning_rate": 2.5232943650221055e-07, "loss": 0.3633, "step": 6551 }, { "epoch": 4.543689320388349, "grad_norm": 0.4459911633946896, "learning_rate": 2.5157070504165495e-07, "loss": 0.3305, "step": 6552 }, { "epoch": 4.544382801664355, "grad_norm": 0.4302313580487091, "learning_rate": 2.5081308657853576e-07, 
"loss": 0.3637, "step": 6553 }, { "epoch": 4.545076282940361, "grad_norm": 0.42841197764228633, "learning_rate": 2.5005658129043377e-07, "loss": 0.3545, "step": 6554 }, { "epoch": 4.545769764216367, "grad_norm": 0.4160618786378027, "learning_rate": 2.4930118935466875e-07, "loss": 0.4077, "step": 6555 }, { "epoch": 4.5464632454923715, "grad_norm": 0.40678581500629307, "learning_rate": 2.4854691094829965e-07, "loss": 0.3782, "step": 6556 }, { "epoch": 4.547156726768377, "grad_norm": 0.6031029795388966, "learning_rate": 2.477937462481217e-07, "loss": 0.3398, "step": 6557 }, { "epoch": 4.547850208044383, "grad_norm": 0.3924631494717983, "learning_rate": 2.4704169543067314e-07, "loss": 0.3172, "step": 6558 }, { "epoch": 4.548543689320388, "grad_norm": 0.40269393212454174, "learning_rate": 2.462907586722285e-07, "loss": 0.3955, "step": 6559 }, { "epoch": 4.549237170596394, "grad_norm": 0.4050210715802751, "learning_rate": 2.4554093614880206e-07, "loss": 0.3562, "step": 6560 }, { "epoch": 4.549930651872399, "grad_norm": 0.4177639963500074, "learning_rate": 2.4479222803614644e-07, "loss": 0.3919, "step": 6561 }, { "epoch": 4.550624133148405, "grad_norm": 0.4749450806249847, "learning_rate": 2.4404463450975415e-07, "loss": 0.325, "step": 6562 }, { "epoch": 4.551317614424411, "grad_norm": 0.4351040183644717, "learning_rate": 2.4329815574485493e-07, "loss": 0.343, "step": 6563 }, { "epoch": 4.552011095700416, "grad_norm": 0.3733244201879771, "learning_rate": 2.425527919164195e-07, "loss": 0.3062, "step": 6564 }, { "epoch": 4.552704576976422, "grad_norm": 0.4396580819743459, "learning_rate": 2.4180854319915346e-07, "loss": 0.368, "step": 6565 }, { "epoch": 4.553398058252427, "grad_norm": 0.4380716762686249, "learning_rate": 2.410654097675041e-07, "loss": 0.3859, "step": 6566 }, { "epoch": 4.554091539528432, "grad_norm": 0.3668319890075246, "learning_rate": 2.403233917956582e-07, "loss": 0.3086, "step": 6567 }, { "epoch": 4.554785020804438, "grad_norm": 0.3909884804602913, "learning_rate": 2.3958248945753714e-07, "loss": 0.3182, "step": 6568 }, { "epoch": 4.555478502080444, "grad_norm": 0.3938518713301587, "learning_rate": 2.3884270292680476e-07, "loss": 0.3638, "step": 6569 }, { "epoch": 4.55617198335645, "grad_norm": 0.3944873374706976, "learning_rate": 2.381040323768602e-07, "loss": 0.3288, "step": 6570 }, { "epoch": 4.556865464632455, "grad_norm": 0.45370858756261456, "learning_rate": 2.3736647798084268e-07, "loss": 0.3611, "step": 6571 }, { "epoch": 4.55755894590846, "grad_norm": 0.4639467918963087, "learning_rate": 2.3663003991163113e-07, "loss": 0.3637, "step": 6572 }, { "epoch": 4.558252427184466, "grad_norm": 0.42693584690979414, "learning_rate": 2.3589471834183975e-07, "loss": 0.3728, "step": 6573 }, { "epoch": 4.558945908460472, "grad_norm": 0.4206348896724024, "learning_rate": 2.3516051344382285e-07, "loss": 0.3571, "step": 6574 }, { "epoch": 4.559639389736477, "grad_norm": 0.4161159497288621, "learning_rate": 2.344274253896739e-07, "loss": 0.358, "step": 6575 }, { "epoch": 4.560332871012482, "grad_norm": 0.41348718852471833, "learning_rate": 2.336954543512221e-07, "loss": 0.3435, "step": 6576 }, { "epoch": 4.561026352288488, "grad_norm": 0.4024688574700567, "learning_rate": 2.3296460050003687e-07, "loss": 0.3423, "step": 6577 }, { "epoch": 4.561719833564494, "grad_norm": 0.43723687127994926, "learning_rate": 2.3223486400742456e-07, "loss": 0.3643, "step": 6578 }, { "epoch": 4.5624133148405, "grad_norm": 0.4480853856166621, "learning_rate": 2.3150624504442997e-07, "loss": 0.3314, "step": 
6579 }, { "epoch": 4.563106796116505, "grad_norm": 0.37850608175012385, "learning_rate": 2.307787437818365e-07, "loss": 0.3154, "step": 6580 }, { "epoch": 4.56380027739251, "grad_norm": 0.41017433062649455, "learning_rate": 2.3005236039016554e-07, "loss": 0.333, "step": 6581 }, { "epoch": 4.564493758668516, "grad_norm": 0.44818421710488887, "learning_rate": 2.2932709503967587e-07, "loss": 0.3372, "step": 6582 }, { "epoch": 4.565187239944521, "grad_norm": 0.4241577498244145, "learning_rate": 2.286029479003643e-07, "loss": 0.3616, "step": 6583 }, { "epoch": 4.565880721220527, "grad_norm": 0.46869856563719514, "learning_rate": 2.2787991914196505e-07, "loss": 0.359, "step": 6584 }, { "epoch": 4.566574202496533, "grad_norm": 0.3967347604826857, "learning_rate": 2.2715800893395256e-07, "loss": 0.3398, "step": 6585 }, { "epoch": 4.567267683772538, "grad_norm": 0.42530800727361445, "learning_rate": 2.2643721744553483e-07, "loss": 0.3376, "step": 6586 }, { "epoch": 4.567961165048544, "grad_norm": 0.41069185069357356, "learning_rate": 2.257175448456622e-07, "loss": 0.3497, "step": 6587 }, { "epoch": 4.568654646324549, "grad_norm": 0.4001560346445005, "learning_rate": 2.2499899130301983e-07, "loss": 0.3004, "step": 6588 }, { "epoch": 4.569348127600555, "grad_norm": 0.43127553399784263, "learning_rate": 2.2428155698603182e-07, "loss": 0.3394, "step": 6589 }, { "epoch": 4.5700416088765605, "grad_norm": 0.44513476052202566, "learning_rate": 2.2356524206286033e-07, "loss": 0.344, "step": 6590 }, { "epoch": 4.570735090152565, "grad_norm": 0.5265858974041646, "learning_rate": 2.2285004670140275e-07, "loss": 0.3996, "step": 6591 }, { "epoch": 4.571428571428571, "grad_norm": 0.41331666692849756, "learning_rate": 2.2213597106929608e-07, "loss": 0.3445, "step": 6592 }, { "epoch": 4.572122052704577, "grad_norm": 0.38689482305681333, "learning_rate": 2.2142301533391586e-07, "loss": 0.3499, "step": 6593 }, { "epoch": 4.572815533980583, "grad_norm": 0.4151646109190327, "learning_rate": 2.207111796623723e-07, "loss": 0.346, "step": 6594 }, { "epoch": 4.5735090152565885, "grad_norm": 0.38944932861630477, "learning_rate": 2.2000046422151479e-07, "loss": 0.3177, "step": 6595 }, { "epoch": 4.574202496532593, "grad_norm": 0.3929990605150274, "learning_rate": 2.1929086917793052e-07, "loss": 0.3298, "step": 6596 }, { "epoch": 4.574895977808599, "grad_norm": 0.382487539641259, "learning_rate": 2.1858239469794206e-07, "loss": 0.3374, "step": 6597 }, { "epoch": 4.575589459084605, "grad_norm": 0.3743567709313739, "learning_rate": 2.1787504094761268e-07, "loss": 0.3351, "step": 6598 }, { "epoch": 4.57628294036061, "grad_norm": 0.4383388425995304, "learning_rate": 2.1716880809273978e-07, "loss": 0.3343, "step": 6599 }, { "epoch": 4.5769764216366156, "grad_norm": 0.5747238472588304, "learning_rate": 2.1646369629885867e-07, "loss": 0.3493, "step": 6600 }, { "epoch": 4.577669902912621, "grad_norm": 0.39377261634329735, "learning_rate": 2.1575970573124437e-07, "loss": 0.3189, "step": 6601 }, { "epoch": 4.578363384188627, "grad_norm": 0.5130892949484, "learning_rate": 2.1505683655490495e-07, "loss": 0.3356, "step": 6602 }, { "epoch": 4.579056865464633, "grad_norm": 0.3909262813168535, "learning_rate": 2.1435508893458912e-07, "loss": 0.3621, "step": 6603 }, { "epoch": 4.579750346740638, "grad_norm": 0.5019565370247855, "learning_rate": 2.1365446303478142e-07, "loss": 0.3943, "step": 6604 }, { "epoch": 4.5804438280166435, "grad_norm": 0.41810129940267743, "learning_rate": 2.1295495901970275e-07, "loss": 0.3392, "step": 6605 }, { 
"epoch": 4.581137309292649, "grad_norm": 0.4398827655216951, "learning_rate": 2.1225657705331249e-07, "loss": 0.3403, "step": 6606 }, { "epoch": 4.581830790568654, "grad_norm": 0.4091014458505974, "learning_rate": 2.115593172993058e-07, "loss": 0.3645, "step": 6607 }, { "epoch": 4.58252427184466, "grad_norm": 0.43415775581988225, "learning_rate": 2.108631799211158e-07, "loss": 0.3434, "step": 6608 }, { "epoch": 4.583217753120666, "grad_norm": 0.4739394088706493, "learning_rate": 2.1016816508191263e-07, "loss": 0.3534, "step": 6609 }, { "epoch": 4.5839112343966715, "grad_norm": 0.4090274718583703, "learning_rate": 2.0947427294460142e-07, "loss": 0.3432, "step": 6610 }, { "epoch": 4.584604715672677, "grad_norm": 0.39989999743785615, "learning_rate": 2.0878150367182547e-07, "loss": 0.3518, "step": 6611 }, { "epoch": 4.585298196948682, "grad_norm": 0.4548783797876923, "learning_rate": 2.0808985742596653e-07, "loss": 0.4076, "step": 6612 }, { "epoch": 4.585991678224688, "grad_norm": 0.44472465670113426, "learning_rate": 2.073993343691394e-07, "loss": 0.3935, "step": 6613 }, { "epoch": 4.586685159500694, "grad_norm": 0.4368895525570366, "learning_rate": 2.0670993466319956e-07, "loss": 0.3546, "step": 6614 }, { "epoch": 4.5873786407766985, "grad_norm": 0.4145043639499307, "learning_rate": 2.0602165846973498e-07, "loss": 0.3394, "step": 6615 }, { "epoch": 4.588072122052704, "grad_norm": 0.539968477580173, "learning_rate": 2.05334505950075e-07, "loss": 0.3364, "step": 6616 }, { "epoch": 4.58876560332871, "grad_norm": 0.40832294925657137, "learning_rate": 2.0464847726528236e-07, "loss": 0.3594, "step": 6617 }, { "epoch": 4.589459084604716, "grad_norm": 0.7232419628694357, "learning_rate": 2.0396357257615684e-07, "loss": 0.332, "step": 6618 }, { "epoch": 4.590152565880722, "grad_norm": 0.4157838669127496, "learning_rate": 2.0327979204323557e-07, "loss": 0.3588, "step": 6619 }, { "epoch": 4.5908460471567265, "grad_norm": 0.39598205637842165, "learning_rate": 2.025971358267914e-07, "loss": 0.3884, "step": 6620 }, { "epoch": 4.591539528432732, "grad_norm": 0.41932620594896597, "learning_rate": 2.019156040868342e-07, "loss": 0.3255, "step": 6621 }, { "epoch": 4.592233009708738, "grad_norm": 0.40689397698464574, "learning_rate": 2.0123519698311e-07, "loss": 0.3279, "step": 6622 }, { "epoch": 4.592926490984743, "grad_norm": 0.4255000799197367, "learning_rate": 2.0055591467510126e-07, "loss": 0.3406, "step": 6623 }, { "epoch": 4.593619972260749, "grad_norm": 0.4453047957874876, "learning_rate": 1.9987775732202618e-07, "loss": 0.3622, "step": 6624 }, { "epoch": 4.5943134535367545, "grad_norm": 0.36916286051210967, "learning_rate": 1.9920072508284204e-07, "loss": 0.3305, "step": 6625 }, { "epoch": 4.59500693481276, "grad_norm": 0.42935231157805465, "learning_rate": 1.9852481811623803e-07, "loss": 0.3315, "step": 6626 }, { "epoch": 4.595700416088766, "grad_norm": 0.44283573166986173, "learning_rate": 1.978500365806435e-07, "loss": 0.3805, "step": 6627 }, { "epoch": 4.596393897364771, "grad_norm": 0.40192658742637344, "learning_rate": 1.971763806342214e-07, "loss": 0.3055, "step": 6628 }, { "epoch": 4.597087378640777, "grad_norm": 0.4129234107909708, "learning_rate": 1.9650385043487152e-07, "loss": 0.3271, "step": 6629 }, { "epoch": 4.597780859916782, "grad_norm": 0.4426665403270593, "learning_rate": 1.958324461402311e-07, "loss": 0.3738, "step": 6630 }, { "epoch": 4.598474341192787, "grad_norm": 0.41428201391912783, "learning_rate": 1.9516216790767151e-07, "loss": 0.3237, "step": 6631 }, { "epoch": 
4.599167822468793, "grad_norm": 0.41145024535793706, "learning_rate": 1.9449301589430148e-07, "loss": 0.3404, "step": 6632 }, { "epoch": 4.599861303744799, "grad_norm": 0.4152177258658786, "learning_rate": 1.93824990256965e-07, "loss": 0.3437, "step": 6633 }, { "epoch": 4.600554785020805, "grad_norm": 0.4342061013767659, "learning_rate": 1.9315809115224348e-07, "loss": 0.4081, "step": 6634 }, { "epoch": 4.60124826629681, "grad_norm": 0.44760226540203973, "learning_rate": 1.9249231873645247e-07, "loss": 0.3799, "step": 6635 }, { "epoch": 4.601941747572815, "grad_norm": 0.42085826953626726, "learning_rate": 1.9182767316564433e-07, "loss": 0.3529, "step": 6636 }, { "epoch": 4.602635228848821, "grad_norm": 0.4108062638522855, "learning_rate": 1.911641545956072e-07, "loss": 0.3581, "step": 6637 }, { "epoch": 4.603328710124827, "grad_norm": 0.4176745703913112, "learning_rate": 1.9050176318186508e-07, "loss": 0.3794, "step": 6638 }, { "epoch": 4.604022191400832, "grad_norm": 0.3975265994014037, "learning_rate": 1.898404990796776e-07, "loss": 0.3609, "step": 6639 }, { "epoch": 4.6047156726768375, "grad_norm": 0.47061066044974603, "learning_rate": 1.8918036244404026e-07, "loss": 0.3352, "step": 6640 }, { "epoch": 4.605409153952843, "grad_norm": 0.4358234354241011, "learning_rate": 1.8852135342968481e-07, "loss": 0.3957, "step": 6641 }, { "epoch": 4.606102635228849, "grad_norm": 0.3730065338551913, "learning_rate": 1.878634721910766e-07, "loss": 0.324, "step": 6642 }, { "epoch": 4.606796116504855, "grad_norm": 0.42398317259038704, "learning_rate": 1.8720671888242058e-07, "loss": 0.3847, "step": 6643 }, { "epoch": 4.60748959778086, "grad_norm": 0.4359676935233792, "learning_rate": 1.8655109365765312e-07, "loss": 0.3513, "step": 6644 }, { "epoch": 4.608183079056865, "grad_norm": 0.41843473412501964, "learning_rate": 1.8589659667044847e-07, "loss": 0.3487, "step": 6645 }, { "epoch": 4.608876560332871, "grad_norm": 0.3955520622447244, "learning_rate": 1.8524322807421723e-07, "loss": 0.3845, "step": 6646 }, { "epoch": 4.609570041608876, "grad_norm": 0.4237314241468602, "learning_rate": 1.845909880221025e-07, "loss": 0.3566, "step": 6647 }, { "epoch": 4.610263522884882, "grad_norm": 0.432610738349929, "learning_rate": 1.839398766669853e-07, "loss": 0.3643, "step": 6648 }, { "epoch": 4.610957004160888, "grad_norm": 0.3690249344272704, "learning_rate": 1.8328989416148192e-07, "loss": 0.3004, "step": 6649 }, { "epoch": 4.611650485436893, "grad_norm": 0.4143862370385774, "learning_rate": 1.8264104065794265e-07, "loss": 0.3522, "step": 6650 }, { "epoch": 4.612343966712899, "grad_norm": 0.4209448083559079, "learning_rate": 1.8199331630845418e-07, "loss": 0.3489, "step": 6651 }, { "epoch": 4.613037447988904, "grad_norm": 0.43925667239934624, "learning_rate": 1.81346721264839e-07, "loss": 0.3654, "step": 6652 }, { "epoch": 4.61373092926491, "grad_norm": 0.8191495949140846, "learning_rate": 1.8070125567865415e-07, "loss": 0.4005, "step": 6653 }, { "epoch": 4.614424410540916, "grad_norm": 0.3921487423882409, "learning_rate": 1.8005691970119254e-07, "loss": 0.3195, "step": 6654 }, { "epoch": 4.6151178918169204, "grad_norm": 0.43231313283189815, "learning_rate": 1.7941371348348057e-07, "loss": 0.3714, "step": 6655 }, { "epoch": 4.615811373092926, "grad_norm": 0.41016272036284057, "learning_rate": 1.7877163717628155e-07, "loss": 0.3184, "step": 6656 }, { "epoch": 4.616504854368932, "grad_norm": 0.45460623159582375, "learning_rate": 1.781306909300945e-07, "loss": 0.3667, "step": 6657 }, { "epoch": 4.617198335644938, 
"grad_norm": 0.40401495288836997, "learning_rate": 1.774908748951515e-07, "loss": 0.3208, "step": 6658 }, { "epoch": 4.6178918169209435, "grad_norm": 0.3728093803124587, "learning_rate": 1.76852189221422e-07, "loss": 0.3304, "step": 6659 }, { "epoch": 4.618585298196948, "grad_norm": 0.4212808230587952, "learning_rate": 1.7621463405860683e-07, "loss": 0.3094, "step": 6660 }, { "epoch": 4.619278779472954, "grad_norm": 0.41247273143391566, "learning_rate": 1.75578209556147e-07, "loss": 0.3401, "step": 6661 }, { "epoch": 4.61997226074896, "grad_norm": 0.4151187889067134, "learning_rate": 1.749429158632149e-07, "loss": 0.3125, "step": 6662 }, { "epoch": 4.620665742024965, "grad_norm": 0.42407205762886613, "learning_rate": 1.7430875312871797e-07, "loss": 0.3729, "step": 6663 }, { "epoch": 4.621359223300971, "grad_norm": 0.391440849609123, "learning_rate": 1.736757215013013e-07, "loss": 0.3667, "step": 6664 }, { "epoch": 4.622052704576976, "grad_norm": 0.38587659761782256, "learning_rate": 1.730438211293406e-07, "loss": 0.3173, "step": 6665 }, { "epoch": 4.622746185852982, "grad_norm": 0.4384276094034426, "learning_rate": 1.724130521609496e-07, "loss": 0.3485, "step": 6666 }, { "epoch": 4.623439667128988, "grad_norm": 0.38801601832069726, "learning_rate": 1.7178341474397674e-07, "loss": 0.3477, "step": 6667 }, { "epoch": 4.624133148404993, "grad_norm": 0.4183163085913416, "learning_rate": 1.711549090260034e-07, "loss": 0.3572, "step": 6668 }, { "epoch": 4.624826629680999, "grad_norm": 0.4222211756985973, "learning_rate": 1.7052753515434728e-07, "loss": 0.3689, "step": 6669 }, { "epoch": 4.625520110957004, "grad_norm": 0.3899032292209465, "learning_rate": 1.699012932760602e-07, "loss": 0.3687, "step": 6670 }, { "epoch": 4.62621359223301, "grad_norm": 0.40036377433853393, "learning_rate": 1.6927618353792862e-07, "loss": 0.3614, "step": 6671 }, { "epoch": 4.626907073509015, "grad_norm": 0.4141272469217083, "learning_rate": 1.686522060864748e-07, "loss": 0.3874, "step": 6672 }, { "epoch": 4.627600554785021, "grad_norm": 0.39555092439102046, "learning_rate": 1.6802936106795286e-07, "loss": 0.3438, "step": 6673 }, { "epoch": 4.6282940360610265, "grad_norm": 0.3879885169859168, "learning_rate": 1.674076486283538e-07, "loss": 0.3751, "step": 6674 }, { "epoch": 4.628987517337032, "grad_norm": 0.5668088213465785, "learning_rate": 1.6678706891340325e-07, "loss": 0.3231, "step": 6675 }, { "epoch": 4.629680998613037, "grad_norm": 0.3744846867711921, "learning_rate": 1.6616762206855928e-07, "loss": 0.3493, "step": 6676 }, { "epoch": 4.630374479889043, "grad_norm": 0.3684355729031906, "learning_rate": 1.6554930823901695e-07, "loss": 0.3112, "step": 6677 }, { "epoch": 4.631067961165049, "grad_norm": 0.40771883102611195, "learning_rate": 1.6493212756970356e-07, "loss": 0.3379, "step": 6678 }, { "epoch": 4.6317614424410545, "grad_norm": 0.46091316799899035, "learning_rate": 1.6431608020528233e-07, "loss": 0.282, "step": 6679 }, { "epoch": 4.632454923717059, "grad_norm": 0.4169117708550667, "learning_rate": 1.637011662901511e-07, "loss": 0.3756, "step": 6680 }, { "epoch": 4.633148404993065, "grad_norm": 0.4194509220578528, "learning_rate": 1.6308738596843953e-07, "loss": 0.3617, "step": 6681 }, { "epoch": 4.633841886269071, "grad_norm": 0.43122335718609295, "learning_rate": 1.6247473938401426e-07, "loss": 0.4036, "step": 6682 }, { "epoch": 4.634535367545077, "grad_norm": 0.3886736164761595, "learning_rate": 1.6186322668047538e-07, "loss": 0.3588, "step": 6683 }, { "epoch": 4.6352288488210815, "grad_norm": 
0.435130127048327, "learning_rate": 1.6125284800115604e-07, "loss": 0.357, "step": 6684 }, { "epoch": 4.635922330097087, "grad_norm": 0.42392845633829646, "learning_rate": 1.6064360348912567e-07, "loss": 0.4047, "step": 6685 }, { "epoch": 4.636615811373093, "grad_norm": 0.37841049734660087, "learning_rate": 1.600354932871867e-07, "loss": 0.3347, "step": 6686 }, { "epoch": 4.637309292649099, "grad_norm": 0.46564699368728796, "learning_rate": 1.594285175378746e-07, "loss": 0.3554, "step": 6687 }, { "epoch": 4.638002773925104, "grad_norm": 0.3978575411021788, "learning_rate": 1.5882267638346217e-07, "loss": 0.3124, "step": 6688 }, { "epoch": 4.6386962552011095, "grad_norm": 0.38198941755229715, "learning_rate": 1.5821796996595197e-07, "loss": 0.2977, "step": 6689 }, { "epoch": 4.639389736477115, "grad_norm": 0.39947531859126206, "learning_rate": 1.5761439842708392e-07, "loss": 0.3441, "step": 6690 }, { "epoch": 4.640083217753121, "grad_norm": 0.37601127611692114, "learning_rate": 1.5701196190833102e-07, "loss": 0.3134, "step": 6691 }, { "epoch": 4.640776699029126, "grad_norm": 0.44854689077246596, "learning_rate": 1.5641066055089916e-07, "loss": 0.3401, "step": 6692 }, { "epoch": 4.641470180305132, "grad_norm": 0.3968189250149672, "learning_rate": 1.5581049449573004e-07, "loss": 0.3388, "step": 6693 }, { "epoch": 4.6421636615811375, "grad_norm": 0.4107917651039533, "learning_rate": 1.5521146388349783e-07, "loss": 0.359, "step": 6694 }, { "epoch": 4.642857142857143, "grad_norm": 0.3896473693015918, "learning_rate": 1.5461356885461077e-07, "loss": 0.3325, "step": 6695 }, { "epoch": 4.643550624133148, "grad_norm": 0.40495332063411876, "learning_rate": 1.5401680954921062e-07, "loss": 0.3571, "step": 6696 }, { "epoch": 4.644244105409154, "grad_norm": 0.4103280332373116, "learning_rate": 1.5342118610717438e-07, "loss": 0.342, "step": 6697 }, { "epoch": 4.64493758668516, "grad_norm": 0.46280859618808157, "learning_rate": 1.5282669866811152e-07, "loss": 0.3803, "step": 6698 }, { "epoch": 4.645631067961165, "grad_norm": 0.41744347843303314, "learning_rate": 1.5223334737136608e-07, "loss": 0.3635, "step": 6699 }, { "epoch": 4.64632454923717, "grad_norm": 0.406194187007945, "learning_rate": 1.5164113235601462e-07, "loss": 0.3422, "step": 6700 }, { "epoch": 4.647018030513176, "grad_norm": 0.46002313056312844, "learning_rate": 1.5105005376086778e-07, "loss": 0.4384, "step": 6701 }, { "epoch": 4.647711511789182, "grad_norm": 0.37639387252560136, "learning_rate": 1.504601117244714e-07, "loss": 0.3158, "step": 6702 }, { "epoch": 4.648404993065188, "grad_norm": 0.39863091718502724, "learning_rate": 1.498713063851026e-07, "loss": 0.3365, "step": 6703 }, { "epoch": 4.6490984743411925, "grad_norm": 0.4018608400880308, "learning_rate": 1.4928363788077327e-07, "loss": 0.3631, "step": 6704 }, { "epoch": 4.649791955617198, "grad_norm": 0.4104518088407998, "learning_rate": 1.4869710634922762e-07, "loss": 0.3607, "step": 6705 }, { "epoch": 4.650485436893204, "grad_norm": 0.4534698134523714, "learning_rate": 1.4811171192794628e-07, "loss": 0.3381, "step": 6706 }, { "epoch": 4.65117891816921, "grad_norm": 0.4227278924218095, "learning_rate": 1.475274547541411e-07, "loss": 0.3677, "step": 6707 }, { "epoch": 4.651872399445215, "grad_norm": 0.5608003809455238, "learning_rate": 1.4694433496475702e-07, "loss": 0.4047, "step": 6708 }, { "epoch": 4.6525658807212205, "grad_norm": 0.39603586592941703, "learning_rate": 1.4636235269647359e-07, "loss": 0.3224, "step": 6709 }, { "epoch": 4.653259361997226, "grad_norm": 
0.41740625091370726, "learning_rate": 1.4578150808570223e-07, "loss": 0.3437, "step": 6710 }, { "epoch": 4.653952843273232, "grad_norm": 0.43748035655726614, "learning_rate": 1.4520180126859018e-07, "loss": 0.3691, "step": 6711 }, { "epoch": 4.654646324549237, "grad_norm": 0.4032783662769279, "learning_rate": 1.4462323238101538e-07, "loss": 0.3647, "step": 6712 }, { "epoch": 4.655339805825243, "grad_norm": 0.36993848544011976, "learning_rate": 1.4404580155859106e-07, "loss": 0.3179, "step": 6713 }, { "epoch": 4.656033287101248, "grad_norm": 0.41115462936777036, "learning_rate": 1.4346950893666167e-07, "loss": 0.3364, "step": 6714 }, { "epoch": 4.656726768377254, "grad_norm": 0.4363038939442896, "learning_rate": 1.42894354650307e-07, "loss": 0.3194, "step": 6715 }, { "epoch": 4.657420249653259, "grad_norm": 0.4485323098363289, "learning_rate": 1.423203388343386e-07, "loss": 0.3088, "step": 6716 }, { "epoch": 4.658113730929265, "grad_norm": 0.4196024765984699, "learning_rate": 1.4174746162330278e-07, "loss": 0.4428, "step": 6717 }, { "epoch": 4.658807212205271, "grad_norm": 0.4177274097626942, "learning_rate": 1.4117572315147598e-07, "loss": 0.3495, "step": 6718 }, { "epoch": 4.659500693481276, "grad_norm": 0.45901509451905975, "learning_rate": 1.4060512355287048e-07, "loss": 0.3592, "step": 6719 }, { "epoch": 4.660194174757281, "grad_norm": 0.4740244898274189, "learning_rate": 1.4003566296123095e-07, "loss": 0.3887, "step": 6720 }, { "epoch": 4.660887656033287, "grad_norm": 0.38106305206428936, "learning_rate": 1.394673415100345e-07, "loss": 0.3494, "step": 6721 }, { "epoch": 4.661581137309293, "grad_norm": 0.4544181776548709, "learning_rate": 1.3890015933249124e-07, "loss": 0.2719, "step": 6722 }, { "epoch": 4.662274618585299, "grad_norm": 0.4030455780376685, "learning_rate": 1.3833411656154483e-07, "loss": 0.2863, "step": 6723 }, { "epoch": 4.6629680998613035, "grad_norm": 0.37873720359007057, "learning_rate": 1.3776921332987193e-07, "loss": 0.3341, "step": 6724 }, { "epoch": 4.663661581137309, "grad_norm": 0.4580870905083957, "learning_rate": 1.372054497698816e-07, "loss": 0.353, "step": 6725 }, { "epoch": 4.664355062413315, "grad_norm": 0.4001016690633297, "learning_rate": 1.3664282601371536e-07, "loss": 0.3357, "step": 6726 }, { "epoch": 4.665048543689321, "grad_norm": 0.4168440857226407, "learning_rate": 1.3608134219324886e-07, "loss": 0.3642, "step": 6727 }, { "epoch": 4.665742024965326, "grad_norm": 0.4165023500950493, "learning_rate": 1.3552099844009013e-07, "loss": 0.3203, "step": 6728 }, { "epoch": 4.666435506241331, "grad_norm": 0.3893905445083926, "learning_rate": 1.349617948855786e-07, "loss": 0.3429, "step": 6729 }, { "epoch": 4.667128987517337, "grad_norm": 0.4335745681172854, "learning_rate": 1.3440373166078824e-07, "loss": 0.3795, "step": 6730 }, { "epoch": 4.667822468793343, "grad_norm": 0.4708287411816723, "learning_rate": 1.3384680889652502e-07, "loss": 0.3387, "step": 6731 }, { "epoch": 4.668515950069348, "grad_norm": 0.4292906387974233, "learning_rate": 1.332910267233267e-07, "loss": 0.3435, "step": 6732 }, { "epoch": 4.669209431345354, "grad_norm": 0.4967563615936282, "learning_rate": 1.3273638527146638e-07, "loss": 0.3353, "step": 6733 }, { "epoch": 4.669902912621359, "grad_norm": 0.3737189666638068, "learning_rate": 1.321828846709461e-07, "loss": 0.3506, "step": 6734 }, { "epoch": 4.670596393897365, "grad_norm": 0.4240505954098639, "learning_rate": 1.3163052505150375e-07, "loss": 0.3633, "step": 6735 }, { "epoch": 4.67128987517337, "grad_norm": 0.39289518288564, 
"learning_rate": 1.3107930654260804e-07, "loss": 0.3536, "step": 6736 }, { "epoch": 4.671983356449376, "grad_norm": 0.4567296836979554, "learning_rate": 1.3052922927346e-07, "loss": 0.3513, "step": 6737 }, { "epoch": 4.672676837725382, "grad_norm": 0.38757277985861877, "learning_rate": 1.2998029337299433e-07, "loss": 0.3395, "step": 6738 }, { "epoch": 4.673370319001387, "grad_norm": 0.39599690112778674, "learning_rate": 1.2943249896987864e-07, "loss": 0.3288, "step": 6739 }, { "epoch": 4.674063800277392, "grad_norm": 0.42239428336219415, "learning_rate": 1.2888584619250966e-07, "loss": 0.3737, "step": 6740 }, { "epoch": 4.674757281553398, "grad_norm": 0.3857297733744965, "learning_rate": 1.2834033516902044e-07, "loss": 0.323, "step": 6741 }, { "epoch": 4.675450762829404, "grad_norm": 0.967508623031919, "learning_rate": 1.277959660272743e-07, "loss": 0.3179, "step": 6742 }, { "epoch": 4.6761442441054095, "grad_norm": 0.4682470535663424, "learning_rate": 1.2725273889486745e-07, "loss": 0.3635, "step": 6743 }, { "epoch": 4.676837725381414, "grad_norm": 0.43960960852529, "learning_rate": 1.2671065389912917e-07, "loss": 0.3914, "step": 6744 }, { "epoch": 4.67753120665742, "grad_norm": 0.5233143144705975, "learning_rate": 1.2616971116711895e-07, "loss": 0.3403, "step": 6745 }, { "epoch": 4.678224687933426, "grad_norm": 0.4100101852721493, "learning_rate": 1.2562991082563092e-07, "loss": 0.3233, "step": 6746 }, { "epoch": 4.678918169209432, "grad_norm": 0.4484855491389793, "learning_rate": 1.2509125300118996e-07, "loss": 0.3245, "step": 6747 }, { "epoch": 4.679611650485437, "grad_norm": 0.38710294017866376, "learning_rate": 1.2455373782005343e-07, "loss": 0.3686, "step": 6748 }, { "epoch": 4.680305131761442, "grad_norm": 0.45062554431993884, "learning_rate": 1.2401736540821108e-07, "loss": 0.3501, "step": 6749 }, { "epoch": 4.680998613037448, "grad_norm": 0.38733770690691943, "learning_rate": 1.2348213589138402e-07, "loss": 0.3448, "step": 6750 }, { "epoch": 4.681692094313454, "grad_norm": 0.42319529577596543, "learning_rate": 1.2294804939502746e-07, "loss": 0.3546, "step": 6751 }, { "epoch": 4.682385575589459, "grad_norm": 0.5086293836259173, "learning_rate": 1.224151060443274e-07, "loss": 0.412, "step": 6752 }, { "epoch": 4.6830790568654646, "grad_norm": 0.5485209236035724, "learning_rate": 1.2188330596420106e-07, "loss": 0.3551, "step": 6753 }, { "epoch": 4.68377253814147, "grad_norm": 0.4102678308013505, "learning_rate": 1.213526492792988e-07, "loss": 0.3717, "step": 6754 }, { "epoch": 4.684466019417476, "grad_norm": 0.47525191378298826, "learning_rate": 1.2082313611400276e-07, "loss": 0.3928, "step": 6755 }, { "epoch": 4.685159500693481, "grad_norm": 0.4439277804978099, "learning_rate": 1.2029476659242644e-07, "loss": 0.3505, "step": 6756 }, { "epoch": 4.685852981969487, "grad_norm": 0.4132820277534149, "learning_rate": 1.1976754083841747e-07, "loss": 0.3638, "step": 6757 }, { "epoch": 4.6865464632454925, "grad_norm": 0.39405119535260347, "learning_rate": 1.192414589755514e-07, "loss": 0.3481, "step": 6758 }, { "epoch": 4.687239944521498, "grad_norm": 0.4860560758389836, "learning_rate": 1.187165211271396e-07, "loss": 0.3706, "step": 6759 }, { "epoch": 4.687933425797503, "grad_norm": 0.4061770014012126, "learning_rate": 1.1819272741622367e-07, "loss": 0.3335, "step": 6760 }, { "epoch": 4.688626907073509, "grad_norm": 0.40806900505269417, "learning_rate": 1.176700779655765e-07, "loss": 0.3001, "step": 6761 }, { "epoch": 4.689320388349515, "grad_norm": 0.3642532282080855, "learning_rate": 
1.1714857289770399e-07, "loss": 0.3393, "step": 6762 }, { "epoch": 4.6900138696255205, "grad_norm": 0.39553145474978363, "learning_rate": 1.1662821233484167e-07, "loss": 0.3451, "step": 6763 }, { "epoch": 4.690707350901525, "grad_norm": 0.4529794811430911, "learning_rate": 1.1610899639896034e-07, "loss": 0.38, "step": 6764 }, { "epoch": 4.691400832177531, "grad_norm": 0.3815225750665992, "learning_rate": 1.155909252117593e-07, "loss": 0.3064, "step": 6765 }, { "epoch": 4.692094313453537, "grad_norm": 0.39076056913424856, "learning_rate": 1.150739988946703e-07, "loss": 0.3291, "step": 6766 }, { "epoch": 4.692787794729543, "grad_norm": 0.4061633583383636, "learning_rate": 1.145582175688581e-07, "loss": 0.3567, "step": 6767 }, { "epoch": 4.6934812760055475, "grad_norm": 0.40762646298708616, "learning_rate": 1.140435813552171e-07, "loss": 0.3494, "step": 6768 }, { "epoch": 4.694174757281553, "grad_norm": 0.4105204089115084, "learning_rate": 1.1353009037437523e-07, "loss": 0.3817, "step": 6769 }, { "epoch": 4.694868238557559, "grad_norm": 0.4415513186430572, "learning_rate": 1.1301774474669125e-07, "loss": 0.3537, "step": 6770 }, { "epoch": 4.695561719833565, "grad_norm": 0.4306115816350744, "learning_rate": 1.1250654459225407e-07, "loss": 0.3422, "step": 6771 }, { "epoch": 4.69625520110957, "grad_norm": 0.4292024895730401, "learning_rate": 1.1199649003088619e-07, "loss": 0.4078, "step": 6772 }, { "epoch": 4.6969486823855755, "grad_norm": 0.40697351675677496, "learning_rate": 1.1148758118214087e-07, "loss": 0.3638, "step": 6773 }, { "epoch": 4.697642163661581, "grad_norm": 0.4135029922149521, "learning_rate": 1.1097981816530157e-07, "loss": 0.3253, "step": 6774 }, { "epoch": 4.698335644937587, "grad_norm": 0.384596218410337, "learning_rate": 1.104732010993853e-07, "loss": 0.3482, "step": 6775 }, { "epoch": 4.699029126213592, "grad_norm": 0.5598074012488575, "learning_rate": 1.0996773010313876e-07, "loss": 0.3398, "step": 6776 }, { "epoch": 4.699722607489598, "grad_norm": 0.4235991723272917, "learning_rate": 1.0946340529504108e-07, "loss": 0.3598, "step": 6777 }, { "epoch": 4.7004160887656035, "grad_norm": 0.4191143944749233, "learning_rate": 1.0896022679330265e-07, "loss": 0.3313, "step": 6778 }, { "epoch": 4.701109570041609, "grad_norm": 0.45766788222001814, "learning_rate": 1.0845819471586416e-07, "loss": 0.3375, "step": 6779 }, { "epoch": 4.701803051317614, "grad_norm": 0.41868433977160835, "learning_rate": 1.0795730918039871e-07, "loss": 0.3534, "step": 6780 }, { "epoch": 4.70249653259362, "grad_norm": 0.4253257730744769, "learning_rate": 1.0745757030431015e-07, "loss": 0.3524, "step": 6781 }, { "epoch": 4.703190013869626, "grad_norm": 0.41838230902361123, "learning_rate": 1.0695897820473367e-07, "loss": 0.3788, "step": 6782 }, { "epoch": 4.703883495145631, "grad_norm": 0.4365048673807159, "learning_rate": 1.0646153299853523e-07, "loss": 0.3748, "step": 6783 }, { "epoch": 4.704576976421636, "grad_norm": 0.4488075569062203, "learning_rate": 1.059652348023138e-07, "loss": 0.3357, "step": 6784 }, { "epoch": 4.705270457697642, "grad_norm": 0.5641354438931263, "learning_rate": 1.0547008373239576e-07, "loss": 0.3453, "step": 6785 }, { "epoch": 4.705963938973648, "grad_norm": 0.40413271735483397, "learning_rate": 1.0497607990484326e-07, "loss": 0.3776, "step": 6786 }, { "epoch": 4.706657420249654, "grad_norm": 0.43243871522736055, "learning_rate": 1.0448322343544537e-07, "loss": 0.3614, "step": 6787 }, { "epoch": 4.7073509015256585, "grad_norm": 0.4309666969442699, "learning_rate": 
1.0399151443972521e-07, "loss": 0.3593, "step": 6788 }, { "epoch": 4.708044382801664, "grad_norm": 0.4122702124215238, "learning_rate": 1.0350095303293617e-07, "loss": 0.3323, "step": 6789 }, { "epoch": 4.70873786407767, "grad_norm": 0.4281984543047366, "learning_rate": 1.0301153933006126e-07, "loss": 0.3159, "step": 6790 }, { "epoch": 4.709431345353676, "grad_norm": 0.41312288396118063, "learning_rate": 1.0252327344581592e-07, "loss": 0.3517, "step": 6791 }, { "epoch": 4.710124826629681, "grad_norm": 0.39005107427814484, "learning_rate": 1.0203615549464585e-07, "loss": 0.3481, "step": 6792 }, { "epoch": 4.7108183079056865, "grad_norm": 0.40156674774110573, "learning_rate": 1.0155018559072805e-07, "loss": 0.3428, "step": 6793 }, { "epoch": 4.711511789181692, "grad_norm": 0.40568518077027116, "learning_rate": 1.0106536384797083e-07, "loss": 0.3374, "step": 6794 }, { "epoch": 4.712205270457698, "grad_norm": 0.4125994683015578, "learning_rate": 1.005816903800122e-07, "loss": 0.3854, "step": 6795 }, { "epoch": 4.712898751733703, "grad_norm": 0.4632445191240259, "learning_rate": 1.0009916530022256e-07, "loss": 0.3684, "step": 6796 }, { "epoch": 4.713592233009709, "grad_norm": 0.3925149370949742, "learning_rate": 9.961778872170202e-08, "loss": 0.3358, "step": 6797 }, { "epoch": 4.714285714285714, "grad_norm": 0.3987378195867283, "learning_rate": 9.913756075728088e-08, "loss": 0.3658, "step": 6798 }, { "epoch": 4.71497919556172, "grad_norm": 0.4263902974832491, "learning_rate": 9.86584815195224e-08, "loss": 0.3387, "step": 6799 }, { "epoch": 4.715672676837725, "grad_norm": 0.4141152908935563, "learning_rate": 9.818055112071844e-08, "loss": 0.3717, "step": 6800 }, { "epoch": 4.716366158113731, "grad_norm": 0.4103975056095425, "learning_rate": 9.770376967289219e-08, "loss": 0.3636, "step": 6801 }, { "epoch": 4.717059639389737, "grad_norm": 0.4047969443606198, "learning_rate": 9.722813728779923e-08, "loss": 0.3545, "step": 6802 }, { "epoch": 4.717753120665742, "grad_norm": 0.4005027983543377, "learning_rate": 9.675365407692205e-08, "loss": 0.3784, "step": 6803 }, { "epoch": 4.718446601941747, "grad_norm": 0.4525688038167993, "learning_rate": 9.628032015147836e-08, "loss": 0.3589, "step": 6804 }, { "epoch": 4.719140083217753, "grad_norm": 0.4132518708972397, "learning_rate": 9.580813562241276e-08, "loss": 0.3479, "step": 6805 }, { "epoch": 4.719833564493759, "grad_norm": 0.44356132531979187, "learning_rate": 9.533710060040224e-08, "loss": 0.3347, "step": 6806 }, { "epoch": 4.720527045769765, "grad_norm": 0.43176070223402224, "learning_rate": 9.486721519585462e-08, "loss": 0.3678, "step": 6807 }, { "epoch": 4.721220527045769, "grad_norm": 0.38632097807987065, "learning_rate": 9.43984795189068e-08, "loss": 0.3258, "step": 6808 }, { "epoch": 4.721914008321775, "grad_norm": 0.4139292071412524, "learning_rate": 9.393089367942754e-08, "loss": 0.3029, "step": 6809 }, { "epoch": 4.722607489597781, "grad_norm": 0.3774464897660259, "learning_rate": 9.346445778701529e-08, "loss": 0.318, "step": 6810 }, { "epoch": 4.723300970873787, "grad_norm": 0.44178754709316437, "learning_rate": 9.299917195099928e-08, "loss": 0.3236, "step": 6811 }, { "epoch": 4.723994452149792, "grad_norm": 0.4328788907551428, "learning_rate": 9.253503628043947e-08, "loss": 0.3591, "step": 6812 }, { "epoch": 4.724687933425797, "grad_norm": 0.40237448569599843, "learning_rate": 9.207205088412496e-08, "loss": 0.3346, "step": 6813 }, { "epoch": 4.725381414701803, "grad_norm": 0.400456602925986, "learning_rate": 9.161021587057728e-08, "loss": 
0.3648, "step": 6814 }, { "epoch": 4.726074895977809, "grad_norm": 0.4081929175488941, "learning_rate": 9.114953134804705e-08, "loss": 0.3569, "step": 6815 }, { "epoch": 4.726768377253814, "grad_norm": 0.4638939004745626, "learning_rate": 9.068999742451456e-08, "loss": 0.3399, "step": 6816 }, { "epoch": 4.72746185852982, "grad_norm": 0.41917451542274065, "learning_rate": 9.0231614207692e-08, "loss": 0.3161, "step": 6817 }, { "epoch": 4.728155339805825, "grad_norm": 0.6287713531427881, "learning_rate": 8.977438180502118e-08, "loss": 0.4059, "step": 6818 }, { "epoch": 4.728848821081831, "grad_norm": 0.390518839793498, "learning_rate": 8.931830032367361e-08, "loss": 0.3367, "step": 6819 }, { "epoch": 4.729542302357836, "grad_norm": 0.4017996211638298, "learning_rate": 8.8863369870551e-08, "loss": 0.3519, "step": 6820 }, { "epoch": 4.730235783633842, "grad_norm": 0.4079111533114061, "learning_rate": 8.840959055228693e-08, "loss": 0.3568, "step": 6821 }, { "epoch": 4.7309292649098476, "grad_norm": 0.40067820562618406, "learning_rate": 8.7956962475243e-08, "loss": 0.3518, "step": 6822 }, { "epoch": 4.731622746185853, "grad_norm": 0.39752367168541325, "learning_rate": 8.75054857455132e-08, "loss": 0.3396, "step": 6823 }, { "epoch": 4.732316227461858, "grad_norm": 0.5247590165452723, "learning_rate": 8.705516046891905e-08, "loss": 0.3825, "step": 6824 }, { "epoch": 4.733009708737864, "grad_norm": 0.41551390206060973, "learning_rate": 8.660598675101384e-08, "loss": 0.3569, "step": 6825 }, { "epoch": 4.73370319001387, "grad_norm": 0.4039738878181958, "learning_rate": 8.615796469708171e-08, "loss": 0.356, "step": 6826 }, { "epoch": 4.7343966712898755, "grad_norm": 0.4116222758803286, "learning_rate": 8.57110944121342e-08, "loss": 0.3487, "step": 6827 }, { "epoch": 4.73509015256588, "grad_norm": 0.43387076097437016, "learning_rate": 8.526537600091477e-08, "loss": 0.3653, "step": 6828 }, { "epoch": 4.735783633841886, "grad_norm": 0.4246650898209524, "learning_rate": 8.482080956789817e-08, "loss": 0.3437, "step": 6829 }, { "epoch": 4.736477115117892, "grad_norm": 0.4275177627205395, "learning_rate": 8.437739521728549e-08, "loss": 0.3107, "step": 6830 }, { "epoch": 4.737170596393898, "grad_norm": 0.3775912044979139, "learning_rate": 8.393513305301138e-08, "loss": 0.3391, "step": 6831 }, { "epoch": 4.737864077669903, "grad_norm": 0.423519338366062, "learning_rate": 8.34940231787379e-08, "loss": 0.3995, "step": 6832 }, { "epoch": 4.738557558945908, "grad_norm": 0.4584113477802442, "learning_rate": 8.305406569785845e-08, "loss": 0.333, "step": 6833 }, { "epoch": 4.739251040221914, "grad_norm": 0.4293005261696209, "learning_rate": 8.261526071349613e-08, "loss": 0.3808, "step": 6834 }, { "epoch": 4.73994452149792, "grad_norm": 0.422761223569215, "learning_rate": 8.217760832850308e-08, "loss": 0.3646, "step": 6835 }, { "epoch": 4.740638002773925, "grad_norm": 0.4081122434989935, "learning_rate": 8.174110864546225e-08, "loss": 0.3827, "step": 6836 }, { "epoch": 4.7413314840499305, "grad_norm": 0.5276095426233536, "learning_rate": 8.13057617666857e-08, "loss": 0.3406, "step": 6837 }, { "epoch": 4.742024965325936, "grad_norm": 0.4446229105695271, "learning_rate": 8.087156779421512e-08, "loss": 0.3115, "step": 6838 }, { "epoch": 4.742718446601942, "grad_norm": 0.40921992617296, "learning_rate": 8.043852682982356e-08, "loss": 0.3818, "step": 6839 }, { "epoch": 4.743411927877947, "grad_norm": 0.44700400369817855, "learning_rate": 8.000663897501259e-08, "loss": 0.3355, "step": 6840 }, { "epoch": 4.744105409153953, 
"grad_norm": 0.4047666969145994, "learning_rate": 7.957590433101293e-08, "loss": 0.3449, "step": 6841 }, { "epoch": 4.7447988904299585, "grad_norm": 0.44510310727835206, "learning_rate": 7.914632299878544e-08, "loss": 0.3143, "step": 6842 }, { "epoch": 4.745492371705964, "grad_norm": 0.44913663374622265, "learning_rate": 7.871789507902183e-08, "loss": 0.3911, "step": 6843 }, { "epoch": 4.746185852981969, "grad_norm": 0.38812213867453554, "learning_rate": 7.829062067214233e-08, "loss": 0.3142, "step": 6844 }, { "epoch": 4.746879334257975, "grad_norm": 0.3950616638944297, "learning_rate": 7.78644998782957e-08, "loss": 0.3325, "step": 6845 }, { "epoch": 4.747572815533981, "grad_norm": 0.38795078335508976, "learning_rate": 7.743953279736315e-08, "loss": 0.3278, "step": 6846 }, { "epoch": 4.7482662968099865, "grad_norm": 0.47077018282848426, "learning_rate": 7.701571952895337e-08, "loss": 0.3517, "step": 6847 }, { "epoch": 4.748959778085991, "grad_norm": 0.42275888126951844, "learning_rate": 7.659306017240464e-08, "loss": 0.3799, "step": 6848 }, { "epoch": 4.749653259361997, "grad_norm": 0.38755631522184725, "learning_rate": 7.617155482678607e-08, "loss": 0.3589, "step": 6849 }, { "epoch": 4.750346740638003, "grad_norm": 0.4842499370481706, "learning_rate": 7.575120359089416e-08, "loss": 0.339, "step": 6850 }, { "epoch": 4.751040221914009, "grad_norm": 0.41237035293335705, "learning_rate": 7.53320065632579e-08, "loss": 0.3193, "step": 6851 }, { "epoch": 4.7517337031900135, "grad_norm": 0.4025419440534093, "learning_rate": 7.491396384213312e-08, "loss": 0.379, "step": 6852 }, { "epoch": 4.752427184466019, "grad_norm": 0.39427184180682384, "learning_rate": 7.449707552550533e-08, "loss": 0.3016, "step": 6853 }, { "epoch": 4.753120665742025, "grad_norm": 0.42822774283365916, "learning_rate": 7.408134171109138e-08, "loss": 0.323, "step": 6854 }, { "epoch": 4.753814147018031, "grad_norm": 0.3921444185820582, "learning_rate": 7.366676249633609e-08, "loss": 0.3434, "step": 6855 }, { "epoch": 4.754507628294036, "grad_norm": 0.4685555826086588, "learning_rate": 7.325333797841283e-08, "loss": 0.3641, "step": 6856 }, { "epoch": 4.7552011095700415, "grad_norm": 0.3974537006608089, "learning_rate": 7.284106825422632e-08, "loss": 0.3623, "step": 6857 }, { "epoch": 4.755894590846047, "grad_norm": 0.5464280208635189, "learning_rate": 7.242995342040926e-08, "loss": 0.3169, "step": 6858 }, { "epoch": 4.756588072122053, "grad_norm": 0.43377542822096865, "learning_rate": 7.201999357332346e-08, "loss": 0.3472, "step": 6859 }, { "epoch": 4.757281553398058, "grad_norm": 0.39961496026766874, "learning_rate": 7.161118880906203e-08, "loss": 0.3585, "step": 6860 }, { "epoch": 4.757975034674064, "grad_norm": 0.40510658913640607, "learning_rate": 7.120353922344447e-08, "loss": 0.3359, "step": 6861 }, { "epoch": 4.7586685159500695, "grad_norm": 0.40455382978082327, "learning_rate": 7.079704491202099e-08, "loss": 0.365, "step": 6862 }, { "epoch": 4.759361997226075, "grad_norm": 0.39163038288764424, "learning_rate": 7.03917059700715e-08, "loss": 0.351, "step": 6863 }, { "epoch": 4.76005547850208, "grad_norm": 0.7175277727970721, "learning_rate": 6.998752249260387e-08, "loss": 0.3135, "step": 6864 }, { "epoch": 4.760748959778086, "grad_norm": 0.39138433989798443, "learning_rate": 6.958449457435679e-08, "loss": 0.3394, "step": 6865 }, { "epoch": 4.761442441054092, "grad_norm": 0.4845060873969994, "learning_rate": 6.918262230979577e-08, "loss": 0.3646, "step": 6866 }, { "epoch": 4.762135922330097, "grad_norm": 
0.39139535674858816, "learning_rate": 6.878190579311772e-08, "loss": 0.3631, "step": 6867 }, { "epoch": 4.762829403606102, "grad_norm": 0.42219273468760327, "learning_rate": 6.838234511824748e-08, "loss": 0.3339, "step": 6868 }, { "epoch": 4.763522884882108, "grad_norm": 0.3830655678826751, "learning_rate": 6.798394037883904e-08, "loss": 0.3602, "step": 6869 }, { "epoch": 4.764216366158114, "grad_norm": 0.558477613490607, "learning_rate": 6.758669166827547e-08, "loss": 0.3377, "step": 6870 }, { "epoch": 4.76490984743412, "grad_norm": 0.4234565855302753, "learning_rate": 6.719059907966952e-08, "loss": 0.3641, "step": 6871 }, { "epoch": 4.7656033287101245, "grad_norm": 0.4187831061714325, "learning_rate": 6.679566270586191e-08, "loss": 0.3299, "step": 6872 }, { "epoch": 4.76629680998613, "grad_norm": 0.5000926901157373, "learning_rate": 6.640188263942248e-08, "loss": 0.3728, "step": 6873 }, { "epoch": 4.766990291262136, "grad_norm": 0.4304122396360334, "learning_rate": 6.600925897265187e-08, "loss": 0.3734, "step": 6874 }, { "epoch": 4.767683772538142, "grad_norm": 0.4123609497597858, "learning_rate": 6.561779179757644e-08, "loss": 0.3527, "step": 6875 }, { "epoch": 4.768377253814147, "grad_norm": 0.395750825503072, "learning_rate": 6.5227481205955e-08, "loss": 0.3338, "step": 6876 }, { "epoch": 4.7690707350901524, "grad_norm": 0.4360803428959288, "learning_rate": 6.483832728927219e-08, "loss": 0.3396, "step": 6877 }, { "epoch": 4.769764216366158, "grad_norm": 0.384209773552081, "learning_rate": 6.44503301387428e-08, "loss": 0.3377, "step": 6878 }, { "epoch": 4.770457697642164, "grad_norm": 0.3962186906657134, "learning_rate": 6.406348984531241e-08, "loss": 0.3549, "step": 6879 }, { "epoch": 4.771151178918169, "grad_norm": 0.5821520902820478, "learning_rate": 6.367780649965127e-08, "loss": 0.3619, "step": 6880 }, { "epoch": 4.771844660194175, "grad_norm": 0.48422951675088144, "learning_rate": 6.329328019216208e-08, "loss": 0.3458, "step": 6881 }, { "epoch": 4.77253814147018, "grad_norm": 0.41427924570498564, "learning_rate": 6.290991101297495e-08, "loss": 0.3447, "step": 6882 }, { "epoch": 4.773231622746186, "grad_norm": 0.5338804788176426, "learning_rate": 6.2527699051948e-08, "loss": 0.3732, "step": 6883 }, { "epoch": 4.773925104022191, "grad_norm": 0.40049217356679934, "learning_rate": 6.214664439866957e-08, "loss": 0.3441, "step": 6884 }, { "epoch": 4.774618585298197, "grad_norm": 0.40061440425441786, "learning_rate": 6.176674714245656e-08, "loss": 0.3554, "step": 6885 }, { "epoch": 4.775312066574203, "grad_norm": 0.3919407783142851, "learning_rate": 6.138800737235384e-08, "loss": 0.3345, "step": 6886 }, { "epoch": 4.776005547850208, "grad_norm": 0.4344771229222764, "learning_rate": 6.101042517713429e-08, "loss": 0.3489, "step": 6887 }, { "epoch": 4.776699029126213, "grad_norm": 0.4195158216513353, "learning_rate": 6.063400064530155e-08, "loss": 0.3707, "step": 6888 }, { "epoch": 4.777392510402219, "grad_norm": 0.39922307345510843, "learning_rate": 6.025873386508673e-08, "loss": 0.3612, "step": 6889 }, { "epoch": 4.778085991678225, "grad_norm": 0.39642192644407176, "learning_rate": 5.988462492444946e-08, "loss": 0.3567, "step": 6890 }, { "epoch": 4.778779472954231, "grad_norm": 0.40566625595319633, "learning_rate": 5.9511673911077924e-08, "loss": 0.3342, "step": 6891 }, { "epoch": 4.779472954230235, "grad_norm": 0.4597075486751991, "learning_rate": 5.913988091238943e-08, "loss": 0.3297, "step": 6892 }, { "epoch": 4.780166435506241, "grad_norm": 0.4340545199423955, "learning_rate": 
5.876924601552869e-08, "loss": 0.3468, "step": 6893 }, { "epoch": 4.780859916782247, "grad_norm": 0.3953802526566802, "learning_rate": 5.839976930737179e-08, "loss": 0.3512, "step": 6894 }, { "epoch": 4.781553398058253, "grad_norm": 0.4267458150677247, "learning_rate": 5.803145087451945e-08, "loss": 0.3467, "step": 6895 }, { "epoch": 4.782246879334258, "grad_norm": 0.4879897653854673, "learning_rate": 5.766429080330371e-08, "loss": 0.3269, "step": 6896 }, { "epoch": 4.782940360610263, "grad_norm": 0.40101218093549146, "learning_rate": 5.729828917978464e-08, "loss": 0.3506, "step": 6897 }, { "epoch": 4.783633841886269, "grad_norm": 0.40949889228304925, "learning_rate": 5.693344608974916e-08, "loss": 0.3548, "step": 6898 }, { "epoch": 4.784327323162275, "grad_norm": 0.4324929112027942, "learning_rate": 5.656976161871497e-08, "loss": 0.3504, "step": 6899 }, { "epoch": 4.78502080443828, "grad_norm": 0.41618782158161427, "learning_rate": 5.620723585192667e-08, "loss": 0.3907, "step": 6900 }, { "epoch": 4.785714285714286, "grad_norm": 0.4244362889494322, "learning_rate": 5.584586887435739e-08, "loss": 0.3647, "step": 6901 }, { "epoch": 4.786407766990291, "grad_norm": 0.40496961206327814, "learning_rate": 5.5485660770709385e-08, "loss": 0.3231, "step": 6902 }, { "epoch": 4.787101248266297, "grad_norm": 0.6944059890736237, "learning_rate": 5.512661162541233e-08, "loss": 0.3278, "step": 6903 }, { "epoch": 4.787794729542302, "grad_norm": 0.406457267015003, "learning_rate": 5.476872152262558e-08, "loss": 0.3295, "step": 6904 }, { "epoch": 4.788488210818308, "grad_norm": 0.3981263206641865, "learning_rate": 5.441199054623536e-08, "loss": 0.3107, "step": 6905 }, { "epoch": 4.7891816920943135, "grad_norm": 0.38689615703998426, "learning_rate": 5.405641877985646e-08, "loss": 0.3452, "step": 6906 }, { "epoch": 4.789875173370319, "grad_norm": 0.4229570411223415, "learning_rate": 5.370200630683331e-08, "loss": 0.3209, "step": 6907 }, { "epoch": 4.790568654646324, "grad_norm": 0.38626318053258785, "learning_rate": 5.3348753210237244e-08, "loss": 0.3518, "step": 6908 }, { "epoch": 4.79126213592233, "grad_norm": 0.4802303893011554, "learning_rate": 5.2996659572867595e-08, "loss": 0.3545, "step": 6909 }, { "epoch": 4.791955617198336, "grad_norm": 0.4015189840259108, "learning_rate": 5.2645725477252775e-08, "loss": 0.3443, "step": 6910 }, { "epoch": 4.7926490984743415, "grad_norm": 0.45766271874627884, "learning_rate": 5.229595100564977e-08, "loss": 0.3515, "step": 6911 }, { "epoch": 4.793342579750346, "grad_norm": 0.39156170175293953, "learning_rate": 5.1947336240043e-08, "loss": 0.3771, "step": 6912 }, { "epoch": 4.794036061026352, "grad_norm": 0.3989951226746273, "learning_rate": 5.159988126214543e-08, "loss": 0.3236, "step": 6913 }, { "epoch": 4.794729542302358, "grad_norm": 0.4008655698322317, "learning_rate": 5.1253586153397485e-08, "loss": 0.3666, "step": 6914 }, { "epoch": 4.795423023578364, "grad_norm": 0.409899447239586, "learning_rate": 5.090845099496866e-08, "loss": 0.3416, "step": 6915 }, { "epoch": 4.796116504854369, "grad_norm": 0.4623452915993466, "learning_rate": 5.0564475867755924e-08, "loss": 0.3604, "step": 6916 }, { "epoch": 4.796809986130374, "grad_norm": 0.390243403261641, "learning_rate": 5.0221660852384226e-08, "loss": 0.3196, "step": 6917 }, { "epoch": 4.79750346740638, "grad_norm": 0.410900232085877, "learning_rate": 4.988000602920706e-08, "loss": 0.3691, "step": 6918 }, { "epoch": 4.798196948682386, "grad_norm": 0.4084424977197252, "learning_rate": 4.953951147830649e-08, "loss": 
0.3458, "step": 6919 }, { "epoch": 4.798890429958391, "grad_norm": 0.4265428979318083, "learning_rate": 4.920017727949089e-08, "loss": 0.327, "step": 6920 }, { "epoch": 4.7995839112343965, "grad_norm": 0.42090461960905484, "learning_rate": 4.886200351229886e-08, "loss": 0.3831, "step": 6921 }, { "epoch": 4.800277392510402, "grad_norm": 0.4345928866202194, "learning_rate": 4.852499025599533e-08, "loss": 0.3448, "step": 6922 }, { "epoch": 4.800970873786408, "grad_norm": 0.44982911549267557, "learning_rate": 4.818913758957378e-08, "loss": 0.3627, "step": 6923 }, { "epoch": 4.801664355062413, "grad_norm": 0.41447436912158064, "learning_rate": 4.785444559175567e-08, "loss": 0.3705, "step": 6924 }, { "epoch": 4.802357836338419, "grad_norm": 0.42229609729339795, "learning_rate": 4.752091434099049e-08, "loss": 0.3869, "step": 6925 }, { "epoch": 4.8030513176144245, "grad_norm": 0.4170527689602492, "learning_rate": 4.718854391545569e-08, "loss": 0.3318, "step": 6926 }, { "epoch": 4.80374479889043, "grad_norm": 0.40702953087802257, "learning_rate": 4.685733439305562e-08, "loss": 0.3162, "step": 6927 }, { "epoch": 4.804438280166435, "grad_norm": 0.37413577192538516, "learning_rate": 4.6527285851424295e-08, "loss": 0.3493, "step": 6928 }, { "epoch": 4.805131761442441, "grad_norm": 0.5599324085959952, "learning_rate": 4.619839836792261e-08, "loss": 0.3631, "step": 6929 }, { "epoch": 4.805825242718447, "grad_norm": 0.4062003685428794, "learning_rate": 4.5870672019638905e-08, "loss": 0.3622, "step": 6930 }, { "epoch": 4.8065187239944525, "grad_norm": 0.37795181480569495, "learning_rate": 4.5544106883390614e-08, "loss": 0.3004, "step": 6931 }, { "epoch": 4.807212205270457, "grad_norm": 0.3852526524395699, "learning_rate": 4.5218703035721514e-08, "loss": 0.3241, "step": 6932 }, { "epoch": 4.807905686546463, "grad_norm": 0.45727984178567427, "learning_rate": 4.489446055290392e-08, "loss": 0.359, "step": 6933 }, { "epoch": 4.808599167822469, "grad_norm": 0.40414797748884745, "learning_rate": 4.4571379510938705e-08, "loss": 0.4459, "step": 6934 }, { "epoch": 4.809292649098475, "grad_norm": 0.4095538258795409, "learning_rate": 4.424945998555308e-08, "loss": 0.3159, "step": 6935 }, { "epoch": 4.8099861303744795, "grad_norm": 0.45794444590898525, "learning_rate": 4.3928702052202786e-08, "loss": 0.349, "step": 6936 }, { "epoch": 4.810679611650485, "grad_norm": 0.4382399261276273, "learning_rate": 4.360910578607158e-08, "loss": 0.3866, "step": 6937 }, { "epoch": 4.811373092926491, "grad_norm": 0.43964004852582556, "learning_rate": 4.329067126206954e-08, "loss": 0.3981, "step": 6938 }, { "epoch": 4.812066574202497, "grad_norm": 0.4359603259960818, "learning_rate": 4.29733985548364e-08, "loss": 0.3818, "step": 6939 }, { "epoch": 4.812760055478502, "grad_norm": 0.44589836821992757, "learning_rate": 4.265728773873767e-08, "loss": 0.3683, "step": 6940 }, { "epoch": 4.8134535367545075, "grad_norm": 0.42218529636975183, "learning_rate": 4.234233888786799e-08, "loss": 0.3132, "step": 6941 }, { "epoch": 4.814147018030513, "grad_norm": 0.38511463630655796, "learning_rate": 4.202855207604939e-08, "loss": 0.3505, "step": 6942 }, { "epoch": 4.814840499306519, "grad_norm": 0.4037907780637292, "learning_rate": 4.171592737683083e-08, "loss": 0.3534, "step": 6943 }, { "epoch": 4.815533980582524, "grad_norm": 0.7813746787339475, "learning_rate": 4.1404464863489256e-08, "loss": 0.327, "step": 6944 }, { "epoch": 4.81622746185853, "grad_norm": 0.40146937333052046, "learning_rate": 4.109416460902904e-08, "loss": 0.338, "step": 6945 }, { 
"epoch": 4.8169209431345354, "grad_norm": 0.6195966408370592, "learning_rate": 4.078502668618256e-08, "loss": 0.3607, "step": 6946 }, { "epoch": 4.817614424410541, "grad_norm": 0.43914167443459606, "learning_rate": 4.0477051167410185e-08, "loss": 0.3964, "step": 6947 }, { "epoch": 4.818307905686546, "grad_norm": 0.4494699128783925, "learning_rate": 4.017023812489751e-08, "loss": 0.363, "step": 6948 }, { "epoch": 4.819001386962552, "grad_norm": 0.39687876668104566, "learning_rate": 3.986458763056089e-08, "loss": 0.3419, "step": 6949 }, { "epoch": 4.819694868238558, "grad_norm": 0.5078209870651843, "learning_rate": 3.9560099756041915e-08, "loss": 0.3593, "step": 6950 }, { "epoch": 4.820388349514563, "grad_norm": 0.38621878624607414, "learning_rate": 3.9256774572710157e-08, "loss": 0.341, "step": 6951 }, { "epoch": 4.821081830790568, "grad_norm": 0.7258465363588564, "learning_rate": 3.8954612151663184e-08, "loss": 0.3673, "step": 6952 }, { "epoch": 4.821775312066574, "grad_norm": 0.4371026524517926, "learning_rate": 3.8653612563725465e-08, "loss": 0.3405, "step": 6953 }, { "epoch": 4.82246879334258, "grad_norm": 0.49113893094781175, "learning_rate": 3.835377587944944e-08, "loss": 0.34, "step": 6954 }, { "epoch": 4.823162274618586, "grad_norm": 0.4705036596618689, "learning_rate": 3.80551021691139e-08, "loss": 0.3671, "step": 6955 }, { "epoch": 4.8238557558945905, "grad_norm": 0.4424351103421927, "learning_rate": 3.775759150272673e-08, "loss": 0.372, "step": 6956 }, { "epoch": 4.824549237170596, "grad_norm": 0.4014604289703149, "learning_rate": 3.74612439500216e-08, "loss": 0.3226, "step": 6957 }, { "epoch": 4.825242718446602, "grad_norm": 0.47077927144046877, "learning_rate": 3.716605958046071e-08, "loss": 0.3844, "step": 6958 }, { "epoch": 4.825936199722608, "grad_norm": 0.3973943525985301, "learning_rate": 3.687203846323262e-08, "loss": 0.3468, "step": 6959 }, { "epoch": 4.826629680998613, "grad_norm": 0.3913295607780083, "learning_rate": 3.657918066725441e-08, "loss": 0.3347, "step": 6960 }, { "epoch": 4.827323162274618, "grad_norm": 0.38534171319172944, "learning_rate": 3.6287486261169515e-08, "loss": 0.3485, "step": 6961 }, { "epoch": 4.828016643550624, "grad_norm": 0.5122972208847042, "learning_rate": 3.599695531334879e-08, "loss": 0.329, "step": 6962 }, { "epoch": 4.82871012482663, "grad_norm": 0.4069284210103114, "learning_rate": 3.570758789189055e-08, "loss": 0.3655, "step": 6963 }, { "epoch": 4.829403606102635, "grad_norm": 0.38485990839853634, "learning_rate": 3.541938406462053e-08, "loss": 0.3107, "step": 6964 }, { "epoch": 4.830097087378641, "grad_norm": 0.42643408148264367, "learning_rate": 3.513234389909192e-08, "loss": 0.3255, "step": 6965 }, { "epoch": 4.830790568654646, "grad_norm": 0.3914646908078493, "learning_rate": 3.4846467462584796e-08, "loss": 0.3308, "step": 6966 }, { "epoch": 4.831484049930652, "grad_norm": 0.41991133893343485, "learning_rate": 3.456175482210611e-08, "loss": 0.3766, "step": 6967 }, { "epoch": 4.832177531206657, "grad_norm": 0.4187980595385569, "learning_rate": 3.4278206044390804e-08, "loss": 0.3248, "step": 6968 }, { "epoch": 4.832871012482663, "grad_norm": 0.4179467903424799, "learning_rate": 3.399582119590072e-08, "loss": 0.3271, "step": 6969 }, { "epoch": 4.833564493758669, "grad_norm": 0.4452789265587225, "learning_rate": 3.371460034282459e-08, "loss": 0.351, "step": 6970 }, { "epoch": 4.834257975034674, "grad_norm": 0.39743620351146214, "learning_rate": 3.3434543551078555e-08, "loss": 0.3335, "step": 6971 }, { "epoch": 4.834951456310679, 
"grad_norm": 0.4425420257487699, "learning_rate": 3.3155650886306236e-08, "loss": 0.3394, "step": 6972 }, { "epoch": 4.835644937586685, "grad_norm": 0.40050631569226564, "learning_rate": 3.2877922413876994e-08, "loss": 0.3335, "step": 6973 }, { "epoch": 4.836338418862691, "grad_norm": 0.4264455119359866, "learning_rate": 3.260135819888988e-08, "loss": 0.3891, "step": 6974 }, { "epoch": 4.8370319001386965, "grad_norm": 0.408461137740394, "learning_rate": 3.232595830616858e-08, "loss": 0.3499, "step": 6975 }, { "epoch": 4.837725381414701, "grad_norm": 0.38137978335887013, "learning_rate": 3.205172280026536e-08, "loss": 0.3314, "step": 6976 }, { "epoch": 4.838418862690707, "grad_norm": 0.38848118773462487, "learning_rate": 3.1778651745458246e-08, "loss": 0.3382, "step": 6977 }, { "epoch": 4.839112343966713, "grad_norm": 0.3880703513525962, "learning_rate": 3.1506745205753806e-08, "loss": 0.3321, "step": 6978 }, { "epoch": 4.839805825242719, "grad_norm": 0.42651294698344894, "learning_rate": 3.123600324488496e-08, "loss": 0.3189, "step": 6979 }, { "epoch": 4.840499306518724, "grad_norm": 0.43988143678634084, "learning_rate": 3.096642592631094e-08, "loss": 0.349, "step": 6980 }, { "epoch": 4.841192787794729, "grad_norm": 0.529721713784114, "learning_rate": 3.069801331321953e-08, "loss": 0.3297, "step": 6981 }, { "epoch": 4.841886269070735, "grad_norm": 0.39465111595697205, "learning_rate": 3.043076546852486e-08, "loss": 0.3581, "step": 6982 }, { "epoch": 4.842579750346741, "grad_norm": 0.4123035006107765, "learning_rate": 3.0164682454866814e-08, "loss": 0.3794, "step": 6983 }, { "epoch": 4.843273231622746, "grad_norm": 0.7375004528984306, "learning_rate": 2.989976433461439e-08, "loss": 0.3477, "step": 6984 }, { "epoch": 4.843966712898752, "grad_norm": 0.5067491560434093, "learning_rate": 2.9636011169861812e-08, "loss": 0.348, "step": 6985 }, { "epoch": 4.844660194174757, "grad_norm": 0.4143473192391111, "learning_rate": 2.9373423022431292e-08, "loss": 0.3542, "step": 6986 }, { "epoch": 4.845353675450763, "grad_norm": 0.6526855266680608, "learning_rate": 2.9111999953871373e-08, "loss": 0.3668, "step": 6987 }, { "epoch": 4.846047156726768, "grad_norm": 0.4193674664460992, "learning_rate": 2.885174202545804e-08, "loss": 0.3619, "step": 6988 }, { "epoch": 4.846740638002774, "grad_norm": 0.41134133442438553, "learning_rate": 2.8592649298193053e-08, "loss": 0.3185, "step": 6989 }, { "epoch": 4.8474341192787795, "grad_norm": 0.40573112885511203, "learning_rate": 2.8334721832807276e-08, "loss": 0.3744, "step": 6990 }, { "epoch": 4.848127600554785, "grad_norm": 0.40268972208204984, "learning_rate": 2.8077959689755686e-08, "loss": 0.3762, "step": 6991 }, { "epoch": 4.84882108183079, "grad_norm": 0.39259809595310746, "learning_rate": 2.7822362929221804e-08, "loss": 0.3466, "step": 6992 }, { "epoch": 4.849514563106796, "grad_norm": 0.4022798126883263, "learning_rate": 2.7567931611116037e-08, "loss": 0.3621, "step": 6993 }, { "epoch": 4.850208044382802, "grad_norm": 0.39845692580513625, "learning_rate": 2.7314665795075135e-08, "loss": 0.395, "step": 6994 }, { "epoch": 4.8509015256588075, "grad_norm": 0.3805788766910518, "learning_rate": 2.7062565540462715e-08, "loss": 0.326, "step": 6995 }, { "epoch": 4.851595006934812, "grad_norm": 0.4235264999130844, "learning_rate": 2.681163090636929e-08, "loss": 0.3889, "step": 6996 }, { "epoch": 4.852288488210818, "grad_norm": 0.3858796636544987, "learning_rate": 2.65618619516117e-08, "loss": 0.3616, "step": 6997 }, { "epoch": 4.852981969486824, "grad_norm": 
1.2476283436230053, "learning_rate": 2.631325873473478e-08, "loss": 0.3071, "step": 6998 }, { "epoch": 4.85367545076283, "grad_norm": 0.38992115694265467, "learning_rate": 2.6065821314009142e-08, "loss": 0.3169, "step": 6999 }, { "epoch": 4.854368932038835, "grad_norm": 0.6515897947207091, "learning_rate": 2.581954974743117e-08, "loss": 0.3497, "step": 7000 }, { "epoch": 4.85506241331484, "grad_norm": 0.407057737738664, "learning_rate": 2.5574444092726358e-08, "loss": 0.3621, "step": 7001 }, { "epoch": 4.855755894590846, "grad_norm": 0.47573275757093725, "learning_rate": 2.5330504407345415e-08, "loss": 0.3219, "step": 7002 }, { "epoch": 4.856449375866852, "grad_norm": 0.3993589365632951, "learning_rate": 2.508773074846649e-08, "loss": 0.3433, "step": 7003 }, { "epoch": 4.857142857142857, "grad_norm": 0.39260504467944574, "learning_rate": 2.4846123172992953e-08, "loss": 0.373, "step": 7004 }, { "epoch": 4.8578363384188625, "grad_norm": 0.3861880075482633, "learning_rate": 2.460568173755673e-08, "loss": 0.3134, "step": 7005 }, { "epoch": 4.858529819694868, "grad_norm": 0.4026053848786676, "learning_rate": 2.436640649851496e-08, "loss": 0.3872, "step": 7006 }, { "epoch": 4.859223300970874, "grad_norm": 0.39099415234204743, "learning_rate": 2.4128297511952227e-08, "loss": 0.3499, "step": 7007 }, { "epoch": 4.859916782246879, "grad_norm": 0.4116912765648552, "learning_rate": 2.389135483367999e-08, "loss": 0.3627, "step": 7008 }, { "epoch": 4.860610263522885, "grad_norm": 0.442772142820646, "learning_rate": 2.365557851923439e-08, "loss": 0.3549, "step": 7009 }, { "epoch": 4.8613037447988905, "grad_norm": 0.4048592213735465, "learning_rate": 2.3420968623881768e-08, "loss": 0.3153, "step": 7010 }, { "epoch": 4.861997226074896, "grad_norm": 0.4128785983645319, "learning_rate": 2.3187525202612028e-08, "loss": 0.38, "step": 7011 }, { "epoch": 4.862690707350901, "grad_norm": 0.4266549420850963, "learning_rate": 2.295524831014251e-08, "loss": 0.3895, "step": 7012 }, { "epoch": 4.863384188626907, "grad_norm": 0.42120634597732115, "learning_rate": 2.272413800091744e-08, "loss": 0.3891, "step": 7013 }, { "epoch": 4.864077669902913, "grad_norm": 0.40640859578001914, "learning_rate": 2.249419432910682e-08, "loss": 0.3423, "step": 7014 }, { "epoch": 4.8647711511789185, "grad_norm": 0.3910487057475866, "learning_rate": 2.2265417348608653e-08, "loss": 0.3702, "step": 7015 }, { "epoch": 4.865464632454923, "grad_norm": 0.4372197533274517, "learning_rate": 2.203780711304615e-08, "loss": 0.3635, "step": 7016 }, { "epoch": 4.866158113730929, "grad_norm": 0.416883411335525, "learning_rate": 2.1811363675769416e-08, "loss": 0.3641, "step": 7017 }, { "epoch": 4.866851595006935, "grad_norm": 0.3809042360514471, "learning_rate": 2.1586087089855436e-08, "loss": 0.3668, "step": 7018 }, { "epoch": 4.867545076282941, "grad_norm": 0.4222391476538577, "learning_rate": 2.136197740810697e-08, "loss": 0.4006, "step": 7019 }, { "epoch": 4.8682385575589455, "grad_norm": 0.3822094679396028, "learning_rate": 2.1139034683054783e-08, "loss": 0.3492, "step": 7020 }, { "epoch": 4.868932038834951, "grad_norm": 1.3133856866351414, "learning_rate": 2.0917258966953735e-08, "loss": 0.3494, "step": 7021 }, { "epoch": 4.869625520110957, "grad_norm": 0.4463626421740778, "learning_rate": 2.069665031178669e-08, "loss": 0.3427, "step": 7022 }, { "epoch": 4.870319001386963, "grad_norm": 0.38051163270916527, "learning_rate": 2.04772087692634e-08, "loss": 0.3123, "step": 7023 }, { "epoch": 4.871012482662968, "grad_norm": 0.410824829817691, 
"learning_rate": 2.0258934390819386e-08, "loss": 0.3313, "step": 7024 }, { "epoch": 4.8717059639389735, "grad_norm": 0.39629206857971727, "learning_rate": 2.0041827227615385e-08, "loss": 0.3072, "step": 7025 }, { "epoch": 4.872399445214979, "grad_norm": 0.4000981505989687, "learning_rate": 1.9825887330540693e-08, "loss": 0.3128, "step": 7026 }, { "epoch": 4.873092926490985, "grad_norm": 0.7868279340490983, "learning_rate": 1.9611114750209825e-08, "loss": 0.3539, "step": 7027 }, { "epoch": 4.87378640776699, "grad_norm": 0.3969314819095711, "learning_rate": 1.9397509536964177e-08, "loss": 0.3366, "step": 7028 }, { "epoch": 4.874479889042996, "grad_norm": 0.3771405960249163, "learning_rate": 1.9185071740871475e-08, "loss": 0.3362, "step": 7029 }, { "epoch": 4.875173370319001, "grad_norm": 0.3934923161098979, "learning_rate": 1.8973801411724668e-08, "loss": 0.3665, "step": 7030 }, { "epoch": 4.875866851595007, "grad_norm": 0.5085863969277675, "learning_rate": 1.87636985990447e-08, "loss": 0.3547, "step": 7031 }, { "epoch": 4.876560332871012, "grad_norm": 0.44542724936713324, "learning_rate": 1.8554763352078288e-08, "loss": 0.3308, "step": 7032 }, { "epoch": 4.877253814147018, "grad_norm": 0.40118939164960027, "learning_rate": 1.8346995719797366e-08, "loss": 0.3711, "step": 7033 }, { "epoch": 4.877947295423024, "grad_norm": 0.442827757482279, "learning_rate": 1.814039575090243e-08, "loss": 0.3765, "step": 7034 }, { "epoch": 4.878640776699029, "grad_norm": 0.41765429340238064, "learning_rate": 1.793496349381807e-08, "loss": 0.3575, "step": 7035 }, { "epoch": 4.879334257975035, "grad_norm": 0.44753901231903453, "learning_rate": 1.773069899669633e-08, "loss": 0.3795, "step": 7036 }, { "epoch": 4.88002773925104, "grad_norm": 0.3958715474893592, "learning_rate": 1.752760230741557e-08, "loss": 0.3638, "step": 7037 }, { "epoch": 4.880721220527046, "grad_norm": 0.3818979393266661, "learning_rate": 1.7325673473580496e-08, "loss": 0.3217, "step": 7038 }, { "epoch": 4.881414701803052, "grad_norm": 0.4106820176143562, "learning_rate": 1.7124912542520468e-08, "loss": 0.3213, "step": 7039 }, { "epoch": 4.8821081830790565, "grad_norm": 0.40224007247881505, "learning_rate": 1.6925319561293953e-08, "loss": 0.2979, "step": 7040 }, { "epoch": 4.882801664355062, "grad_norm": 0.4812245227426676, "learning_rate": 1.6726894576683527e-08, "loss": 0.308, "step": 7041 }, { "epoch": 4.883495145631068, "grad_norm": 0.4040953959083908, "learning_rate": 1.6529637635198103e-08, "loss": 0.3585, "step": 7042 }, { "epoch": 4.884188626907074, "grad_norm": 0.4779342816171157, "learning_rate": 1.633354878307347e-08, "loss": 0.3156, "step": 7043 }, { "epoch": 4.8848821081830796, "grad_norm": 0.41192796775293167, "learning_rate": 1.6138628066271756e-08, "loss": 0.3465, "step": 7044 }, { "epoch": 4.885575589459084, "grad_norm": 0.42753611477598175, "learning_rate": 1.594487553048085e-08, "loss": 0.401, "step": 7045 }, { "epoch": 4.88626907073509, "grad_norm": 0.39125203223697663, "learning_rate": 1.575229122111499e-08, "loss": 0.3145, "step": 7046 }, { "epoch": 4.886962552011096, "grad_norm": 0.4320726198104153, "learning_rate": 1.5560875183314172e-08, "loss": 0.3577, "step": 7047 }, { "epoch": 4.887656033287101, "grad_norm": 0.4639310463180436, "learning_rate": 1.537062746194584e-08, "loss": 0.3326, "step": 7048 }, { "epoch": 4.888349514563107, "grad_norm": 0.4011769240495747, "learning_rate": 1.518154810160155e-08, "loss": 0.3695, "step": 7049 }, { "epoch": 4.889042995839112, "grad_norm": 0.4083309067945322, "learning_rate": 
1.4993637146600848e-08, "loss": 0.3475, "step": 7050 }, { "epoch": 4.889736477115118, "grad_norm": 0.4385380049866903, "learning_rate": 1.4806894640988501e-08, "loss": 0.3924, "step": 7051 }, { "epoch": 4.890429958391124, "grad_norm": 0.44989815131302324, "learning_rate": 1.4621320628535051e-08, "loss": 0.3389, "step": 7052 }, { "epoch": 4.891123439667129, "grad_norm": 0.41299846546591695, "learning_rate": 1.4436915152739039e-08, "loss": 0.3327, "step": 7053 }, { "epoch": 4.891816920943135, "grad_norm": 0.4180142352879065, "learning_rate": 1.4253678256822e-08, "loss": 0.3127, "step": 7054 }, { "epoch": 4.89251040221914, "grad_norm": 0.4036307171221583, "learning_rate": 1.4071609983735134e-08, "loss": 0.324, "step": 7055 }, { "epoch": 4.893203883495145, "grad_norm": 0.4269339794498483, "learning_rate": 1.3890710376152638e-08, "loss": 0.3415, "step": 7056 }, { "epoch": 4.893897364771151, "grad_norm": 0.4126353564337846, "learning_rate": 1.3710979476476705e-08, "loss": 0.3294, "step": 7057 }, { "epoch": 4.894590846047157, "grad_norm": 0.4174521233755747, "learning_rate": 1.3532417326834746e-08, "loss": 0.3769, "step": 7058 }, { "epoch": 4.8952843273231625, "grad_norm": 0.4129989514917506, "learning_rate": 1.3355023969080505e-08, "loss": 0.3409, "step": 7059 }, { "epoch": 4.895977808599168, "grad_norm": 0.44700227140726523, "learning_rate": 1.3178799444794054e-08, "loss": 0.36, "step": 7060 }, { "epoch": 4.896671289875173, "grad_norm": 0.42706512180609374, "learning_rate": 1.3003743795280133e-08, "loss": 0.3839, "step": 7061 }, { "epoch": 4.897364771151179, "grad_norm": 0.43007091027190886, "learning_rate": 1.282985706157147e-08, "loss": 0.371, "step": 7062 }, { "epoch": 4.898058252427185, "grad_norm": 0.43856483768546134, "learning_rate": 1.2657139284425468e-08, "loss": 0.391, "step": 7063 }, { "epoch": 4.89875173370319, "grad_norm": 0.4250172833116743, "learning_rate": 1.248559050432585e-08, "loss": 0.3798, "step": 7064 }, { "epoch": 4.899445214979195, "grad_norm": 0.3825188785065298, "learning_rate": 1.2315210761482676e-08, "loss": 0.3687, "step": 7065 }, { "epoch": 4.900138696255201, "grad_norm": 0.4270739073874196, "learning_rate": 1.2146000095831777e-08, "loss": 0.3225, "step": 7066 }, { "epoch": 4.900832177531207, "grad_norm": 0.5045902051465194, "learning_rate": 1.1977958547034207e-08, "loss": 0.3759, "step": 7067 }, { "epoch": 4.901525658807213, "grad_norm": 0.4327871423906362, "learning_rate": 1.1811086154478458e-08, "loss": 0.3883, "step": 7068 }, { "epoch": 4.902219140083218, "grad_norm": 0.40798227694211514, "learning_rate": 1.164538295727824e-08, "loss": 0.3457, "step": 7069 }, { "epoch": 4.902912621359223, "grad_norm": 0.4077172283402849, "learning_rate": 1.1480848994272486e-08, "loss": 0.3332, "step": 7070 }, { "epoch": 4.903606102635229, "grad_norm": 0.4030325795048964, "learning_rate": 1.131748430402757e-08, "loss": 0.3578, "step": 7071 }, { "epoch": 4.904299583911234, "grad_norm": 0.5523680722582051, "learning_rate": 1.1155288924834529e-08, "loss": 0.3861, "step": 7072 }, { "epoch": 4.90499306518724, "grad_norm": 0.4189087197149499, "learning_rate": 1.0994262894710728e-08, "loss": 0.3381, "step": 7073 }, { "epoch": 4.9056865464632455, "grad_norm": 0.5228980179247452, "learning_rate": 1.083440625139931e-08, "loss": 0.3184, "step": 7074 }, { "epoch": 4.906380027739251, "grad_norm": 0.43055173519184403, "learning_rate": 1.0675719032370303e-08, "loss": 0.3496, "step": 7075 }, { "epoch": 4.907073509015257, "grad_norm": 0.9588559473262878, "learning_rate": 1.0518201274817841e-08, 
"loss": 0.3406, "step": 7076 }, { "epoch": 4.907766990291262, "grad_norm": 0.43695351069747884, "learning_rate": 1.0361853015664058e-08, "loss": 0.3519, "step": 7077 }, { "epoch": 4.908460471567268, "grad_norm": 0.373248715528517, "learning_rate": 1.0206674291555196e-08, "loss": 0.3817, "step": 7078 }, { "epoch": 4.9091539528432735, "grad_norm": 0.42625262911793366, "learning_rate": 1.0052665138863827e-08, "loss": 0.3848, "step": 7079 }, { "epoch": 4.909847434119278, "grad_norm": 0.7145976646022153, "learning_rate": 9.89982559368885e-09, "loss": 0.3006, "step": 7080 }, { "epoch": 4.910540915395284, "grad_norm": 0.41867967950863977, "learning_rate": 9.748155691854943e-09, "loss": 0.38, "step": 7081 }, { "epoch": 4.91123439667129, "grad_norm": 0.44160569036967745, "learning_rate": 9.59765546891256e-09, "loss": 0.355, "step": 7082 }, { "epoch": 4.911927877947296, "grad_norm": 0.4537941931538326, "learning_rate": 9.448324960136812e-09, "loss": 0.336, "step": 7083 }, { "epoch": 4.9126213592233015, "grad_norm": 0.407616887547813, "learning_rate": 9.300164200530815e-09, "loss": 0.4169, "step": 7084 }, { "epoch": 4.913314840499306, "grad_norm": 0.38994637144499206, "learning_rate": 9.153173224821788e-09, "loss": 0.3356, "step": 7085 }, { "epoch": 4.914008321775312, "grad_norm": 0.4621612958587089, "learning_rate": 9.007352067463837e-09, "loss": 0.3511, "step": 7086 }, { "epoch": 4.914701803051318, "grad_norm": 0.40015128452525367, "learning_rate": 8.862700762635734e-09, "loss": 0.3559, "step": 7087 }, { "epoch": 4.915395284327323, "grad_norm": 0.42947360125160833, "learning_rate": 8.71921934424369e-09, "loss": 0.3678, "step": 7088 }, { "epoch": 4.9160887656033285, "grad_norm": 0.39992195945735676, "learning_rate": 8.57690784591747e-09, "loss": 0.3707, "step": 7089 }, { "epoch": 4.916782246879334, "grad_norm": 0.3998550897450325, "learning_rate": 8.435766301014837e-09, "loss": 0.3518, "step": 7090 }, { "epoch": 4.91747572815534, "grad_norm": 0.39413823337501064, "learning_rate": 8.295794742617658e-09, "loss": 0.3211, "step": 7091 }, { "epoch": 4.918169209431346, "grad_norm": 0.4563999153477434, "learning_rate": 8.156993203534691e-09, "loss": 0.3595, "step": 7092 }, { "epoch": 4.918862690707351, "grad_norm": 0.3928458532096198, "learning_rate": 8.019361716299912e-09, "loss": 0.3937, "step": 7093 }, { "epoch": 4.9195561719833565, "grad_norm": 0.4185983600252478, "learning_rate": 7.88290031317307e-09, "loss": 0.3549, "step": 7094 }, { "epoch": 4.920249653259362, "grad_norm": 0.7822984738244929, "learning_rate": 7.74760902613969e-09, "loss": 0.3522, "step": 7095 }, { "epoch": 4.920943134535367, "grad_norm": 0.4280494713069808, "learning_rate": 7.61348788691163e-09, "loss": 0.3868, "step": 7096 }, { "epoch": 4.921636615811373, "grad_norm": 0.5805247856423185, "learning_rate": 7.480536926925408e-09, "loss": 0.3219, "step": 7097 }, { "epoch": 4.922330097087379, "grad_norm": 0.39361981063657586, "learning_rate": 7.348756177343319e-09, "loss": 0.3418, "step": 7098 }, { "epoch": 4.9230235783633844, "grad_norm": 0.4323439155185724, "learning_rate": 7.218145669054544e-09, "loss": 0.365, "step": 7099 }, { "epoch": 4.92371705963939, "grad_norm": 0.6507807845998212, "learning_rate": 7.088705432672926e-09, "loss": 0.3992, "step": 7100 }, { "epoch": 4.924410540915395, "grad_norm": 0.4162201792567533, "learning_rate": 6.960435498538642e-09, "loss": 0.3523, "step": 7101 }, { "epoch": 4.925104022191401, "grad_norm": 0.3931946134907611, "learning_rate": 6.833335896716531e-09, "loss": 0.4205, "step": 7102 }, { "epoch": 
4.925797503467407, "grad_norm": 0.42435835286730755, "learning_rate": 6.707406656998872e-09, "loss": 0.3542, "step": 7103 }, { "epoch": 4.9264909847434115, "grad_norm": 0.38737572198406295, "learning_rate": 6.5826478089014985e-09, "loss": 0.3453, "step": 7104 }, { "epoch": 4.927184466019417, "grad_norm": 0.4107577928920802, "learning_rate": 6.4590593816676875e-09, "loss": 0.3147, "step": 7105 }, { "epoch": 4.927877947295423, "grad_norm": 0.403061784628308, "learning_rate": 6.336641404265376e-09, "loss": 0.3768, "step": 7106 }, { "epoch": 4.928571428571429, "grad_norm": 0.40829423505655965, "learning_rate": 6.215393905388278e-09, "loss": 0.3678, "step": 7107 }, { "epoch": 4.929264909847435, "grad_norm": 0.4053521142682166, "learning_rate": 6.09531691345644e-09, "loss": 0.3432, "step": 7108 }, { "epoch": 4.9299583911234395, "grad_norm": 0.3785363134026289, "learning_rate": 5.976410456614567e-09, "loss": 0.2789, "step": 7109 }, { "epoch": 4.930651872399445, "grad_norm": 0.40055105473222985, "learning_rate": 5.858674562733701e-09, "loss": 0.3634, "step": 7110 }, { "epoch": 4.931345353675451, "grad_norm": 0.3899632262172668, "learning_rate": 5.7421092594101004e-09, "loss": 0.3175, "step": 7111 }, { "epoch": 4.932038834951456, "grad_norm": 0.4210211754862228, "learning_rate": 5.626714573966352e-09, "loss": 0.3144, "step": 7112 }, { "epoch": 4.932732316227462, "grad_norm": 0.40473876869257785, "learning_rate": 5.51249053344971e-09, "loss": 0.353, "step": 7113 }, { "epoch": 4.933425797503467, "grad_norm": 0.4070079827340145, "learning_rate": 5.3994371646332035e-09, "loss": 0.3554, "step": 7114 }, { "epoch": 4.934119278779473, "grad_norm": 0.40054824740715883, "learning_rate": 5.28755449401619e-09, "loss": 0.3547, "step": 7115 }, { "epoch": 4.934812760055479, "grad_norm": 0.3716953117983449, "learning_rate": 5.176842547823246e-09, "loss": 0.3348, "step": 7116 }, { "epoch": 4.935506241331484, "grad_norm": 0.4842082602057677, "learning_rate": 5.067301352004173e-09, "loss": 0.3615, "step": 7117 }, { "epoch": 4.93619972260749, "grad_norm": 0.3988125373189556, "learning_rate": 4.9589309322339855e-09, "loss": 0.334, "step": 7118 }, { "epoch": 4.936893203883495, "grad_norm": 0.4429792862866007, "learning_rate": 4.851731313915142e-09, "loss": 0.3452, "step": 7119 }, { "epoch": 4.9375866851595, "grad_norm": 0.3837224278088282, "learning_rate": 4.745702522174211e-09, "loss": 0.329, "step": 7120 }, { "epoch": 4.938280166435506, "grad_norm": 0.47356687930404123, "learning_rate": 4.64084458186298e-09, "loss": 0.3284, "step": 7121 }, { "epoch": 4.938973647711512, "grad_norm": 0.3972972665988645, "learning_rate": 4.537157517559565e-09, "loss": 0.3516, "step": 7122 }, { "epoch": 4.939667128987518, "grad_norm": 0.41403779851119105, "learning_rate": 4.434641353567859e-09, "loss": 0.376, "step": 7123 }, { "epoch": 4.940360610263523, "grad_norm": 0.3930310031152471, "learning_rate": 4.333296113916419e-09, "loss": 0.3105, "step": 7124 }, { "epoch": 4.941054091539528, "grad_norm": 0.4433083584155557, "learning_rate": 4.233121822359576e-09, "loss": 0.3449, "step": 7125 }, { "epoch": 4.941747572815534, "grad_norm": 0.3775190964602308, "learning_rate": 4.134118502378548e-09, "loss": 0.3548, "step": 7126 }, { "epoch": 4.94244105409154, "grad_norm": 0.532387712211218, "learning_rate": 4.036286177178661e-09, "loss": 0.354, "step": 7127 }, { "epoch": 4.943134535367545, "grad_norm": 0.4585703117502366, "learning_rate": 3.939624869689907e-09, "loss": 0.3615, "step": 7128 }, { "epoch": 4.94382801664355, "grad_norm": 
1.480065738645681, "learning_rate": 3.844134602570826e-09, "loss": 0.3546, "step": 7129 }, { "epoch": 4.944521497919556, "grad_norm": 0.43030452013479187, "learning_rate": 3.749815398202405e-09, "loss": 0.3168, "step": 7130 }, { "epoch": 4.945214979195562, "grad_norm": 0.4260708572746793, "learning_rate": 3.656667278692516e-09, "loss": 0.3428, "step": 7131 }, { "epoch": 4.945908460471568, "grad_norm": 0.4638901817332141, "learning_rate": 3.5646902658748037e-09, "loss": 0.3501, "step": 7132 }, { "epoch": 4.946601941747573, "grad_norm": 0.4583420023247845, "learning_rate": 3.4738843813075795e-09, "loss": 0.3743, "step": 7133 }, { "epoch": 4.947295423023578, "grad_norm": 0.3987034843061641, "learning_rate": 3.3842496462754837e-09, "loss": 0.3417, "step": 7134 }, { "epoch": 4.947988904299584, "grad_norm": 0.40547612685671913, "learning_rate": 3.295786081788377e-09, "loss": 0.3861, "step": 7135 }, { "epoch": 4.948682385575589, "grad_norm": 0.4363235490099635, "learning_rate": 3.2084937085807844e-09, "loss": 0.3693, "step": 7136 }, { "epoch": 4.949375866851595, "grad_norm": 0.4457057527680636, "learning_rate": 3.1223725471135613e-09, "loss": 0.3442, "step": 7137 }, { "epoch": 4.950069348127601, "grad_norm": 0.37605432501494873, "learning_rate": 3.037422617573893e-09, "loss": 0.3539, "step": 7138 }, { "epoch": 4.950762829403606, "grad_norm": 0.4138186809228635, "learning_rate": 2.953643939871964e-09, "loss": 0.3365, "step": 7139 }, { "epoch": 4.951456310679612, "grad_norm": 0.37527648900238364, "learning_rate": 2.8710365336459546e-09, "loss": 0.3537, "step": 7140 }, { "epoch": 4.952149791955617, "grad_norm": 0.40656410976225216, "learning_rate": 2.789600418258154e-09, "loss": 0.3487, "step": 7141 }, { "epoch": 4.952843273231623, "grad_norm": 0.5160649085711888, "learning_rate": 2.7093356127960712e-09, "loss": 0.3644, "step": 7142 }, { "epoch": 4.9535367545076285, "grad_norm": 0.42775218125201714, "learning_rate": 2.6302421360741014e-09, "loss": 0.3152, "step": 7143 }, { "epoch": 4.954230235783633, "grad_norm": 0.4125229433260301, "learning_rate": 2.5523200066301935e-09, "loss": 0.3299, "step": 7144 }, { "epoch": 4.954923717059639, "grad_norm": 0.418855765509575, "learning_rate": 2.475569242729736e-09, "loss": 0.4071, "step": 7145 }, { "epoch": 4.955617198335645, "grad_norm": 0.43067685886835083, "learning_rate": 2.3999898623622288e-09, "loss": 0.385, "step": 7146 }, { "epoch": 4.956310679611651, "grad_norm": 0.40011527404403346, "learning_rate": 2.3255818832423894e-09, "loss": 0.3303, "step": 7147 }, { "epoch": 4.9570041608876565, "grad_norm": 0.42203301734339205, "learning_rate": 2.252345322811267e-09, "loss": 0.3437, "step": 7148 }, { "epoch": 4.957697642163661, "grad_norm": 0.3737632311503237, "learning_rate": 2.1802801982351294e-09, "loss": 0.3006, "step": 7149 }, { "epoch": 4.958391123439667, "grad_norm": 0.394844019404785, "learning_rate": 2.109386526405466e-09, "loss": 0.3175, "step": 7150 }, { "epoch": 4.959084604715673, "grad_norm": 0.4109833593874536, "learning_rate": 2.0396643239389834e-09, "loss": 0.3691, "step": 7151 }, { "epoch": 4.959778085991678, "grad_norm": 0.41160675767056626, "learning_rate": 1.9711136071787206e-09, "loss": 0.3755, "step": 7152 }, { "epoch": 4.960471567267684, "grad_norm": 0.3713147211785371, "learning_rate": 1.90373439219127e-09, "loss": 0.3131, "step": 7153 }, { "epoch": 4.961165048543689, "grad_norm": 0.675322868087702, "learning_rate": 1.8375266947712188e-09, "loss": 0.3772, "step": 7154 }, { "epoch": 4.961858529819695, "grad_norm": 0.44718262080735793, 
"learning_rate": 1.772490530436155e-09, "loss": 0.3534, "step": 7155 }, { "epoch": 4.962552011095701, "grad_norm": 0.38804943209509857, "learning_rate": 1.7086259144305507e-09, "loss": 0.3518, "step": 7156 }, { "epoch": 4.963245492371706, "grad_norm": 0.43518653897743365, "learning_rate": 1.6459328617240978e-09, "loss": 0.378, "step": 7157 }, { "epoch": 4.9639389736477115, "grad_norm": 0.4692201853941706, "learning_rate": 1.5844113870105981e-09, "loss": 0.3548, "step": 7158 }, { "epoch": 4.964632454923717, "grad_norm": 0.38375468242062444, "learning_rate": 1.524061504711294e-09, "loss": 0.3442, "step": 7159 }, { "epoch": 4.965325936199722, "grad_norm": 0.4532502337646082, "learning_rate": 1.4648832289709812e-09, "loss": 0.3792, "step": 7160 }, { "epoch": 4.966019417475728, "grad_norm": 0.3943191174588462, "learning_rate": 1.406876573660787e-09, "loss": 0.3334, "step": 7161 }, { "epoch": 4.966712898751734, "grad_norm": 0.45066506419530183, "learning_rate": 1.3500415523776123e-09, "loss": 0.3885, "step": 7162 }, { "epoch": 4.9674063800277395, "grad_norm": 0.4421344145332074, "learning_rate": 1.2943781784424681e-09, "loss": 0.3752, "step": 7163 }, { "epoch": 4.968099861303745, "grad_norm": 0.3944046743074427, "learning_rate": 1.2398864649032505e-09, "loss": 0.363, "step": 7164 }, { "epoch": 4.96879334257975, "grad_norm": 0.4313886380543036, "learning_rate": 1.1865664245314101e-09, "loss": 0.3576, "step": 7165 }, { "epoch": 4.969486823855756, "grad_norm": 0.819869336042323, "learning_rate": 1.1344180698258377e-09, "loss": 0.3821, "step": 7166 }, { "epoch": 4.970180305131762, "grad_norm": 0.45643068110033763, "learning_rate": 1.0834414130084236e-09, "loss": 0.3579, "step": 7167 }, { "epoch": 4.970873786407767, "grad_norm": 2.8618563914122612, "learning_rate": 1.0336364660290532e-09, "loss": 0.3536, "step": 7168 }, { "epoch": 4.971567267683772, "grad_norm": 0.386465653944153, "learning_rate": 9.850032405611665e-10, "loss": 0.3451, "step": 7169 }, { "epoch": 4.972260748959778, "grad_norm": 0.3893868756867282, "learning_rate": 9.375417480034232e-10, "loss": 0.351, "step": 7170 }, { "epoch": 4.972954230235784, "grad_norm": 0.4078707652612696, "learning_rate": 8.912519994813684e-10, "loss": 0.3475, "step": 7171 }, { "epoch": 4.97364771151179, "grad_norm": 0.40150829492419765, "learning_rate": 8.461340058446566e-10, "loss": 0.3925, "step": 7172 }, { "epoch": 4.9743411927877945, "grad_norm": 0.39618053064184006, "learning_rate": 8.02187777668162e-10, "loss": 0.3024, "step": 7173 }, { "epoch": 4.9750346740638, "grad_norm": 0.4162817270365035, "learning_rate": 7.594133252530888e-10, "loss": 0.3629, "step": 7174 }, { "epoch": 4.975728155339806, "grad_norm": 0.4112483806849657, "learning_rate": 7.178106586258615e-10, "loss": 0.3581, "step": 7175 }, { "epoch": 4.976421636615811, "grad_norm": 0.4093664270423176, "learning_rate": 6.773797875364585e-10, "loss": 0.4025, "step": 7176 }, { "epoch": 4.977115117891817, "grad_norm": 0.6649449873759345, "learning_rate": 6.381207214628538e-10, "loss": 0.403, "step": 7177 }, { "epoch": 4.9778085991678225, "grad_norm": 0.3949098028085531, "learning_rate": 6.000334696071309e-10, "loss": 0.3397, "step": 7178 }, { "epoch": 4.978502080443828, "grad_norm": 0.40806486102133166, "learning_rate": 5.631180408954829e-10, "loss": 0.3588, "step": 7179 }, { "epoch": 4.979195561719834, "grad_norm": 0.4325669155327499, "learning_rate": 5.273744439815431e-10, "loss": 0.346, "step": 7180 }, { "epoch": 4.979889042995839, "grad_norm": 0.4185228147340133, "learning_rate": 
4.928026872436098e-10, "loss": 0.3812, "step": 7181 }, { "epoch": 4.980582524271845, "grad_norm": 0.4213328422111207, "learning_rate": 4.5940277878409047e-10, "loss": 0.3798, "step": 7182 }, { "epoch": 4.98127600554785, "grad_norm": 0.482819301252033, "learning_rate": 4.2717472643227785e-10, "loss": 0.3791, "step": 7183 }, { "epoch": 4.981969486823855, "grad_norm": 0.404326783039016, "learning_rate": 3.961185377421295e-10, "loss": 0.3556, "step": 7184 }, { "epoch": 4.982662968099861, "grad_norm": 0.41607815316935426, "learning_rate": 3.662342199933777e-10, "loss": 0.3772, "step": 7185 }, { "epoch": 4.983356449375867, "grad_norm": 0.3772116345648475, "learning_rate": 3.375217801898645e-10, "loss": 0.2853, "step": 7186 }, { "epoch": 4.984049930651873, "grad_norm": 0.4027984181463469, "learning_rate": 3.099812250617618e-10, "loss": 0.3264, "step": 7187 }, { "epoch": 4.984743411927878, "grad_norm": 0.4693428818263529, "learning_rate": 2.8361256106501644e-10, "loss": 0.378, "step": 7188 }, { "epoch": 4.985436893203883, "grad_norm": 0.4115243332211786, "learning_rate": 2.58415794379685e-10, "loss": 0.3393, "step": 7189 }, { "epoch": 4.986130374479889, "grad_norm": 0.41852247502798956, "learning_rate": 2.343909309115988e-10, "loss": 0.3628, "step": 7190 }, { "epoch": 4.986823855755895, "grad_norm": 0.4213731088509524, "learning_rate": 2.1153797629291928e-10, "loss": 0.3309, "step": 7191 }, { "epoch": 4.9875173370319, "grad_norm": 0.39253205449439826, "learning_rate": 1.8985693587880715e-10, "loss": 0.3565, "step": 7192 }, { "epoch": 4.9882108183079055, "grad_norm": 0.4284526466840562, "learning_rate": 1.6934781475241856e-10, "loss": 0.3823, "step": 7193 }, { "epoch": 4.988904299583911, "grad_norm": 0.41975107589138805, "learning_rate": 1.500106177204641e-10, "loss": 0.3774, "step": 7194 }, { "epoch": 4.989597780859917, "grad_norm": 0.37972385363613254, "learning_rate": 1.3184534931487414e-10, "loss": 0.3468, "step": 7195 }, { "epoch": 4.990291262135923, "grad_norm": 0.4092809448296632, "learning_rate": 1.148520137944642e-10, "loss": 0.3242, "step": 7196 }, { "epoch": 4.990984743411928, "grad_norm": 0.4228224967768626, "learning_rate": 9.903061514160428e-11, "loss": 0.3583, "step": 7197 }, { "epoch": 4.991678224687933, "grad_norm": 0.40379280414258806, "learning_rate": 8.43811570655495e-11, "loss": 0.3662, "step": 7198 }, { "epoch": 4.992371705963939, "grad_norm": 0.4440129635914101, "learning_rate": 7.090364299910946e-11, "loss": 0.3607, "step": 7199 }, { "epoch": 4.993065187239944, "grad_norm": 0.3868024627311072, "learning_rate": 5.859807610142377e-11, "loss": 0.3622, "step": 7200 }, { "epoch": 4.99375866851595, "grad_norm": 0.43580068144591233, "learning_rate": 4.746445925740695e-11, "loss": 0.3606, "step": 7201 }, { "epoch": 4.994452149791956, "grad_norm": 0.43794296060757415, "learning_rate": 3.750279507608312e-11, "loss": 0.3923, "step": 7202 }, { "epoch": 4.995145631067961, "grad_norm": 0.4056518962016014, "learning_rate": 2.8713085892806415e-11, "loss": 0.4026, "step": 7203 }, { "epoch": 4.995839112343967, "grad_norm": 0.36567630084588015, "learning_rate": 2.109533376759565e-11, "loss": 0.3209, "step": 7204 }, { "epoch": 4.996532593619972, "grad_norm": 0.462277374759319, "learning_rate": 1.4649540486244564e-11, "loss": 0.3666, "step": 7205 }, { "epoch": 4.997226074895978, "grad_norm": 0.4190622269402481, "learning_rate": 9.375707559211578e-12, "loss": 0.3559, "step": 7206 }, { "epoch": 4.997919556171984, "grad_norm": 0.4448874348254667, "learning_rate": 5.273836223285145e-12, "loss": 
0.3286, "step": 7207 }, { "epoch": 4.9986130374479885, "grad_norm": 0.4704150656196769, "learning_rate": 2.3439274393632916e-12, "loss": 0.398, "step": 7208 }, { "epoch": 4.999306518723994, "grad_norm": 0.45786451727663974, "learning_rate": 5.859818941189588e-13, "loss": 0.3508, "step": 7209 }, { "epoch": 5.0, "grad_norm": 0.39151771220459436, "learning_rate": 0.0, "loss": 0.3327, "step": 7210 }, { "epoch": 5.0, "step": 7210, "total_flos": 708732517220352.0, "train_loss": 0.4619506940348965, "train_runtime": 68592.6905, "train_samples_per_second": 1.681, "train_steps_per_second": 0.105 } ], "logging_steps": 1, "max_steps": 7210, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 10, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 708732517220352.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }