{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 1.9996540510620633, "eval_steps": 1807, "global_step": 7226, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00027675915034940844, "grad_norm": 157798.421875, "learning_rate": 0.0, "loss": 1.3986, "step": 1 }, { "epoch": 0.00027675915034940844, "eval_loss": 1.4790441989898682, "eval_runtime": 3257.9393, "eval_samples_per_second": 74.935, "eval_steps_per_second": 2.342, "step": 1 }, { "epoch": 0.0005535183006988169, "grad_norm": 123621.515625, "learning_rate": 4.6296296296296295e-08, "loss": 1.4154, "step": 2 }, { "epoch": 0.0008302774510482253, "grad_norm": 157624.015625, "learning_rate": 9.259259259259259e-08, "loss": 0.8467, "step": 3 }, { "epoch": 0.0011070366013976338, "grad_norm": 30580.8984375, "learning_rate": 1.3888888888888888e-07, "loss": 0.7702, "step": 4 }, { "epoch": 0.0013837957517470422, "grad_norm": 3539.05078125, "learning_rate": 1.8518518518518518e-07, "loss": 0.7559, "step": 5 }, { "epoch": 0.0016605549020964506, "grad_norm": 1374.5802001953125, "learning_rate": 2.3148148148148148e-07, "loss": 0.79, "step": 6 }, { "epoch": 0.001937314052445859, "grad_norm": 460.08746337890625, "learning_rate": 2.7777777777777776e-07, "loss": 0.7618, "step": 7 }, { "epoch": 0.0022140732027952676, "grad_norm": 547.9286499023438, "learning_rate": 3.2407407407407406e-07, "loss": 0.7249, "step": 8 }, { "epoch": 0.002490832353144676, "grad_norm": 227.90025329589844, "learning_rate": 3.7037037037037036e-07, "loss": 0.753, "step": 9 }, { "epoch": 0.0027675915034940843, "grad_norm": 268.5122375488281, "learning_rate": 4.1666666666666667e-07, "loss": 0.7355, "step": 10 }, { "epoch": 0.0030443506538434927, "grad_norm": 893.9356689453125, "learning_rate": 4.6296296296296297e-07, "loss": 0.7717, "step": 11 }, { "epoch": 0.003321109804192901, "grad_norm": 409.0135498046875, "learning_rate": 5.092592592592593e-07, "loss": 0.7491, "step": 12 }, { "epoch": 0.0035978689545423095, "grad_norm": 250.79421997070312, "learning_rate": 5.555555555555555e-07, "loss": 0.7421, "step": 13 }, { "epoch": 0.003874628104891718, "grad_norm": 181.8348846435547, "learning_rate": 6.018518518518519e-07, "loss": 0.7286, "step": 14 }, { "epoch": 0.004151387255241126, "grad_norm": 92.64807891845703, "learning_rate": 6.481481481481481e-07, "loss": 0.7448, "step": 15 }, { "epoch": 0.004428146405590535, "grad_norm": 213.4900360107422, "learning_rate": 6.944444444444446e-07, "loss": 0.7576, "step": 16 }, { "epoch": 0.004704905555939943, "grad_norm": 282.3073425292969, "learning_rate": 7.407407407407407e-07, "loss": 0.7118, "step": 17 }, { "epoch": 0.004981664706289352, "grad_norm": 277.14715576171875, "learning_rate": 7.870370370370371e-07, "loss": 0.6881, "step": 18 }, { "epoch": 0.00525842385663876, "grad_norm": 3345.447021484375, "learning_rate": 8.333333333333333e-07, "loss": 0.774, "step": 19 }, { "epoch": 0.005535183006988169, "grad_norm": 1044.3218994140625, "learning_rate": 8.796296296296297e-07, "loss": 0.7428, "step": 20 }, { "epoch": 0.005811942157337577, "grad_norm": 391.93756103515625, "learning_rate": 9.259259259259259e-07, "loss": 0.7476, "step": 21 }, { "epoch": 0.0060887013076869854, "grad_norm": 329.55657958984375, "learning_rate": 9.722222222222224e-07, "loss": 0.7206, "step": 22 }, { "epoch": 0.006365460458036393, "grad_norm": 410.5951232910156, "learning_rate": 1.0185185185185185e-06, "loss": 0.7739, "step": 23 }, { "epoch": 0.006642219608385802, "grad_norm": 358.52435302734375, "learning_rate": 1.0648148148148149e-06, "loss": 0.7436, "step": 24 }, { "epoch": 0.006918978758735211, "grad_norm": 280.7945861816406, "learning_rate": 1.111111111111111e-06, "loss": 0.7538, "step": 25 }, { "epoch": 0.007195737909084619, "grad_norm": 219.53965759277344, "learning_rate": 1.1574074074074076e-06, "loss": 0.7288, "step": 26 }, { "epoch": 0.007472497059434028, "grad_norm": 143.45375061035156, "learning_rate": 1.2037037037037037e-06, "loss": 0.7633, "step": 27 }, { "epoch": 0.007749256209783436, "grad_norm": 807.6080322265625, "learning_rate": 1.25e-06, "loss": 0.7216, "step": 28 }, { "epoch": 0.008026015360132845, "grad_norm": 279.67047119140625, "learning_rate": 1.2962962962962962e-06, "loss": 0.7513, "step": 29 }, { "epoch": 0.008302774510482253, "grad_norm": 115.38037109375, "learning_rate": 1.3425925925925928e-06, "loss": 0.6848, "step": 30 }, { "epoch": 0.00857953366083166, "grad_norm": 98.72652435302734, "learning_rate": 1.3888888888888892e-06, "loss": 0.7368, "step": 31 }, { "epoch": 0.00885629281118107, "grad_norm": 1260.2850341796875, "learning_rate": 1.4351851851851853e-06, "loss": 0.7267, "step": 32 }, { "epoch": 0.009133051961530478, "grad_norm": 636.1648559570312, "learning_rate": 1.4814814814814815e-06, "loss": 0.7543, "step": 33 }, { "epoch": 0.009409811111879886, "grad_norm": 426.1076965332031, "learning_rate": 1.527777777777778e-06, "loss": 0.742, "step": 34 }, { "epoch": 0.009686570262229296, "grad_norm": 62.214149475097656, "learning_rate": 1.5740740740740742e-06, "loss": 0.7406, "step": 35 }, { "epoch": 0.009963329412578704, "grad_norm": 64.74871063232422, "learning_rate": 1.6203703703703705e-06, "loss": 0.7237, "step": 36 }, { "epoch": 0.010240088562928112, "grad_norm": 124.65802764892578, "learning_rate": 1.6666666666666667e-06, "loss": 0.723, "step": 37 }, { "epoch": 0.01051684771327752, "grad_norm": 52.544761657714844, "learning_rate": 1.7129629629629632e-06, "loss": 0.7325, "step": 38 }, { "epoch": 0.01079360686362693, "grad_norm": 31.647884368896484, "learning_rate": 1.7592592592592594e-06, "loss": 0.7299, "step": 39 }, { "epoch": 0.011070366013976337, "grad_norm": 23.782878875732422, "learning_rate": 1.8055555555555557e-06, "loss": 0.7155, "step": 40 }, { "epoch": 0.011347125164325745, "grad_norm": 36.1136360168457, "learning_rate": 1.8518518518518519e-06, "loss": 0.7189, "step": 41 }, { "epoch": 0.011623884314675153, "grad_norm": 25.194530487060547, "learning_rate": 1.8981481481481484e-06, "loss": 0.7469, "step": 42 }, { "epoch": 0.011900643465024563, "grad_norm": 13.630550384521484, "learning_rate": 1.944444444444445e-06, "loss": 0.7504, "step": 43 }, { "epoch": 0.012177402615373971, "grad_norm": 24.556034088134766, "learning_rate": 1.9907407407407407e-06, "loss": 0.7062, "step": 44 }, { "epoch": 0.012454161765723379, "grad_norm": 27.14192771911621, "learning_rate": 2.037037037037037e-06, "loss": 0.7501, "step": 45 }, { "epoch": 0.012730920916072787, "grad_norm": 68.65731811523438, "learning_rate": 2.0833333333333334e-06, "loss": 0.6908, "step": 46 }, { "epoch": 0.013007680066422196, "grad_norm": 25.026607513427734, "learning_rate": 2.1296296296296298e-06, "loss": 0.7, "step": 47 }, { "epoch": 0.013284439216771604, "grad_norm": 28.280019760131836, "learning_rate": 2.175925925925926e-06, "loss": 0.7307, "step": 48 }, { "epoch": 0.013561198367121012, "grad_norm": 16.199451446533203, "learning_rate": 2.222222222222222e-06, "loss": 0.7192, "step": 49 }, { "epoch": 0.013837957517470422, "grad_norm": 20.097919464111328, "learning_rate": 2.268518518518519e-06, "loss": 0.7385, "step": 50 }, { "epoch": 0.01411471666781983, "grad_norm": 9.508541107177734, "learning_rate": 2.314814814814815e-06, "loss": 0.7246, "step": 51 }, { "epoch": 0.014391475818169238, "grad_norm": 21.276639938354492, "learning_rate": 2.361111111111111e-06, "loss": 0.6785, "step": 52 }, { "epoch": 0.014668234968518646, "grad_norm": 8.181532859802246, "learning_rate": 2.4074074074074075e-06, "loss": 0.6883, "step": 53 }, { "epoch": 0.014944994118868056, "grad_norm": 8.844629287719727, "learning_rate": 2.453703703703704e-06, "loss": 0.6914, "step": 54 }, { "epoch": 0.015221753269217464, "grad_norm": 12.433276176452637, "learning_rate": 2.5e-06, "loss": 0.6742, "step": 55 }, { "epoch": 0.015498512419566872, "grad_norm": 5.934035301208496, "learning_rate": 2.5462962962962966e-06, "loss": 0.6911, "step": 56 }, { "epoch": 0.01577527156991628, "grad_norm": 11.366135597229004, "learning_rate": 2.5925925925925925e-06, "loss": 0.7333, "step": 57 }, { "epoch": 0.01605203072026569, "grad_norm": 20.438087463378906, "learning_rate": 2.6388888888888893e-06, "loss": 0.6913, "step": 58 }, { "epoch": 0.016328789870615097, "grad_norm": 10.467955589294434, "learning_rate": 2.6851851851851856e-06, "loss": 0.7421, "step": 59 }, { "epoch": 0.016605549020964505, "grad_norm": 8.804167747497559, "learning_rate": 2.7314814814814816e-06, "loss": 0.7006, "step": 60 }, { "epoch": 0.016882308171313913, "grad_norm": 11.138696670532227, "learning_rate": 2.7777777777777783e-06, "loss": 0.7006, "step": 61 }, { "epoch": 0.01715906732166332, "grad_norm": 5.030925750732422, "learning_rate": 2.8240740740740743e-06, "loss": 0.735, "step": 62 }, { "epoch": 0.017435826472012732, "grad_norm": 6.271327018737793, "learning_rate": 2.8703703703703706e-06, "loss": 0.6595, "step": 63 }, { "epoch": 0.01771258562236214, "grad_norm": 5.730277061462402, "learning_rate": 2.916666666666667e-06, "loss": 0.7, "step": 64 }, { "epoch": 0.01798934477271155, "grad_norm": 6.434383869171143, "learning_rate": 2.962962962962963e-06, "loss": 0.7019, "step": 65 }, { "epoch": 0.018266103923060956, "grad_norm": 6.326523780822754, "learning_rate": 3.0092592592592597e-06, "loss": 0.6672, "step": 66 }, { "epoch": 0.018542863073410364, "grad_norm": 8.662154197692871, "learning_rate": 3.055555555555556e-06, "loss": 0.6734, "step": 67 }, { "epoch": 0.018819622223759772, "grad_norm": 5.192435264587402, "learning_rate": 3.101851851851852e-06, "loss": 0.6912, "step": 68 }, { "epoch": 0.01909638137410918, "grad_norm": 3.191532611846924, "learning_rate": 3.1481481481481483e-06, "loss": 0.6677, "step": 69 }, { "epoch": 0.01937314052445859, "grad_norm": 9.667448997497559, "learning_rate": 3.1944444444444443e-06, "loss": 0.6913, "step": 70 }, { "epoch": 0.019649899674808, "grad_norm": 6.778076648712158, "learning_rate": 3.240740740740741e-06, "loss": 0.6946, "step": 71 }, { "epoch": 0.019926658825157408, "grad_norm": 4.716245174407959, "learning_rate": 3.2870370370370374e-06, "loss": 0.7056, "step": 72 }, { "epoch": 0.020203417975506816, "grad_norm": 4.973514080047607, "learning_rate": 3.3333333333333333e-06, "loss": 0.6576, "step": 73 }, { "epoch": 0.020480177125856223, "grad_norm": 5.371499061584473, "learning_rate": 3.37962962962963e-06, "loss": 0.662, "step": 74 }, { "epoch": 0.02075693627620563, "grad_norm": 3.7577333450317383, "learning_rate": 3.4259259259259265e-06, "loss": 0.7073, "step": 75 }, { "epoch": 0.02103369542655504, "grad_norm": 3.848222255706787, "learning_rate": 3.4722222222222224e-06, "loss": 0.6891, "step": 76 }, { "epoch": 0.021310454576904447, "grad_norm": 4.544133186340332, "learning_rate": 3.5185185185185187e-06, "loss": 0.6892, "step": 77 }, { "epoch": 0.02158721372725386, "grad_norm": 6.629614353179932, "learning_rate": 3.5648148148148147e-06, "loss": 0.6822, "step": 78 }, { "epoch": 0.021863972877603267, "grad_norm": 7.30181884765625, "learning_rate": 3.6111111111111115e-06, "loss": 0.7095, "step": 79 }, { "epoch": 0.022140732027952675, "grad_norm": 3.7526092529296875, "learning_rate": 3.657407407407408e-06, "loss": 0.6276, "step": 80 }, { "epoch": 0.022417491178302083, "grad_norm": 6.957573413848877, "learning_rate": 3.7037037037037037e-06, "loss": 0.66, "step": 81 }, { "epoch": 0.02269425032865149, "grad_norm": 3.1005606651306152, "learning_rate": 3.7500000000000005e-06, "loss": 0.7057, "step": 82 }, { "epoch": 0.0229710094790009, "grad_norm": 2.814924478530884, "learning_rate": 3.796296296296297e-06, "loss": 0.7023, "step": 83 }, { "epoch": 0.023247768629350306, "grad_norm": 5.145057678222656, "learning_rate": 3.842592592592592e-06, "loss": 0.7066, "step": 84 }, { "epoch": 0.023524527779699718, "grad_norm": 2.512134313583374, "learning_rate": 3.88888888888889e-06, "loss": 0.6827, "step": 85 }, { "epoch": 0.023801286930049126, "grad_norm": 3.438232660293579, "learning_rate": 3.935185185185186e-06, "loss": 0.6374, "step": 86 }, { "epoch": 0.024078046080398534, "grad_norm": 2.3984875679016113, "learning_rate": 3.9814814814814814e-06, "loss": 0.6536, "step": 87 }, { "epoch": 0.024354805230747942, "grad_norm": 3.0461881160736084, "learning_rate": 4.027777777777779e-06, "loss": 0.6619, "step": 88 }, { "epoch": 0.02463156438109735, "grad_norm": 1.6848117113113403, "learning_rate": 4.074074074074074e-06, "loss": 0.6746, "step": 89 }, { "epoch": 0.024908323531446758, "grad_norm": 1.9511938095092773, "learning_rate": 4.1203703703703705e-06, "loss": 0.6477, "step": 90 }, { "epoch": 0.025185082681796166, "grad_norm": 0.9895209074020386, "learning_rate": 4.166666666666667e-06, "loss": 0.699, "step": 91 }, { "epoch": 0.025461841832145574, "grad_norm": 2.935434103012085, "learning_rate": 4.212962962962963e-06, "loss": 0.6447, "step": 92 }, { "epoch": 0.025738600982494985, "grad_norm": 1.0218720436096191, "learning_rate": 4.2592592592592596e-06, "loss": 0.6911, "step": 93 }, { "epoch": 0.026015360132844393, "grad_norm": 6.8750739097595215, "learning_rate": 4.305555555555556e-06, "loss": 0.663, "step": 94 }, { "epoch": 0.0262921192831938, "grad_norm": 3.8697006702423096, "learning_rate": 4.351851851851852e-06, "loss": 0.6747, "step": 95 }, { "epoch": 0.02656887843354321, "grad_norm": 1.398425817489624, "learning_rate": 4.398148148148149e-06, "loss": 0.6849, "step": 96 }, { "epoch": 0.026845637583892617, "grad_norm": 1.5975028276443481, "learning_rate": 4.444444444444444e-06, "loss": 0.6698, "step": 97 }, { "epoch": 0.027122396734242025, "grad_norm": 1.2972773313522339, "learning_rate": 4.490740740740741e-06, "loss": 0.6565, "step": 98 }, { "epoch": 0.027399155884591433, "grad_norm": 1.9984036684036255, "learning_rate": 4.537037037037038e-06, "loss": 0.6718, "step": 99 }, { "epoch": 0.027675915034940844, "grad_norm": 1.8312100172042847, "learning_rate": 4.583333333333333e-06, "loss": 0.6522, "step": 100 }, { "epoch": 0.027952674185290252, "grad_norm": 0.857390820980072, "learning_rate": 4.62962962962963e-06, "loss": 0.6514, "step": 101 }, { "epoch": 0.02822943333563966, "grad_norm": 3.698849678039551, "learning_rate": 4.675925925925927e-06, "loss": 0.6604, "step": 102 }, { "epoch": 0.028506192485989068, "grad_norm": 3.3909335136413574, "learning_rate": 4.722222222222222e-06, "loss": 0.6462, "step": 103 }, { "epoch": 0.028782951636338476, "grad_norm": 0.7684594988822937, "learning_rate": 4.768518518518519e-06, "loss": 0.6738, "step": 104 }, { "epoch": 0.029059710786687884, "grad_norm": 1.1762832403182983, "learning_rate": 4.814814814814815e-06, "loss": 0.6493, "step": 105 }, { "epoch": 0.029336469937037292, "grad_norm": 1.2886202335357666, "learning_rate": 4.861111111111111e-06, "loss": 0.6919, "step": 106 }, { "epoch": 0.029613229087386703, "grad_norm": 4.14570426940918, "learning_rate": 4.907407407407408e-06, "loss": 0.6456, "step": 107 }, { "epoch": 0.02988998823773611, "grad_norm": 3.7691867351531982, "learning_rate": 4.953703703703704e-06, "loss": 0.6803, "step": 108 }, { "epoch": 0.03016674738808552, "grad_norm": 4.378964900970459, "learning_rate": 5e-06, "loss": 0.657, "step": 109 }, { "epoch": 0.030443506538434927, "grad_norm": 2.6681692600250244, "learning_rate": 5.046296296296297e-06, "loss": 0.6363, "step": 110 }, { "epoch": 0.030720265688784335, "grad_norm": 28.402908325195312, "learning_rate": 5.092592592592593e-06, "loss": 0.641, "step": 111 }, { "epoch": 0.030997024839133743, "grad_norm": 1.2171270847320557, "learning_rate": 5.138888888888889e-06, "loss": 0.6366, "step": 112 }, { "epoch": 0.031273783989483155, "grad_norm": 1.4020628929138184, "learning_rate": 5.185185185185185e-06, "loss": 0.6291, "step": 113 }, { "epoch": 0.03155054313983256, "grad_norm": 11.452168464660645, "learning_rate": 5.231481481481482e-06, "loss": 0.6506, "step": 114 }, { "epoch": 0.03182730229018197, "grad_norm": 3.9192495346069336, "learning_rate": 5.2777777777777785e-06, "loss": 0.6274, "step": 115 }, { "epoch": 0.03210406144053138, "grad_norm": 2.005006790161133, "learning_rate": 5.324074074074075e-06, "loss": 0.6149, "step": 116 }, { "epoch": 0.032380820590880786, "grad_norm": 1.2242581844329834, "learning_rate": 5.370370370370371e-06, "loss": 0.6489, "step": 117 }, { "epoch": 0.032657579741230194, "grad_norm": 1.1410326957702637, "learning_rate": 5.416666666666667e-06, "loss": 0.6245, "step": 118 }, { "epoch": 0.0329343388915796, "grad_norm": 1.7393335103988647, "learning_rate": 5.462962962962963e-06, "loss": 0.6361, "step": 119 }, { "epoch": 0.03321109804192901, "grad_norm": 13.37915325164795, "learning_rate": 5.5092592592592595e-06, "loss": 0.652, "step": 120 }, { "epoch": 0.03348785719227842, "grad_norm": 7.169788360595703, "learning_rate": 5.555555555555557e-06, "loss": 0.6524, "step": 121 }, { "epoch": 0.033764616342627826, "grad_norm": 3.098963975906372, "learning_rate": 5.601851851851853e-06, "loss": 0.6241, "step": 122 }, { "epoch": 0.034041375492977234, "grad_norm": 1.9269957542419434, "learning_rate": 5.6481481481481485e-06, "loss": 0.6329, "step": 123 }, { "epoch": 0.03431813464332664, "grad_norm": 2.116347312927246, "learning_rate": 5.694444444444445e-06, "loss": 0.6459, "step": 124 }, { "epoch": 0.03459489379367605, "grad_norm": 3.9036386013031006, "learning_rate": 5.740740740740741e-06, "loss": 0.6605, "step": 125 }, { "epoch": 0.034871652944025465, "grad_norm": 223.25599670410156, "learning_rate": 5.787037037037038e-06, "loss": 0.6564, "step": 126 }, { "epoch": 0.03514841209437487, "grad_norm": 30.131168365478516, "learning_rate": 5.833333333333334e-06, "loss": 0.6491, "step": 127 }, { "epoch": 0.03542517124472428, "grad_norm": 2.0968823432922363, "learning_rate": 5.8796296296296295e-06, "loss": 0.6715, "step": 128 }, { "epoch": 0.03570193039507369, "grad_norm": 2.5923779010772705, "learning_rate": 5.925925925925926e-06, "loss": 0.6784, "step": 129 }, { "epoch": 0.0359786895454231, "grad_norm": 3.648529291152954, "learning_rate": 5.972222222222222e-06, "loss": 0.6181, "step": 130 }, { "epoch": 0.036255448695772505, "grad_norm": 4.013481140136719, "learning_rate": 6.018518518518519e-06, "loss": 0.6398, "step": 131 }, { "epoch": 0.03653220784612191, "grad_norm": 0.6803940534591675, "learning_rate": 6.064814814814816e-06, "loss": 0.6733, "step": 132 }, { "epoch": 0.03680896699647132, "grad_norm": 0.36187082529067993, "learning_rate": 6.111111111111112e-06, "loss": 0.6287, "step": 133 }, { "epoch": 0.03708572614682073, "grad_norm": 0.4760085642337799, "learning_rate": 6.157407407407408e-06, "loss": 0.6672, "step": 134 }, { "epoch": 0.03736248529717014, "grad_norm": 1.2846165895462036, "learning_rate": 6.203703703703704e-06, "loss": 0.6237, "step": 135 }, { "epoch": 0.037639244447519545, "grad_norm": 1.146018624305725, "learning_rate": 6.25e-06, "loss": 0.6182, "step": 136 }, { "epoch": 0.03791600359786895, "grad_norm": 0.6165791153907776, "learning_rate": 6.296296296296297e-06, "loss": 0.6777, "step": 137 }, { "epoch": 0.03819276274821836, "grad_norm": 0.40663859248161316, "learning_rate": 6.342592592592594e-06, "loss": 0.6572, "step": 138 }, { "epoch": 0.03846952189856777, "grad_norm": 0.7794756293296814, "learning_rate": 6.3888888888888885e-06, "loss": 0.6696, "step": 139 }, { "epoch": 0.03874628104891718, "grad_norm": 0.9923276305198669, "learning_rate": 6.435185185185186e-06, "loss": 0.6301, "step": 140 }, { "epoch": 0.03902304019926659, "grad_norm": 0.4137578010559082, "learning_rate": 6.481481481481482e-06, "loss": 0.6564, "step": 141 }, { "epoch": 0.039299799349616, "grad_norm": 0.34878572821617126, "learning_rate": 6.5277777777777784e-06, "loss": 0.6379, "step": 142 }, { "epoch": 0.03957655849996541, "grad_norm": 1.2829681634902954, "learning_rate": 6.574074074074075e-06, "loss": 0.6644, "step": 143 }, { "epoch": 0.039853317650314815, "grad_norm": 0.2951884865760803, "learning_rate": 6.620370370370371e-06, "loss": 0.6339, "step": 144 }, { "epoch": 0.04013007680066422, "grad_norm": 0.2863519787788391, "learning_rate": 6.666666666666667e-06, "loss": 0.6513, "step": 145 }, { "epoch": 0.04040683595101363, "grad_norm": 0.28115952014923096, "learning_rate": 6.712962962962963e-06, "loss": 0.6491, "step": 146 }, { "epoch": 0.04068359510136304, "grad_norm": 0.3666872978210449, "learning_rate": 6.75925925925926e-06, "loss": 0.6843, "step": 147 }, { "epoch": 0.04096035425171245, "grad_norm": 0.4518100321292877, "learning_rate": 6.8055555555555566e-06, "loss": 0.6037, "step": 148 }, { "epoch": 0.041237113402061855, "grad_norm": 0.3357824981212616, "learning_rate": 6.851851851851853e-06, "loss": 0.6236, "step": 149 }, { "epoch": 0.04151387255241126, "grad_norm": 0.5667005181312561, "learning_rate": 6.898148148148148e-06, "loss": 0.6243, "step": 150 }, { "epoch": 0.04179063170276067, "grad_norm": 1.0552740097045898, "learning_rate": 6.944444444444445e-06, "loss": 0.6237, "step": 151 }, { "epoch": 0.04206739085311008, "grad_norm": 1.569752812385559, "learning_rate": 6.990740740740741e-06, "loss": 0.6414, "step": 152 }, { "epoch": 0.04234415000345949, "grad_norm": 1.1076287031173706, "learning_rate": 7.0370370370370375e-06, "loss": 0.6289, "step": 153 }, { "epoch": 0.042620909153808895, "grad_norm": 0.21923883259296417, "learning_rate": 7.083333333333335e-06, "loss": 0.6366, "step": 154 }, { "epoch": 0.04289766830415831, "grad_norm": 0.2349029928445816, "learning_rate": 7.129629629629629e-06, "loss": 0.6606, "step": 155 }, { "epoch": 0.04317442745450772, "grad_norm": 0.2937617897987366, "learning_rate": 7.1759259259259266e-06, "loss": 0.6392, "step": 156 }, { "epoch": 0.043451186604857125, "grad_norm": 0.5434517860412598, "learning_rate": 7.222222222222223e-06, "loss": 0.6495, "step": 157 }, { "epoch": 0.04372794575520653, "grad_norm": 0.28126370906829834, "learning_rate": 7.268518518518519e-06, "loss": 0.6507, "step": 158 }, { "epoch": 0.04400470490555594, "grad_norm": 0.4405106008052826, "learning_rate": 7.314814814814816e-06, "loss": 0.6195, "step": 159 }, { "epoch": 0.04428146405590535, "grad_norm": 0.3521764278411865, "learning_rate": 7.361111111111112e-06, "loss": 0.6567, "step": 160 }, { "epoch": 0.04455822320625476, "grad_norm": 0.21893535554409027, "learning_rate": 7.4074074074074075e-06, "loss": 0.637, "step": 161 }, { "epoch": 0.044834982356604165, "grad_norm": 0.3645428717136383, "learning_rate": 7.453703703703704e-06, "loss": 0.6346, "step": 162 }, { "epoch": 0.04511174150695357, "grad_norm": 0.46214672923088074, "learning_rate": 7.500000000000001e-06, "loss": 0.6076, "step": 163 }, { "epoch": 0.04538850065730298, "grad_norm": 0.23618219792842865, "learning_rate": 7.546296296296297e-06, "loss": 0.62, "step": 164 }, { "epoch": 0.04566525980765239, "grad_norm": 0.32073745131492615, "learning_rate": 7.592592592592594e-06, "loss": 0.6636, "step": 165 }, { "epoch": 0.0459420189580018, "grad_norm": 0.2659473717212677, "learning_rate": 7.638888888888888e-06, "loss": 0.6528, "step": 166 }, { "epoch": 0.046218778108351205, "grad_norm": 0.2652572691440582, "learning_rate": 7.685185185185185e-06, "loss": 0.6512, "step": 167 }, { "epoch": 0.04649553725870061, "grad_norm": 1.0766971111297607, "learning_rate": 7.731481481481483e-06, "loss": 0.6486, "step": 168 }, { "epoch": 0.04677229640905002, "grad_norm": 0.9524430632591248, "learning_rate": 7.77777777777778e-06, "loss": 0.6324, "step": 169 }, { "epoch": 0.047049055559399436, "grad_norm": 0.23585255444049835, "learning_rate": 7.824074074074076e-06, "loss": 0.6379, "step": 170 }, { "epoch": 0.047325814709748844, "grad_norm": 0.6015676259994507, "learning_rate": 7.870370370370372e-06, "loss": 0.6335, "step": 171 }, { "epoch": 0.04760257386009825, "grad_norm": 0.2714556157588959, "learning_rate": 7.916666666666667e-06, "loss": 0.6534, "step": 172 }, { "epoch": 0.04787933301044766, "grad_norm": 0.21220026910305023, "learning_rate": 7.962962962962963e-06, "loss": 0.6912, "step": 173 }, { "epoch": 0.04815609216079707, "grad_norm": 0.2169038951396942, "learning_rate": 8.00925925925926e-06, "loss": 0.6534, "step": 174 }, { "epoch": 0.048432851311146476, "grad_norm": 0.19888252019882202, "learning_rate": 8.055555555555557e-06, "loss": 0.619, "step": 175 }, { "epoch": 0.048709610461495884, "grad_norm": 0.21066194772720337, "learning_rate": 8.101851851851854e-06, "loss": 0.6527, "step": 176 }, { "epoch": 0.04898636961184529, "grad_norm": 0.2448824793100357, "learning_rate": 8.148148148148148e-06, "loss": 0.6471, "step": 177 }, { "epoch": 0.0492631287621947, "grad_norm": 0.25218138098716736, "learning_rate": 8.194444444444445e-06, "loss": 0.6178, "step": 178 }, { "epoch": 0.04953988791254411, "grad_norm": 0.5188933610916138, "learning_rate": 8.240740740740741e-06, "loss": 0.6578, "step": 179 }, { "epoch": 0.049816647062893515, "grad_norm": 0.26518234610557556, "learning_rate": 8.287037037037037e-06, "loss": 0.6014, "step": 180 }, { "epoch": 0.05009340621324292, "grad_norm": 0.3839775323867798, "learning_rate": 8.333333333333334e-06, "loss": 0.5877, "step": 181 }, { "epoch": 0.05037016536359233, "grad_norm": 0.24008437991142273, "learning_rate": 8.37962962962963e-06, "loss": 0.6378, "step": 182 }, { "epoch": 0.05064692451394174, "grad_norm": 0.29770928621292114, "learning_rate": 8.425925925925926e-06, "loss": 0.6271, "step": 183 }, { "epoch": 0.05092368366429115, "grad_norm": 0.22119256854057312, "learning_rate": 8.472222222222223e-06, "loss": 0.6187, "step": 184 }, { "epoch": 0.05120044281464056, "grad_norm": 0.31639784574508667, "learning_rate": 8.518518518518519e-06, "loss": 0.6207, "step": 185 }, { "epoch": 0.05147720196498997, "grad_norm": 0.29625654220581055, "learning_rate": 8.564814814814816e-06, "loss": 0.6363, "step": 186 }, { "epoch": 0.05175396111533938, "grad_norm": 0.2501123547554016, "learning_rate": 8.611111111111112e-06, "loss": 0.6116, "step": 187 }, { "epoch": 0.052030720265688786, "grad_norm": 0.23208379745483398, "learning_rate": 8.657407407407408e-06, "loss": 0.6205, "step": 188 }, { "epoch": 0.052307479416038194, "grad_norm": 0.2510979175567627, "learning_rate": 8.703703703703705e-06, "loss": 0.6642, "step": 189 }, { "epoch": 0.0525842385663876, "grad_norm": 0.23138566315174103, "learning_rate": 8.750000000000001e-06, "loss": 0.6191, "step": 190 }, { "epoch": 0.05286099771673701, "grad_norm": 0.21357129514217377, "learning_rate": 8.796296296296297e-06, "loss": 0.6348, "step": 191 }, { "epoch": 0.05313775686708642, "grad_norm": 0.20557576417922974, "learning_rate": 8.842592592592594e-06, "loss": 0.6104, "step": 192 }, { "epoch": 0.053414516017435826, "grad_norm": 0.20114965736865997, "learning_rate": 8.888888888888888e-06, "loss": 0.5988, "step": 193 }, { "epoch": 0.053691275167785234, "grad_norm": 0.20703671872615814, "learning_rate": 8.935185185185186e-06, "loss": 0.6772, "step": 194 }, { "epoch": 0.05396803431813464, "grad_norm": 0.21530967950820923, "learning_rate": 8.981481481481483e-06, "loss": 0.6371, "step": 195 }, { "epoch": 0.05424479346848405, "grad_norm": 0.21983705461025238, "learning_rate": 9.027777777777779e-06, "loss": 0.6784, "step": 196 }, { "epoch": 0.05452155261883346, "grad_norm": 0.23038871586322784, "learning_rate": 9.074074074074075e-06, "loss": 0.6479, "step": 197 }, { "epoch": 0.054798311769182866, "grad_norm": 0.2793519198894501, "learning_rate": 9.120370370370372e-06, "loss": 0.6251, "step": 198 }, { "epoch": 0.05507507091953228, "grad_norm": 0.19683951139450073, "learning_rate": 9.166666666666666e-06, "loss": 0.6028, "step": 199 }, { "epoch": 0.05535183006988169, "grad_norm": 0.2583061754703522, "learning_rate": 9.212962962962963e-06, "loss": 0.6577, "step": 200 }, { "epoch": 0.055628589220231096, "grad_norm": 0.20511524379253387, "learning_rate": 9.25925925925926e-06, "loss": 0.643, "step": 201 }, { "epoch": 0.055905348370580504, "grad_norm": 0.25865089893341064, "learning_rate": 9.305555555555557e-06, "loss": 0.6221, "step": 202 }, { "epoch": 0.05618210752092991, "grad_norm": 0.22501271963119507, "learning_rate": 9.351851851851854e-06, "loss": 0.6415, "step": 203 }, { "epoch": 0.05645886667127932, "grad_norm": 0.3372131884098053, "learning_rate": 9.398148148148148e-06, "loss": 0.6503, "step": 204 }, { "epoch": 0.05673562582162873, "grad_norm": 0.23736023902893066, "learning_rate": 9.444444444444445e-06, "loss": 0.6231, "step": 205 }, { "epoch": 0.057012384971978136, "grad_norm": 0.2767028510570526, "learning_rate": 9.490740740740741e-06, "loss": 0.6281, "step": 206 }, { "epoch": 0.057289144122327544, "grad_norm": 0.2034919410943985, "learning_rate": 9.537037037037037e-06, "loss": 0.622, "step": 207 }, { "epoch": 0.05756590327267695, "grad_norm": 0.21250395476818085, "learning_rate": 9.583333333333335e-06, "loss": 0.6217, "step": 208 }, { "epoch": 0.05784266242302636, "grad_norm": 0.21608176827430725, "learning_rate": 9.62962962962963e-06, "loss": 0.6197, "step": 209 }, { "epoch": 0.05811942157337577, "grad_norm": 0.2632680833339691, "learning_rate": 9.675925925925926e-06, "loss": 0.6072, "step": 210 }, { "epoch": 0.058396180723725176, "grad_norm": 0.2304650992155075, "learning_rate": 9.722222222222223e-06, "loss": 0.6043, "step": 211 }, { "epoch": 0.058672939874074584, "grad_norm": 0.1886640191078186, "learning_rate": 9.768518518518519e-06, "loss": 0.6073, "step": 212 }, { "epoch": 0.05894969902442399, "grad_norm": 0.23273925483226776, "learning_rate": 9.814814814814815e-06, "loss": 0.6406, "step": 213 }, { "epoch": 0.05922645817477341, "grad_norm": 0.20671498775482178, "learning_rate": 9.861111111111112e-06, "loss": 0.6157, "step": 214 }, { "epoch": 0.059503217325122815, "grad_norm": 0.22609414160251617, "learning_rate": 9.907407407407408e-06, "loss": 0.6067, "step": 215 }, { "epoch": 0.05977997647547222, "grad_norm": 0.2549595236778259, "learning_rate": 9.953703703703704e-06, "loss": 0.6169, "step": 216 }, { "epoch": 0.06005673562582163, "grad_norm": 0.21189779043197632, "learning_rate": 1e-05, "loss": 0.6725, "step": 217 }, { "epoch": 0.06033349477617104, "grad_norm": 0.21311138570308685, "learning_rate": 1e-05, "loss": 0.6181, "step": 218 }, { "epoch": 0.060610253926520447, "grad_norm": 0.19230008125305176, "learning_rate": 1e-05, "loss": 0.6378, "step": 219 }, { "epoch": 0.060887013076869854, "grad_norm": 0.22865977883338928, "learning_rate": 1e-05, "loss": 0.6209, "step": 220 }, { "epoch": 0.06116377222721926, "grad_norm": 0.19346465170383453, "learning_rate": 1e-05, "loss": 0.6522, "step": 221 }, { "epoch": 0.06144053137756867, "grad_norm": 0.20622241497039795, "learning_rate": 1e-05, "loss": 0.6339, "step": 222 }, { "epoch": 0.06171729052791808, "grad_norm": 0.20087113976478577, "learning_rate": 1e-05, "loss": 0.5871, "step": 223 }, { "epoch": 0.061994049678267486, "grad_norm": 0.21423204243183136, "learning_rate": 1e-05, "loss": 0.6207, "step": 224 }, { "epoch": 0.062270808828616894, "grad_norm": 0.21743711829185486, "learning_rate": 1e-05, "loss": 0.6298, "step": 225 }, { "epoch": 0.06254756797896631, "grad_norm": 0.20652930438518524, "learning_rate": 1e-05, "loss": 0.6624, "step": 226 }, { "epoch": 0.06282432712931571, "grad_norm": 0.20066680014133453, "learning_rate": 1e-05, "loss": 0.6018, "step": 227 }, { "epoch": 0.06310108627966513, "grad_norm": 0.20735637843608856, "learning_rate": 1e-05, "loss": 0.5828, "step": 228 }, { "epoch": 0.06337784543001453, "grad_norm": 0.23497723042964935, "learning_rate": 1e-05, "loss": 0.6041, "step": 229 }, { "epoch": 0.06365460458036394, "grad_norm": 0.18698714673519135, "learning_rate": 1e-05, "loss": 0.6073, "step": 230 }, { "epoch": 0.06393136373071334, "grad_norm": 0.1992734670639038, "learning_rate": 1e-05, "loss": 0.5906, "step": 231 }, { "epoch": 0.06420812288106276, "grad_norm": 0.21657909452915192, "learning_rate": 1e-05, "loss": 0.6178, "step": 232 }, { "epoch": 0.06448488203141216, "grad_norm": 0.20375502109527588, "learning_rate": 1e-05, "loss": 0.6067, "step": 233 }, { "epoch": 0.06476164118176157, "grad_norm": 0.20221443474292755, "learning_rate": 1e-05, "loss": 0.6402, "step": 234 }, { "epoch": 0.06503840033211097, "grad_norm": 0.19368258118629456, "learning_rate": 1e-05, "loss": 0.6236, "step": 235 }, { "epoch": 0.06531515948246039, "grad_norm": 0.22681809961795807, "learning_rate": 1e-05, "loss": 0.6245, "step": 236 }, { "epoch": 0.0655919186328098, "grad_norm": 0.1955006867647171, "learning_rate": 1e-05, "loss": 0.6589, "step": 237 }, { "epoch": 0.0658686777831592, "grad_norm": 0.2670384645462036, "learning_rate": 1e-05, "loss": 0.6324, "step": 238 }, { "epoch": 0.06614543693350862, "grad_norm": 0.47994279861450195, "learning_rate": 1e-05, "loss": 0.6022, "step": 239 }, { "epoch": 0.06642219608385802, "grad_norm": 0.31617793440818787, "learning_rate": 1e-05, "loss": 0.6285, "step": 240 }, { "epoch": 0.06669895523420744, "grad_norm": 0.2623136639595032, "learning_rate": 1e-05, "loss": 0.616, "step": 241 }, { "epoch": 0.06697571438455684, "grad_norm": 0.22985142469406128, "learning_rate": 1e-05, "loss": 0.6311, "step": 242 }, { "epoch": 0.06725247353490625, "grad_norm": 0.22984251379966736, "learning_rate": 1e-05, "loss": 0.6039, "step": 243 }, { "epoch": 0.06752923268525565, "grad_norm": 0.22139348089694977, "learning_rate": 1e-05, "loss": 0.6538, "step": 244 }, { "epoch": 0.06780599183560507, "grad_norm": 0.19513851404190063, "learning_rate": 1e-05, "loss": 0.6537, "step": 245 }, { "epoch": 0.06808275098595447, "grad_norm": 0.27259740233421326, "learning_rate": 1e-05, "loss": 0.622, "step": 246 }, { "epoch": 0.06835951013630388, "grad_norm": 0.217725932598114, "learning_rate": 1e-05, "loss": 0.6069, "step": 247 }, { "epoch": 0.06863626928665328, "grad_norm": 0.19623306393623352, "learning_rate": 1e-05, "loss": 0.6162, "step": 248 }, { "epoch": 0.0689130284370027, "grad_norm": 0.19352790713310242, "learning_rate": 1e-05, "loss": 0.6004, "step": 249 }, { "epoch": 0.0691897875873521, "grad_norm": 0.1959952563047409, "learning_rate": 1e-05, "loss": 0.6174, "step": 250 }, { "epoch": 0.06946654673770152, "grad_norm": 0.20604297518730164, "learning_rate": 1e-05, "loss": 0.6004, "step": 251 }, { "epoch": 0.06974330588805093, "grad_norm": 0.18622221052646637, "learning_rate": 1e-05, "loss": 0.6393, "step": 252 }, { "epoch": 0.07002006503840033, "grad_norm": 0.2037539780139923, "learning_rate": 1e-05, "loss": 0.6078, "step": 253 }, { "epoch": 0.07029682418874975, "grad_norm": 0.19208380579948425, "learning_rate": 1e-05, "loss": 0.6377, "step": 254 }, { "epoch": 0.07057358333909915, "grad_norm": 0.21859681606292725, "learning_rate": 1e-05, "loss": 0.6429, "step": 255 }, { "epoch": 0.07085034248944856, "grad_norm": 0.21776020526885986, "learning_rate": 1e-05, "loss": 0.6366, "step": 256 }, { "epoch": 0.07112710163979796, "grad_norm": 0.21601106226444244, "learning_rate": 1e-05, "loss": 0.6001, "step": 257 }, { "epoch": 0.07140386079014738, "grad_norm": 0.19347167015075684, "learning_rate": 1e-05, "loss": 0.62, "step": 258 }, { "epoch": 0.07168061994049678, "grad_norm": 0.205342099070549, "learning_rate": 1e-05, "loss": 0.6141, "step": 259 }, { "epoch": 0.0719573790908462, "grad_norm": 0.19053463637828827, "learning_rate": 1e-05, "loss": 0.6446, "step": 260 }, { "epoch": 0.0722341382411956, "grad_norm": 0.19013425707817078, "learning_rate": 1e-05, "loss": 0.5939, "step": 261 }, { "epoch": 0.07251089739154501, "grad_norm": 0.19733259081840515, "learning_rate": 1e-05, "loss": 0.5894, "step": 262 }, { "epoch": 0.07278765654189441, "grad_norm": 0.1899648755788803, "learning_rate": 1e-05, "loss": 0.589, "step": 263 }, { "epoch": 0.07306441569224383, "grad_norm": 0.20410655438899994, "learning_rate": 1e-05, "loss": 0.5903, "step": 264 }, { "epoch": 0.07334117484259323, "grad_norm": 0.19814811646938324, "learning_rate": 1e-05, "loss": 0.6337, "step": 265 }, { "epoch": 0.07361793399294264, "grad_norm": 0.18619553744792938, "learning_rate": 1e-05, "loss": 0.61, "step": 266 }, { "epoch": 0.07389469314329206, "grad_norm": 0.2084473818540573, "learning_rate": 1e-05, "loss": 0.6302, "step": 267 }, { "epoch": 0.07417145229364146, "grad_norm": 0.1897633820772171, "learning_rate": 1e-05, "loss": 0.6224, "step": 268 }, { "epoch": 0.07444821144399087, "grad_norm": 0.2208409160375595, "learning_rate": 1e-05, "loss": 0.6134, "step": 269 }, { "epoch": 0.07472497059434027, "grad_norm": 0.218243807554245, "learning_rate": 1e-05, "loss": 0.653, "step": 270 }, { "epoch": 0.07500172974468969, "grad_norm": 0.21829639375209808, "learning_rate": 1e-05, "loss": 0.6253, "step": 271 }, { "epoch": 0.07527848889503909, "grad_norm": 0.22339008748531342, "learning_rate": 1e-05, "loss": 0.6207, "step": 272 }, { "epoch": 0.0755552480453885, "grad_norm": 0.20800842344760895, "learning_rate": 1e-05, "loss": 0.6055, "step": 273 }, { "epoch": 0.0758320071957379, "grad_norm": 0.19653929769992828, "learning_rate": 1e-05, "loss": 0.6167, "step": 274 }, { "epoch": 0.07610876634608732, "grad_norm": 0.2204490602016449, "learning_rate": 1e-05, "loss": 0.6467, "step": 275 }, { "epoch": 0.07638552549643672, "grad_norm": 0.23937895894050598, "learning_rate": 1e-05, "loss": 0.6364, "step": 276 }, { "epoch": 0.07666228464678614, "grad_norm": 0.20480842888355255, "learning_rate": 1e-05, "loss": 0.585, "step": 277 }, { "epoch": 0.07693904379713554, "grad_norm": 0.20947423577308655, "learning_rate": 1e-05, "loss": 0.6114, "step": 278 }, { "epoch": 0.07721580294748495, "grad_norm": 0.22162465751171112, "learning_rate": 1e-05, "loss": 0.623, "step": 279 }, { "epoch": 0.07749256209783437, "grad_norm": 0.19330671429634094, "learning_rate": 1e-05, "loss": 0.6047, "step": 280 }, { "epoch": 0.07776932124818377, "grad_norm": 0.19664883613586426, "learning_rate": 1e-05, "loss": 0.6211, "step": 281 }, { "epoch": 0.07804608039853318, "grad_norm": 0.2685168981552124, "learning_rate": 1e-05, "loss": 0.6476, "step": 282 }, { "epoch": 0.07832283954888258, "grad_norm": 0.18818792700767517, "learning_rate": 1e-05, "loss": 0.659, "step": 283 }, { "epoch": 0.078599598699232, "grad_norm": 0.20417141914367676, "learning_rate": 1e-05, "loss": 0.6252, "step": 284 }, { "epoch": 0.0788763578495814, "grad_norm": 0.1978767067193985, "learning_rate": 1e-05, "loss": 0.6025, "step": 285 }, { "epoch": 0.07915311699993081, "grad_norm": 0.21175970137119293, "learning_rate": 1e-05, "loss": 0.6074, "step": 286 }, { "epoch": 0.07942987615028022, "grad_norm": 0.24406464397907257, "learning_rate": 1e-05, "loss": 0.5946, "step": 287 }, { "epoch": 0.07970663530062963, "grad_norm": 0.22358612716197968, "learning_rate": 1e-05, "loss": 0.6185, "step": 288 }, { "epoch": 0.07998339445097903, "grad_norm": 0.28929826617240906, "learning_rate": 1e-05, "loss": 0.592, "step": 289 }, { "epoch": 0.08026015360132845, "grad_norm": 0.20873771607875824, "learning_rate": 1e-05, "loss": 0.6344, "step": 290 }, { "epoch": 0.08053691275167785, "grad_norm": 0.19265055656433105, "learning_rate": 1e-05, "loss": 0.6003, "step": 291 }, { "epoch": 0.08081367190202726, "grad_norm": 0.19973857700824738, "learning_rate": 1e-05, "loss": 0.6372, "step": 292 }, { "epoch": 0.08109043105237666, "grad_norm": 0.19608935713768005, "learning_rate": 1e-05, "loss": 0.6004, "step": 293 }, { "epoch": 0.08136719020272608, "grad_norm": 0.23249180614948273, "learning_rate": 1e-05, "loss": 0.6062, "step": 294 }, { "epoch": 0.08164394935307549, "grad_norm": 0.21060074865818024, "learning_rate": 1e-05, "loss": 0.6394, "step": 295 }, { "epoch": 0.0819207085034249, "grad_norm": 0.20283114910125732, "learning_rate": 1e-05, "loss": 0.6061, "step": 296 }, { "epoch": 0.08219746765377431, "grad_norm": 0.1899746209383011, "learning_rate": 1e-05, "loss": 0.6519, "step": 297 }, { "epoch": 0.08247422680412371, "grad_norm": 0.21730415523052216, "learning_rate": 1e-05, "loss": 0.6204, "step": 298 }, { "epoch": 0.08275098595447312, "grad_norm": 0.22047756612300873, "learning_rate": 1e-05, "loss": 0.6161, "step": 299 }, { "epoch": 0.08302774510482253, "grad_norm": 0.25383055210113525, "learning_rate": 1e-05, "loss": 0.5928, "step": 300 }, { "epoch": 0.08330450425517194, "grad_norm": 0.1959543079137802, "learning_rate": 1e-05, "loss": 0.6075, "step": 301 }, { "epoch": 0.08358126340552134, "grad_norm": 0.22606919705867767, "learning_rate": 1e-05, "loss": 0.5924, "step": 302 }, { "epoch": 0.08385802255587076, "grad_norm": 0.2099144160747528, "learning_rate": 1e-05, "loss": 0.6053, "step": 303 }, { "epoch": 0.08413478170622016, "grad_norm": 0.20953500270843506, "learning_rate": 1e-05, "loss": 0.6563, "step": 304 }, { "epoch": 0.08441154085656957, "grad_norm": 0.2006242722272873, "learning_rate": 1e-05, "loss": 0.607, "step": 305 }, { "epoch": 0.08468830000691897, "grad_norm": 0.20386873185634613, "learning_rate": 1e-05, "loss": 0.6, "step": 306 }, { "epoch": 0.08496505915726839, "grad_norm": 0.1987786740064621, "learning_rate": 1e-05, "loss": 0.6276, "step": 307 }, { "epoch": 0.08524181830761779, "grad_norm": 0.20822134613990784, "learning_rate": 1e-05, "loss": 0.6371, "step": 308 }, { "epoch": 0.0855185774579672, "grad_norm": 0.18026457726955414, "learning_rate": 1e-05, "loss": 0.6089, "step": 309 }, { "epoch": 0.08579533660831662, "grad_norm": 0.28235384821891785, "learning_rate": 1e-05, "loss": 0.6187, "step": 310 }, { "epoch": 0.08607209575866602, "grad_norm": 0.2135591059923172, "learning_rate": 1e-05, "loss": 0.5728, "step": 311 }, { "epoch": 0.08634885490901544, "grad_norm": 0.2963331639766693, "learning_rate": 1e-05, "loss": 0.6204, "step": 312 }, { "epoch": 0.08662561405936484, "grad_norm": 0.20255491137504578, "learning_rate": 1e-05, "loss": 0.6286, "step": 313 }, { "epoch": 0.08690237320971425, "grad_norm": 0.2551748752593994, "learning_rate": 1e-05, "loss": 0.6215, "step": 314 }, { "epoch": 0.08717913236006365, "grad_norm": 0.20258182287216187, "learning_rate": 1e-05, "loss": 0.5966, "step": 315 }, { "epoch": 0.08745589151041307, "grad_norm": 0.19976846873760223, "learning_rate": 1e-05, "loss": 0.6341, "step": 316 }, { "epoch": 0.08773265066076247, "grad_norm": 0.27117499709129333, "learning_rate": 1e-05, "loss": 0.6028, "step": 317 }, { "epoch": 0.08800940981111188, "grad_norm": 0.20961180329322815, "learning_rate": 1e-05, "loss": 0.6276, "step": 318 }, { "epoch": 0.08828616896146128, "grad_norm": 0.21909400820732117, "learning_rate": 1e-05, "loss": 0.6289, "step": 319 }, { "epoch": 0.0885629281118107, "grad_norm": 0.2048419713973999, "learning_rate": 1e-05, "loss": 0.611, "step": 320 }, { "epoch": 0.0888396872621601, "grad_norm": 0.21298743784427643, "learning_rate": 1e-05, "loss": 0.6296, "step": 321 }, { "epoch": 0.08911644641250951, "grad_norm": 0.2022523432970047, "learning_rate": 1e-05, "loss": 0.6268, "step": 322 }, { "epoch": 0.08939320556285892, "grad_norm": 0.18918123841285706, "learning_rate": 1e-05, "loss": 0.6179, "step": 323 }, { "epoch": 0.08966996471320833, "grad_norm": 0.18272995948791504, "learning_rate": 1e-05, "loss": 0.596, "step": 324 }, { "epoch": 0.08994672386355775, "grad_norm": 0.1849115639925003, "learning_rate": 1e-05, "loss": 0.5979, "step": 325 }, { "epoch": 0.09022348301390715, "grad_norm": 0.19439859688282013, "learning_rate": 1e-05, "loss": 0.6113, "step": 326 }, { "epoch": 0.09050024216425656, "grad_norm": 0.2337407022714615, "learning_rate": 1e-05, "loss": 0.585, "step": 327 }, { "epoch": 0.09077700131460596, "grad_norm": 0.18872113525867462, "learning_rate": 1e-05, "loss": 0.6039, "step": 328 }, { "epoch": 0.09105376046495538, "grad_norm": 0.19705691933631897, "learning_rate": 1e-05, "loss": 0.5929, "step": 329 }, { "epoch": 0.09133051961530478, "grad_norm": 0.20793798565864563, "learning_rate": 1e-05, "loss": 0.6104, "step": 330 }, { "epoch": 0.0916072787656542, "grad_norm": 0.21438376605510712, "learning_rate": 1e-05, "loss": 0.6433, "step": 331 }, { "epoch": 0.0918840379160036, "grad_norm": 0.2413180023431778, "learning_rate": 1e-05, "loss": 0.6106, "step": 332 }, { "epoch": 0.09216079706635301, "grad_norm": 0.21440115571022034, "learning_rate": 1e-05, "loss": 0.603, "step": 333 }, { "epoch": 0.09243755621670241, "grad_norm": 0.2160363495349884, "learning_rate": 1e-05, "loss": 0.6161, "step": 334 }, { "epoch": 0.09271431536705182, "grad_norm": 0.18942591547966003, "learning_rate": 1e-05, "loss": 0.6081, "step": 335 }, { "epoch": 0.09299107451740123, "grad_norm": 0.19234995543956757, "learning_rate": 1e-05, "loss": 0.6065, "step": 336 }, { "epoch": 0.09326783366775064, "grad_norm": 0.22266258299350739, "learning_rate": 1e-05, "loss": 0.6037, "step": 337 }, { "epoch": 0.09354459281810004, "grad_norm": 0.2160872220993042, "learning_rate": 1e-05, "loss": 0.6257, "step": 338 }, { "epoch": 0.09382135196844946, "grad_norm": 0.20593659579753876, "learning_rate": 1e-05, "loss": 0.5917, "step": 339 }, { "epoch": 0.09409811111879887, "grad_norm": 0.1987587958574295, "learning_rate": 1e-05, "loss": 0.6111, "step": 340 }, { "epoch": 0.09437487026914827, "grad_norm": 0.18456101417541504, "learning_rate": 1e-05, "loss": 0.592, "step": 341 }, { "epoch": 0.09465162941949769, "grad_norm": 0.24350695312023163, "learning_rate": 1e-05, "loss": 0.6087, "step": 342 }, { "epoch": 0.09492838856984709, "grad_norm": 0.2171090990304947, "learning_rate": 1e-05, "loss": 0.6151, "step": 343 }, { "epoch": 0.0952051477201965, "grad_norm": 0.206729918718338, "learning_rate": 1e-05, "loss": 0.5844, "step": 344 }, { "epoch": 0.0954819068705459, "grad_norm": 0.20420055091381073, "learning_rate": 1e-05, "loss": 0.6198, "step": 345 }, { "epoch": 0.09575866602089532, "grad_norm": 0.21876345574855804, "learning_rate": 1e-05, "loss": 0.6521, "step": 346 }, { "epoch": 0.09603542517124472, "grad_norm": 0.1897173374891281, "learning_rate": 1e-05, "loss": 0.6125, "step": 347 }, { "epoch": 0.09631218432159414, "grad_norm": 0.21414470672607422, "learning_rate": 1e-05, "loss": 0.6272, "step": 348 }, { "epoch": 0.09658894347194354, "grad_norm": 0.1911127269268036, "learning_rate": 1e-05, "loss": 0.6094, "step": 349 }, { "epoch": 0.09686570262229295, "grad_norm": 0.19825570285320282, "learning_rate": 1e-05, "loss": 0.5961, "step": 350 }, { "epoch": 0.09714246177264235, "grad_norm": 0.22352299094200134, "learning_rate": 1e-05, "loss": 0.6169, "step": 351 }, { "epoch": 0.09741922092299177, "grad_norm": 0.2026997059583664, "learning_rate": 1e-05, "loss": 0.5928, "step": 352 }, { "epoch": 0.09769598007334117, "grad_norm": 0.20680588483810425, "learning_rate": 1e-05, "loss": 0.5809, "step": 353 }, { "epoch": 0.09797273922369058, "grad_norm": 0.19607611000537872, "learning_rate": 1e-05, "loss": 0.6044, "step": 354 }, { "epoch": 0.09824949837404, "grad_norm": 0.22918154299259186, "learning_rate": 1e-05, "loss": 0.5867, "step": 355 }, { "epoch": 0.0985262575243894, "grad_norm": 0.24628005921840668, "learning_rate": 1e-05, "loss": 0.609, "step": 356 }, { "epoch": 0.09880301667473881, "grad_norm": 0.24280641973018646, "learning_rate": 1e-05, "loss": 0.5991, "step": 357 }, { "epoch": 0.09907977582508821, "grad_norm": 0.1977980136871338, "learning_rate": 1e-05, "loss": 0.5823, "step": 358 }, { "epoch": 0.09935653497543763, "grad_norm": 0.2062433660030365, "learning_rate": 1e-05, "loss": 0.6162, "step": 359 }, { "epoch": 0.09963329412578703, "grad_norm": 0.20944705605506897, "learning_rate": 1e-05, "loss": 0.6195, "step": 360 }, { "epoch": 0.09991005327613645, "grad_norm": 0.18767501413822174, "learning_rate": 1e-05, "loss": 0.6053, "step": 361 }, { "epoch": 0.10018681242648585, "grad_norm": 0.197841078042984, "learning_rate": 1e-05, "loss": 0.6287, "step": 362 }, { "epoch": 0.10046357157683526, "grad_norm": 0.19908173382282257, "learning_rate": 1e-05, "loss": 0.6605, "step": 363 }, { "epoch": 0.10074033072718466, "grad_norm": 0.1937711238861084, "learning_rate": 1e-05, "loss": 0.5964, "step": 364 }, { "epoch": 0.10101708987753408, "grad_norm": 0.21988946199417114, "learning_rate": 1e-05, "loss": 0.6018, "step": 365 }, { "epoch": 0.10129384902788348, "grad_norm": 0.19071392714977264, "learning_rate": 1e-05, "loss": 0.611, "step": 366 }, { "epoch": 0.1015706081782329, "grad_norm": 0.2395557016134262, "learning_rate": 1e-05, "loss": 0.6178, "step": 367 }, { "epoch": 0.1018473673285823, "grad_norm": 0.2056041806936264, "learning_rate": 1e-05, "loss": 0.6117, "step": 368 }, { "epoch": 0.10212412647893171, "grad_norm": 0.23734906315803528, "learning_rate": 1e-05, "loss": 0.6206, "step": 369 }, { "epoch": 0.10240088562928112, "grad_norm": 0.19368208944797516, "learning_rate": 1e-05, "loss": 0.6172, "step": 370 }, { "epoch": 0.10267764477963053, "grad_norm": 0.18824712932109833, "learning_rate": 1e-05, "loss": 0.6005, "step": 371 }, { "epoch": 0.10295440392997994, "grad_norm": 0.19340920448303223, "learning_rate": 1e-05, "loss": 0.5949, "step": 372 }, { "epoch": 0.10323116308032934, "grad_norm": 0.22678983211517334, "learning_rate": 1e-05, "loss": 0.5806, "step": 373 }, { "epoch": 0.10350792223067876, "grad_norm": 0.1999884694814682, "learning_rate": 1e-05, "loss": 0.6296, "step": 374 }, { "epoch": 0.10378468138102816, "grad_norm": 0.18501682579517365, "learning_rate": 1e-05, "loss": 0.6278, "step": 375 }, { "epoch": 0.10406144053137757, "grad_norm": 0.21460767090320587, "learning_rate": 1e-05, "loss": 0.5916, "step": 376 }, { "epoch": 0.10433819968172697, "grad_norm": 0.19576792418956757, "learning_rate": 1e-05, "loss": 0.6095, "step": 377 }, { "epoch": 0.10461495883207639, "grad_norm": 0.19378715753555298, "learning_rate": 1e-05, "loss": 0.5968, "step": 378 }, { "epoch": 0.10489171798242579, "grad_norm": 0.2031077891588211, "learning_rate": 1e-05, "loss": 0.6424, "step": 379 }, { "epoch": 0.1051684771327752, "grad_norm": 0.2062024474143982, "learning_rate": 1e-05, "loss": 0.628, "step": 380 }, { "epoch": 0.1054452362831246, "grad_norm": 0.20131751894950867, "learning_rate": 1e-05, "loss": 0.6071, "step": 381 }, { "epoch": 0.10572199543347402, "grad_norm": 0.19801056385040283, "learning_rate": 1e-05, "loss": 0.5958, "step": 382 }, { "epoch": 0.10599875458382342, "grad_norm": 0.2256162017583847, "learning_rate": 1e-05, "loss": 0.6092, "step": 383 }, { "epoch": 0.10627551373417284, "grad_norm": 0.18101416528224945, "learning_rate": 1e-05, "loss": 0.5902, "step": 384 }, { "epoch": 0.10655227288452225, "grad_norm": 0.19238883256912231, "learning_rate": 1e-05, "loss": 0.6332, "step": 385 }, { "epoch": 0.10682903203487165, "grad_norm": 0.19777430593967438, "learning_rate": 1e-05, "loss": 0.6104, "step": 386 }, { "epoch": 0.10710579118522107, "grad_norm": 0.2083672434091568, "learning_rate": 1e-05, "loss": 0.6143, "step": 387 }, { "epoch": 0.10738255033557047, "grad_norm": 0.21094585955142975, "learning_rate": 1e-05, "loss": 0.6426, "step": 388 }, { "epoch": 0.10765930948591988, "grad_norm": 0.18793919682502747, "learning_rate": 1e-05, "loss": 0.5791, "step": 389 }, { "epoch": 0.10793606863626928, "grad_norm": 0.20381595194339752, "learning_rate": 1e-05, "loss": 0.6351, "step": 390 }, { "epoch": 0.1082128277866187, "grad_norm": 0.1856151521205902, "learning_rate": 1e-05, "loss": 0.5902, "step": 391 }, { "epoch": 0.1084895869369681, "grad_norm": 0.18217995762825012, "learning_rate": 1e-05, "loss": 0.5691, "step": 392 }, { "epoch": 0.10876634608731751, "grad_norm": 0.2125178873538971, "learning_rate": 1e-05, "loss": 0.5859, "step": 393 }, { "epoch": 0.10904310523766692, "grad_norm": 0.21772488951683044, "learning_rate": 1e-05, "loss": 0.5893, "step": 394 }, { "epoch": 0.10931986438801633, "grad_norm": 0.18221233785152435, "learning_rate": 1e-05, "loss": 0.5758, "step": 395 }, { "epoch": 0.10959662353836573, "grad_norm": 0.1953209638595581, "learning_rate": 1e-05, "loss": 0.626, "step": 396 }, { "epoch": 0.10987338268871515, "grad_norm": 0.19208577275276184, "learning_rate": 1e-05, "loss": 0.6081, "step": 397 }, { "epoch": 0.11015014183906456, "grad_norm": 0.1918403059244156, "learning_rate": 1e-05, "loss": 0.6182, "step": 398 }, { "epoch": 0.11042690098941396, "grad_norm": 0.20833636820316315, "learning_rate": 1e-05, "loss": 0.6453, "step": 399 }, { "epoch": 0.11070366013976338, "grad_norm": 0.2088242769241333, "learning_rate": 1e-05, "loss": 0.6238, "step": 400 }, { "epoch": 0.11098041929011278, "grad_norm": 0.1965329498052597, "learning_rate": 1e-05, "loss": 0.6135, "step": 401 }, { "epoch": 0.11125717844046219, "grad_norm": 0.19225244224071503, "learning_rate": 1e-05, "loss": 0.6085, "step": 402 }, { "epoch": 0.1115339375908116, "grad_norm": 0.21272139251232147, "learning_rate": 1e-05, "loss": 0.5859, "step": 403 }, { "epoch": 0.11181069674116101, "grad_norm": 0.2481372207403183, "learning_rate": 1e-05, "loss": 0.6289, "step": 404 }, { "epoch": 0.11208745589151041, "grad_norm": 0.2115921676158905, "learning_rate": 1e-05, "loss": 0.5757, "step": 405 }, { "epoch": 0.11236421504185982, "grad_norm": 0.24161191284656525, "learning_rate": 1e-05, "loss": 0.6186, "step": 406 }, { "epoch": 0.11264097419220923, "grad_norm": 0.1890670210123062, "learning_rate": 1e-05, "loss": 0.6026, "step": 407 }, { "epoch": 0.11291773334255864, "grad_norm": 0.22289684414863586, "learning_rate": 1e-05, "loss": 0.5797, "step": 408 }, { "epoch": 0.11319449249290804, "grad_norm": 0.2049790769815445, "learning_rate": 1e-05, "loss": 0.6215, "step": 409 }, { "epoch": 0.11347125164325746, "grad_norm": 0.1945558339357376, "learning_rate": 1e-05, "loss": 0.6339, "step": 410 }, { "epoch": 0.11374801079360686, "grad_norm": 0.2318001538515091, "learning_rate": 1e-05, "loss": 0.602, "step": 411 }, { "epoch": 0.11402476994395627, "grad_norm": 0.21207961440086365, "learning_rate": 1e-05, "loss": 0.5921, "step": 412 }, { "epoch": 0.11430152909430569, "grad_norm": 0.2530597150325775, "learning_rate": 1e-05, "loss": 0.58, "step": 413 }, { "epoch": 0.11457828824465509, "grad_norm": 0.1922450214624405, "learning_rate": 1e-05, "loss": 0.5628, "step": 414 }, { "epoch": 0.1148550473950045, "grad_norm": 0.2682504653930664, "learning_rate": 1e-05, "loss": 0.6043, "step": 415 }, { "epoch": 0.1151318065453539, "grad_norm": 0.20378156006336212, "learning_rate": 1e-05, "loss": 0.5829, "step": 416 }, { "epoch": 0.11540856569570332, "grad_norm": 0.18996180593967438, "learning_rate": 1e-05, "loss": 0.6359, "step": 417 }, { "epoch": 0.11568532484605272, "grad_norm": 0.2250959575176239, "learning_rate": 1e-05, "loss": 0.6094, "step": 418 }, { "epoch": 0.11596208399640213, "grad_norm": 0.19345486164093018, "learning_rate": 1e-05, "loss": 0.5847, "step": 419 }, { "epoch": 0.11623884314675154, "grad_norm": 0.19069959223270416, "learning_rate": 1e-05, "loss": 0.6065, "step": 420 }, { "epoch": 0.11651560229710095, "grad_norm": 0.24947337806224823, "learning_rate": 1e-05, "loss": 0.6581, "step": 421 }, { "epoch": 0.11679236144745035, "grad_norm": 0.22266733646392822, "learning_rate": 1e-05, "loss": 0.6145, "step": 422 }, { "epoch": 0.11706912059779977, "grad_norm": 0.19684013724327087, "learning_rate": 1e-05, "loss": 0.596, "step": 423 }, { "epoch": 0.11734587974814917, "grad_norm": 0.19118964672088623, "learning_rate": 1e-05, "loss": 0.6351, "step": 424 }, { "epoch": 0.11762263889849858, "grad_norm": 0.2125072479248047, "learning_rate": 1e-05, "loss": 0.6015, "step": 425 }, { "epoch": 0.11789939804884798, "grad_norm": 0.18784098327159882, "learning_rate": 1e-05, "loss": 0.5752, "step": 426 }, { "epoch": 0.1181761571991974, "grad_norm": 0.20243337750434875, "learning_rate": 1e-05, "loss": 0.6225, "step": 427 }, { "epoch": 0.11845291634954681, "grad_norm": 0.20664525032043457, "learning_rate": 1e-05, "loss": 0.6549, "step": 428 }, { "epoch": 0.11872967549989621, "grad_norm": 0.19596409797668457, "learning_rate": 1e-05, "loss": 0.5734, "step": 429 }, { "epoch": 0.11900643465024563, "grad_norm": 0.24073566496372223, "learning_rate": 1e-05, "loss": 0.6022, "step": 430 }, { "epoch": 0.11928319380059503, "grad_norm": 0.19665737450122833, "learning_rate": 1e-05, "loss": 0.5817, "step": 431 }, { "epoch": 0.11955995295094445, "grad_norm": 0.17740218341350555, "learning_rate": 1e-05, "loss": 0.5943, "step": 432 }, { "epoch": 0.11983671210129385, "grad_norm": 0.19059506058692932, "learning_rate": 1e-05, "loss": 0.5851, "step": 433 }, { "epoch": 0.12011347125164326, "grad_norm": 0.20163065195083618, "learning_rate": 1e-05, "loss": 0.6517, "step": 434 }, { "epoch": 0.12039023040199266, "grad_norm": 0.18564929068088531, "learning_rate": 1e-05, "loss": 0.5726, "step": 435 }, { "epoch": 0.12066698955234208, "grad_norm": 0.18492631614208221, "learning_rate": 1e-05, "loss": 0.6227, "step": 436 }, { "epoch": 0.12094374870269148, "grad_norm": 0.18544848263263702, "learning_rate": 1e-05, "loss": 0.5994, "step": 437 }, { "epoch": 0.12122050785304089, "grad_norm": 0.18930791318416595, "learning_rate": 1e-05, "loss": 0.5863, "step": 438 }, { "epoch": 0.1214972670033903, "grad_norm": 0.198263019323349, "learning_rate": 1e-05, "loss": 0.6175, "step": 439 }, { "epoch": 0.12177402615373971, "grad_norm": 0.18999268114566803, "learning_rate": 1e-05, "loss": 0.5951, "step": 440 }, { "epoch": 0.12205078530408911, "grad_norm": 0.20250758528709412, "learning_rate": 1e-05, "loss": 0.6049, "step": 441 }, { "epoch": 0.12232754445443852, "grad_norm": 0.18838705122470856, "learning_rate": 1e-05, "loss": 0.6095, "step": 442 }, { "epoch": 0.12260430360478794, "grad_norm": 0.2055884301662445, "learning_rate": 1e-05, "loss": 0.6004, "step": 443 }, { "epoch": 0.12288106275513734, "grad_norm": 0.20360037684440613, "learning_rate": 1e-05, "loss": 0.6041, "step": 444 }, { "epoch": 0.12315782190548676, "grad_norm": 0.1861664056777954, "learning_rate": 1e-05, "loss": 0.6208, "step": 445 }, { "epoch": 0.12343458105583616, "grad_norm": 0.19777943193912506, "learning_rate": 1e-05, "loss": 0.5986, "step": 446 }, { "epoch": 0.12371134020618557, "grad_norm": 0.1972043663263321, "learning_rate": 1e-05, "loss": 0.5997, "step": 447 }, { "epoch": 0.12398809935653497, "grad_norm": 0.20657187700271606, "learning_rate": 1e-05, "loss": 0.5862, "step": 448 }, { "epoch": 0.12426485850688439, "grad_norm": 0.18594728410243988, "learning_rate": 1e-05, "loss": 0.5802, "step": 449 }, { "epoch": 0.12454161765723379, "grad_norm": 0.1777360886335373, "learning_rate": 1e-05, "loss": 0.5764, "step": 450 }, { "epoch": 0.1248183768075832, "grad_norm": 0.2053430676460266, "learning_rate": 1e-05, "loss": 0.5793, "step": 451 }, { "epoch": 0.12509513595793262, "grad_norm": 0.2052958756685257, "learning_rate": 1e-05, "loss": 0.5745, "step": 452 }, { "epoch": 0.125371895108282, "grad_norm": 0.20246225595474243, "learning_rate": 1e-05, "loss": 0.6112, "step": 453 }, { "epoch": 0.12564865425863142, "grad_norm": 0.204086571931839, "learning_rate": 1e-05, "loss": 0.6049, "step": 454 }, { "epoch": 0.12592541340898084, "grad_norm": 0.2037525624036789, "learning_rate": 1e-05, "loss": 0.6161, "step": 455 }, { "epoch": 0.12620217255933025, "grad_norm": 0.21025724709033966, "learning_rate": 1e-05, "loss": 0.6123, "step": 456 }, { "epoch": 0.12647893170967967, "grad_norm": 0.19465531408786774, "learning_rate": 1e-05, "loss": 0.5836, "step": 457 }, { "epoch": 0.12675569086002905, "grad_norm": 0.1986858993768692, "learning_rate": 1e-05, "loss": 0.6235, "step": 458 }, { "epoch": 0.12703245001037847, "grad_norm": 0.20401056110858917, "learning_rate": 1e-05, "loss": 0.6425, "step": 459 }, { "epoch": 0.12730920916072788, "grad_norm": 0.1952493041753769, "learning_rate": 1e-05, "loss": 0.5937, "step": 460 }, { "epoch": 0.1275859683110773, "grad_norm": 0.19165979325771332, "learning_rate": 1e-05, "loss": 0.6349, "step": 461 }, { "epoch": 0.12786272746142668, "grad_norm": 0.20643185079097748, "learning_rate": 1e-05, "loss": 0.6253, "step": 462 }, { "epoch": 0.1281394866117761, "grad_norm": 0.19166037440299988, "learning_rate": 1e-05, "loss": 0.585, "step": 463 }, { "epoch": 0.1284162457621255, "grad_norm": 0.2005113959312439, "learning_rate": 1e-05, "loss": 0.609, "step": 464 }, { "epoch": 0.12869300491247493, "grad_norm": 0.20226792991161346, "learning_rate": 1e-05, "loss": 0.6122, "step": 465 }, { "epoch": 0.12896976406282432, "grad_norm": 0.1886306256055832, "learning_rate": 1e-05, "loss": 0.611, "step": 466 }, { "epoch": 0.12924652321317373, "grad_norm": 0.1967301070690155, "learning_rate": 1e-05, "loss": 0.6018, "step": 467 }, { "epoch": 0.12952328236352315, "grad_norm": 0.21677249670028687, "learning_rate": 1e-05, "loss": 0.587, "step": 468 }, { "epoch": 0.12980004151387256, "grad_norm": 0.1839626282453537, "learning_rate": 1e-05, "loss": 0.5764, "step": 469 }, { "epoch": 0.13007680066422195, "grad_norm": 0.20530344545841217, "learning_rate": 1e-05, "loss": 0.6237, "step": 470 }, { "epoch": 0.13035355981457136, "grad_norm": 0.18836864829063416, "learning_rate": 1e-05, "loss": 0.6131, "step": 471 }, { "epoch": 0.13063031896492078, "grad_norm": 0.18897645175457, "learning_rate": 1e-05, "loss": 0.6192, "step": 472 }, { "epoch": 0.1309070781152702, "grad_norm": 0.20973804593086243, "learning_rate": 1e-05, "loss": 0.6299, "step": 473 }, { "epoch": 0.1311838372656196, "grad_norm": 0.18871185183525085, "learning_rate": 1e-05, "loss": 0.604, "step": 474 }, { "epoch": 0.131460596415969, "grad_norm": 0.19015128910541534, "learning_rate": 1e-05, "loss": 0.6023, "step": 475 }, { "epoch": 0.1317373555663184, "grad_norm": 0.19968590140342712, "learning_rate": 1e-05, "loss": 0.6032, "step": 476 }, { "epoch": 0.13201411471666782, "grad_norm": 0.19206388294696808, "learning_rate": 1e-05, "loss": 0.613, "step": 477 }, { "epoch": 0.13229087386701724, "grad_norm": 0.17337140440940857, "learning_rate": 1e-05, "loss": 0.5582, "step": 478 }, { "epoch": 0.13256763301736663, "grad_norm": 0.19371014833450317, "learning_rate": 1e-05, "loss": 0.6199, "step": 479 }, { "epoch": 0.13284439216771604, "grad_norm": 0.18652591109275818, "learning_rate": 1e-05, "loss": 0.6417, "step": 480 }, { "epoch": 0.13312115131806546, "grad_norm": 0.17474046349525452, "learning_rate": 1e-05, "loss": 0.5733, "step": 481 }, { "epoch": 0.13339791046841487, "grad_norm": 0.19840781390666962, "learning_rate": 1e-05, "loss": 0.5521, "step": 482 }, { "epoch": 0.13367466961876426, "grad_norm": 0.19338847696781158, "learning_rate": 1e-05, "loss": 0.6034, "step": 483 }, { "epoch": 0.13395142876911367, "grad_norm": 0.20294329524040222, "learning_rate": 1e-05, "loss": 0.6238, "step": 484 }, { "epoch": 0.1342281879194631, "grad_norm": 0.1853896975517273, "learning_rate": 1e-05, "loss": 0.5711, "step": 485 }, { "epoch": 0.1345049470698125, "grad_norm": 0.18633653223514557, "learning_rate": 1e-05, "loss": 0.5843, "step": 486 }, { "epoch": 0.13478170622016192, "grad_norm": 0.21525007486343384, "learning_rate": 1e-05, "loss": 0.6136, "step": 487 }, { "epoch": 0.1350584653705113, "grad_norm": 0.2187526971101761, "learning_rate": 1e-05, "loss": 0.5847, "step": 488 }, { "epoch": 0.13533522452086072, "grad_norm": 0.20275889337062836, "learning_rate": 1e-05, "loss": 0.5668, "step": 489 }, { "epoch": 0.13561198367121013, "grad_norm": 0.23138263821601868, "learning_rate": 1e-05, "loss": 0.6075, "step": 490 }, { "epoch": 0.13588874282155955, "grad_norm": 0.17960554361343384, "learning_rate": 1e-05, "loss": 0.5635, "step": 491 }, { "epoch": 0.13616550197190894, "grad_norm": 0.18761545419692993, "learning_rate": 1e-05, "loss": 0.5974, "step": 492 }, { "epoch": 0.13644226112225835, "grad_norm": 0.2012123316526413, "learning_rate": 1e-05, "loss": 0.5849, "step": 493 }, { "epoch": 0.13671902027260777, "grad_norm": 0.178312748670578, "learning_rate": 1e-05, "loss": 0.5941, "step": 494 }, { "epoch": 0.13699577942295718, "grad_norm": 0.19047506153583527, "learning_rate": 1e-05, "loss": 0.5797, "step": 495 }, { "epoch": 0.13727253857330657, "grad_norm": 0.2016458660364151, "learning_rate": 1e-05, "loss": 0.5776, "step": 496 }, { "epoch": 0.13754929772365598, "grad_norm": 0.18834178149700165, "learning_rate": 1e-05, "loss": 0.6019, "step": 497 }, { "epoch": 0.1378260568740054, "grad_norm": 0.2359309196472168, "learning_rate": 1e-05, "loss": 0.6027, "step": 498 }, { "epoch": 0.1381028160243548, "grad_norm": 0.19191840291023254, "learning_rate": 1e-05, "loss": 0.6203, "step": 499 }, { "epoch": 0.1383795751747042, "grad_norm": 0.19056786596775055, "learning_rate": 1e-05, "loss": 0.5984, "step": 500 }, { "epoch": 0.13865633432505362, "grad_norm": 0.22250887751579285, "learning_rate": 1e-05, "loss": 0.613, "step": 501 }, { "epoch": 0.13893309347540303, "grad_norm": 0.20188076794147491, "learning_rate": 1e-05, "loss": 0.6171, "step": 502 }, { "epoch": 0.13920985262575244, "grad_norm": 0.19352272152900696, "learning_rate": 1e-05, "loss": 0.6159, "step": 503 }, { "epoch": 0.13948661177610186, "grad_norm": 0.19642239809036255, "learning_rate": 1e-05, "loss": 0.5848, "step": 504 }, { "epoch": 0.13976337092645125, "grad_norm": 0.18528465926647186, "learning_rate": 1e-05, "loss": 0.5997, "step": 505 }, { "epoch": 0.14004013007680066, "grad_norm": 0.1924692839384079, "learning_rate": 1e-05, "loss": 0.5839, "step": 506 }, { "epoch": 0.14031688922715008, "grad_norm": 0.19050778448581696, "learning_rate": 1e-05, "loss": 0.6285, "step": 507 }, { "epoch": 0.1405936483774995, "grad_norm": 0.19218075275421143, "learning_rate": 1e-05, "loss": 0.6126, "step": 508 }, { "epoch": 0.14087040752784888, "grad_norm": 0.20011256635189056, "learning_rate": 1e-05, "loss": 0.617, "step": 509 }, { "epoch": 0.1411471666781983, "grad_norm": 0.1922251284122467, "learning_rate": 1e-05, "loss": 0.6176, "step": 510 }, { "epoch": 0.1414239258285477, "grad_norm": 0.1859605759382248, "learning_rate": 1e-05, "loss": 0.6009, "step": 511 }, { "epoch": 0.14170068497889712, "grad_norm": 0.20947584509849548, "learning_rate": 1e-05, "loss": 0.6121, "step": 512 }, { "epoch": 0.1419774441292465, "grad_norm": 0.19215765595436096, "learning_rate": 1e-05, "loss": 0.5849, "step": 513 }, { "epoch": 0.14225420327959593, "grad_norm": 0.18443572521209717, "learning_rate": 1e-05, "loss": 0.6004, "step": 514 }, { "epoch": 0.14253096242994534, "grad_norm": 0.206680029630661, "learning_rate": 1e-05, "loss": 0.586, "step": 515 }, { "epoch": 0.14280772158029476, "grad_norm": 0.21743780374526978, "learning_rate": 1e-05, "loss": 0.6202, "step": 516 }, { "epoch": 0.14308448073064417, "grad_norm": 0.1922091543674469, "learning_rate": 1e-05, "loss": 0.5781, "step": 517 }, { "epoch": 0.14336123988099356, "grad_norm": 0.20775730907917023, "learning_rate": 1e-05, "loss": 0.599, "step": 518 }, { "epoch": 0.14363799903134297, "grad_norm": 0.19122952222824097, "learning_rate": 1e-05, "loss": 0.6104, "step": 519 }, { "epoch": 0.1439147581816924, "grad_norm": 0.18587736785411835, "learning_rate": 1e-05, "loss": 0.5438, "step": 520 }, { "epoch": 0.1441915173320418, "grad_norm": 0.1990147978067398, "learning_rate": 1e-05, "loss": 0.6028, "step": 521 }, { "epoch": 0.1444682764823912, "grad_norm": 0.18898053467273712, "learning_rate": 1e-05, "loss": 0.588, "step": 522 }, { "epoch": 0.1447450356327406, "grad_norm": 0.18808215856552124, "learning_rate": 1e-05, "loss": 0.6119, "step": 523 }, { "epoch": 0.14502179478309002, "grad_norm": 0.2041255086660385, "learning_rate": 1e-05, "loss": 0.5928, "step": 524 }, { "epoch": 0.14529855393343943, "grad_norm": 0.18991221487522125, "learning_rate": 1e-05, "loss": 0.5862, "step": 525 }, { "epoch": 0.14557531308378882, "grad_norm": 0.1885487586259842, "learning_rate": 1e-05, "loss": 0.5706, "step": 526 }, { "epoch": 0.14585207223413824, "grad_norm": 0.22691205143928528, "learning_rate": 1e-05, "loss": 0.605, "step": 527 }, { "epoch": 0.14612883138448765, "grad_norm": 0.18875741958618164, "learning_rate": 1e-05, "loss": 0.5889, "step": 528 }, { "epoch": 0.14640559053483707, "grad_norm": 0.20975887775421143, "learning_rate": 1e-05, "loss": 0.5916, "step": 529 }, { "epoch": 0.14668234968518645, "grad_norm": 0.2034844160079956, "learning_rate": 1e-05, "loss": 0.6006, "step": 530 }, { "epoch": 0.14695910883553587, "grad_norm": 0.20306435227394104, "learning_rate": 1e-05, "loss": 0.6205, "step": 531 }, { "epoch": 0.14723586798588528, "grad_norm": 0.17797498404979706, "learning_rate": 1e-05, "loss": 0.5843, "step": 532 }, { "epoch": 0.1475126271362347, "grad_norm": 0.21485963463783264, "learning_rate": 1e-05, "loss": 0.5895, "step": 533 }, { "epoch": 0.1477893862865841, "grad_norm": 0.19315105676651, "learning_rate": 1e-05, "loss": 0.5754, "step": 534 }, { "epoch": 0.1480661454369335, "grad_norm": 0.2330632358789444, "learning_rate": 1e-05, "loss": 0.6062, "step": 535 }, { "epoch": 0.14834290458728291, "grad_norm": 0.18280082941055298, "learning_rate": 1e-05, "loss": 0.6152, "step": 536 }, { "epoch": 0.14861966373763233, "grad_norm": 0.20307470858097076, "learning_rate": 1e-05, "loss": 0.6313, "step": 537 }, { "epoch": 0.14889642288798174, "grad_norm": 0.22527331113815308, "learning_rate": 1e-05, "loss": 0.605, "step": 538 }, { "epoch": 0.14917318203833113, "grad_norm": 0.18950757384300232, "learning_rate": 1e-05, "loss": 0.6062, "step": 539 }, { "epoch": 0.14944994118868055, "grad_norm": 0.18161363899707794, "learning_rate": 1e-05, "loss": 0.5963, "step": 540 }, { "epoch": 0.14972670033902996, "grad_norm": 0.20989076793193817, "learning_rate": 1e-05, "loss": 0.5757, "step": 541 }, { "epoch": 0.15000345948937938, "grad_norm": 0.21076437830924988, "learning_rate": 1e-05, "loss": 0.5856, "step": 542 }, { "epoch": 0.15028021863972876, "grad_norm": 0.19914399087429047, "learning_rate": 1e-05, "loss": 0.6112, "step": 543 }, { "epoch": 0.15055697779007818, "grad_norm": 0.19186805188655853, "learning_rate": 1e-05, "loss": 0.5829, "step": 544 }, { "epoch": 0.1508337369404276, "grad_norm": 0.21969106793403625, "learning_rate": 1e-05, "loss": 0.599, "step": 545 }, { "epoch": 0.151110496090777, "grad_norm": 0.22894170880317688, "learning_rate": 1e-05, "loss": 0.6151, "step": 546 }, { "epoch": 0.15138725524112642, "grad_norm": 0.20425210893154144, "learning_rate": 1e-05, "loss": 0.5598, "step": 547 }, { "epoch": 0.1516640143914758, "grad_norm": 0.18593771755695343, "learning_rate": 1e-05, "loss": 0.5898, "step": 548 }, { "epoch": 0.15194077354182522, "grad_norm": 0.19039373099803925, "learning_rate": 1e-05, "loss": 0.6113, "step": 549 }, { "epoch": 0.15221753269217464, "grad_norm": 0.20003843307495117, "learning_rate": 1e-05, "loss": 0.5527, "step": 550 }, { "epoch": 0.15249429184252405, "grad_norm": 0.18763229250907898, "learning_rate": 1e-05, "loss": 0.604, "step": 551 }, { "epoch": 0.15277105099287344, "grad_norm": 0.17885464429855347, "learning_rate": 1e-05, "loss": 0.5867, "step": 552 }, { "epoch": 0.15304781014322286, "grad_norm": 0.19012632966041565, "learning_rate": 1e-05, "loss": 0.6041, "step": 553 }, { "epoch": 0.15332456929357227, "grad_norm": 0.1861179769039154, "learning_rate": 1e-05, "loss": 0.5594, "step": 554 }, { "epoch": 0.1536013284439217, "grad_norm": 0.1847517490386963, "learning_rate": 1e-05, "loss": 0.5933, "step": 555 }, { "epoch": 0.15387808759427107, "grad_norm": 0.1902363896369934, "learning_rate": 1e-05, "loss": 0.6329, "step": 556 }, { "epoch": 0.1541548467446205, "grad_norm": 0.19042816758155823, "learning_rate": 1e-05, "loss": 0.5919, "step": 557 }, { "epoch": 0.1544316058949699, "grad_norm": 0.20321138203144073, "learning_rate": 1e-05, "loss": 0.6212, "step": 558 }, { "epoch": 0.15470836504531932, "grad_norm": 0.18109048902988434, "learning_rate": 1e-05, "loss": 0.5782, "step": 559 }, { "epoch": 0.15498512419566873, "grad_norm": 0.19851604104042053, "learning_rate": 1e-05, "loss": 0.576, "step": 560 }, { "epoch": 0.15526188334601812, "grad_norm": 0.1873910278081894, "learning_rate": 1e-05, "loss": 0.575, "step": 561 }, { "epoch": 0.15553864249636754, "grad_norm": 0.21075931191444397, "learning_rate": 1e-05, "loss": 0.628, "step": 562 }, { "epoch": 0.15581540164671695, "grad_norm": 0.1850409358739853, "learning_rate": 1e-05, "loss": 0.5721, "step": 563 }, { "epoch": 0.15609216079706636, "grad_norm": 0.18616290390491486, "learning_rate": 1e-05, "loss": 0.5818, "step": 564 }, { "epoch": 0.15636891994741575, "grad_norm": 0.19328425824642181, "learning_rate": 1e-05, "loss": 0.6013, "step": 565 }, { "epoch": 0.15664567909776517, "grad_norm": 0.21102961897850037, "learning_rate": 1e-05, "loss": 0.609, "step": 566 }, { "epoch": 0.15692243824811458, "grad_norm": 0.19346831738948822, "learning_rate": 1e-05, "loss": 0.598, "step": 567 }, { "epoch": 0.157199197398464, "grad_norm": 0.19231358170509338, "learning_rate": 1e-05, "loss": 0.5786, "step": 568 }, { "epoch": 0.15747595654881338, "grad_norm": 0.18777577579021454, "learning_rate": 1e-05, "loss": 0.5752, "step": 569 }, { "epoch": 0.1577527156991628, "grad_norm": 0.22667695581912994, "learning_rate": 1e-05, "loss": 0.5709, "step": 570 }, { "epoch": 0.1580294748495122, "grad_norm": 0.17205539345741272, "learning_rate": 1e-05, "loss": 0.5563, "step": 571 }, { "epoch": 0.15830623399986163, "grad_norm": 0.1781681925058365, "learning_rate": 1e-05, "loss": 0.6164, "step": 572 }, { "epoch": 0.15858299315021102, "grad_norm": 0.19936242699623108, "learning_rate": 1e-05, "loss": 0.5784, "step": 573 }, { "epoch": 0.15885975230056043, "grad_norm": 0.18174012005329132, "learning_rate": 1e-05, "loss": 0.5809, "step": 574 }, { "epoch": 0.15913651145090985, "grad_norm": 0.18689844012260437, "learning_rate": 1e-05, "loss": 0.6121, "step": 575 }, { "epoch": 0.15941327060125926, "grad_norm": 0.1878029704093933, "learning_rate": 1e-05, "loss": 0.5651, "step": 576 }, { "epoch": 0.15969002975160868, "grad_norm": 0.2152654230594635, "learning_rate": 1e-05, "loss": 0.5762, "step": 577 }, { "epoch": 0.15996678890195806, "grad_norm": 0.1833391785621643, "learning_rate": 1e-05, "loss": 0.6272, "step": 578 }, { "epoch": 0.16024354805230748, "grad_norm": 0.1884106695652008, "learning_rate": 1e-05, "loss": 0.6179, "step": 579 }, { "epoch": 0.1605203072026569, "grad_norm": 0.18467047810554504, "learning_rate": 1e-05, "loss": 0.5933, "step": 580 }, { "epoch": 0.1607970663530063, "grad_norm": 0.18447092175483704, "learning_rate": 1e-05, "loss": 0.6079, "step": 581 }, { "epoch": 0.1610738255033557, "grad_norm": 0.20124220848083496, "learning_rate": 1e-05, "loss": 0.607, "step": 582 }, { "epoch": 0.1613505846537051, "grad_norm": 0.1844351589679718, "learning_rate": 1e-05, "loss": 0.5993, "step": 583 }, { "epoch": 0.16162734380405452, "grad_norm": 0.17676995694637299, "learning_rate": 1e-05, "loss": 0.6068, "step": 584 }, { "epoch": 0.16190410295440394, "grad_norm": 0.1932850480079651, "learning_rate": 1e-05, "loss": 0.5916, "step": 585 }, { "epoch": 0.16218086210475333, "grad_norm": 0.18112710118293762, "learning_rate": 1e-05, "loss": 0.5785, "step": 586 }, { "epoch": 0.16245762125510274, "grad_norm": 0.1780952513217926, "learning_rate": 1e-05, "loss": 0.5641, "step": 587 }, { "epoch": 0.16273438040545216, "grad_norm": 0.19645068049430847, "learning_rate": 1e-05, "loss": 0.5899, "step": 588 }, { "epoch": 0.16301113955580157, "grad_norm": 0.18219690024852753, "learning_rate": 1e-05, "loss": 0.5996, "step": 589 }, { "epoch": 0.16328789870615099, "grad_norm": 0.1953161209821701, "learning_rate": 1e-05, "loss": 0.569, "step": 590 }, { "epoch": 0.16356465785650037, "grad_norm": 0.1812022477388382, "learning_rate": 1e-05, "loss": 0.5742, "step": 591 }, { "epoch": 0.1638414170068498, "grad_norm": 0.18496783077716827, "learning_rate": 1e-05, "loss": 0.5806, "step": 592 }, { "epoch": 0.1641181761571992, "grad_norm": 0.1851193755865097, "learning_rate": 1e-05, "loss": 0.5791, "step": 593 }, { "epoch": 0.16439493530754862, "grad_norm": 0.18707747757434845, "learning_rate": 1e-05, "loss": 0.5976, "step": 594 }, { "epoch": 0.164671694457898, "grad_norm": 0.1921873539686203, "learning_rate": 1e-05, "loss": 0.6498, "step": 595 }, { "epoch": 0.16494845360824742, "grad_norm": 0.19136972725391388, "learning_rate": 1e-05, "loss": 0.5915, "step": 596 }, { "epoch": 0.16522521275859683, "grad_norm": 0.17809490859508514, "learning_rate": 1e-05, "loss": 0.5917, "step": 597 }, { "epoch": 0.16550197190894625, "grad_norm": 0.17912352085113525, "learning_rate": 1e-05, "loss": 0.5871, "step": 598 }, { "epoch": 0.16577873105929564, "grad_norm": 0.1936812549829483, "learning_rate": 1e-05, "loss": 0.6017, "step": 599 }, { "epoch": 0.16605549020964505, "grad_norm": 0.19793425500392914, "learning_rate": 1e-05, "loss": 0.5627, "step": 600 }, { "epoch": 0.16633224935999447, "grad_norm": 0.18367448449134827, "learning_rate": 1e-05, "loss": 0.5855, "step": 601 }, { "epoch": 0.16660900851034388, "grad_norm": 0.19004136323928833, "learning_rate": 1e-05, "loss": 0.5558, "step": 602 }, { "epoch": 0.16688576766069327, "grad_norm": 0.20491254329681396, "learning_rate": 1e-05, "loss": 0.6189, "step": 603 }, { "epoch": 0.16716252681104268, "grad_norm": 0.19423207640647888, "learning_rate": 1e-05, "loss": 0.5952, "step": 604 }, { "epoch": 0.1674392859613921, "grad_norm": 0.1994161456823349, "learning_rate": 1e-05, "loss": 0.5762, "step": 605 }, { "epoch": 0.1677160451117415, "grad_norm": 0.18722465634346008, "learning_rate": 1e-05, "loss": 0.6212, "step": 606 }, { "epoch": 0.16799280426209093, "grad_norm": 0.19015635550022125, "learning_rate": 1e-05, "loss": 0.5864, "step": 607 }, { "epoch": 0.16826956341244032, "grad_norm": 0.18962502479553223, "learning_rate": 1e-05, "loss": 0.5994, "step": 608 }, { "epoch": 0.16854632256278973, "grad_norm": 0.18032273650169373, "learning_rate": 1e-05, "loss": 0.5841, "step": 609 }, { "epoch": 0.16882308171313914, "grad_norm": 0.1772097647190094, "learning_rate": 1e-05, "loss": 0.5773, "step": 610 }, { "epoch": 0.16909984086348856, "grad_norm": 0.2008288949728012, "learning_rate": 1e-05, "loss": 0.5787, "step": 611 }, { "epoch": 0.16937660001383795, "grad_norm": 0.18484532833099365, "learning_rate": 1e-05, "loss": 0.5939, "step": 612 }, { "epoch": 0.16965335916418736, "grad_norm": 0.18176114559173584, "learning_rate": 1e-05, "loss": 0.5814, "step": 613 }, { "epoch": 0.16993011831453678, "grad_norm": 0.206057608127594, "learning_rate": 1e-05, "loss": 0.5606, "step": 614 }, { "epoch": 0.1702068774648862, "grad_norm": 0.20997023582458496, "learning_rate": 1e-05, "loss": 0.613, "step": 615 }, { "epoch": 0.17048363661523558, "grad_norm": 0.18715348839759827, "learning_rate": 1e-05, "loss": 0.5962, "step": 616 }, { "epoch": 0.170760395765585, "grad_norm": 0.21124491095542908, "learning_rate": 1e-05, "loss": 0.5837, "step": 617 }, { "epoch": 0.1710371549159344, "grad_norm": 0.1852397471666336, "learning_rate": 1e-05, "loss": 0.5817, "step": 618 }, { "epoch": 0.17131391406628382, "grad_norm": 0.19539125263690948, "learning_rate": 1e-05, "loss": 0.598, "step": 619 }, { "epoch": 0.17159067321663324, "grad_norm": 0.18463635444641113, "learning_rate": 1e-05, "loss": 0.5781, "step": 620 }, { "epoch": 0.17186743236698263, "grad_norm": 0.1878219097852707, "learning_rate": 1e-05, "loss": 0.6003, "step": 621 }, { "epoch": 0.17214419151733204, "grad_norm": 0.18427549302577972, "learning_rate": 1e-05, "loss": 0.6162, "step": 622 }, { "epoch": 0.17242095066768146, "grad_norm": 0.1873679906129837, "learning_rate": 1e-05, "loss": 0.5851, "step": 623 }, { "epoch": 0.17269770981803087, "grad_norm": 0.1959691196680069, "learning_rate": 1e-05, "loss": 0.6018, "step": 624 }, { "epoch": 0.17297446896838026, "grad_norm": 0.18951405584812164, "learning_rate": 1e-05, "loss": 0.6045, "step": 625 }, { "epoch": 0.17325122811872967, "grad_norm": 0.20031677186489105, "learning_rate": 1e-05, "loss": 0.6112, "step": 626 }, { "epoch": 0.1735279872690791, "grad_norm": 0.18173369765281677, "learning_rate": 1e-05, "loss": 0.5839, "step": 627 }, { "epoch": 0.1738047464194285, "grad_norm": 0.18123826384544373, "learning_rate": 1e-05, "loss": 0.5729, "step": 628 }, { "epoch": 0.1740815055697779, "grad_norm": 0.17724889516830444, "learning_rate": 1e-05, "loss": 0.5807, "step": 629 }, { "epoch": 0.1743582647201273, "grad_norm": 0.18829111754894257, "learning_rate": 1e-05, "loss": 0.5966, "step": 630 }, { "epoch": 0.17463502387047672, "grad_norm": 0.20029404759407043, "learning_rate": 1e-05, "loss": 0.5913, "step": 631 }, { "epoch": 0.17491178302082613, "grad_norm": 0.21502263844013214, "learning_rate": 1e-05, "loss": 0.609, "step": 632 }, { "epoch": 0.17518854217117552, "grad_norm": 0.1850699931383133, "learning_rate": 1e-05, "loss": 0.5715, "step": 633 }, { "epoch": 0.17546530132152494, "grad_norm": 0.22841838002204895, "learning_rate": 1e-05, "loss": 0.5586, "step": 634 }, { "epoch": 0.17574206047187435, "grad_norm": 0.19472545385360718, "learning_rate": 1e-05, "loss": 0.5997, "step": 635 }, { "epoch": 0.17601881962222377, "grad_norm": 0.18416079878807068, "learning_rate": 1e-05, "loss": 0.6168, "step": 636 }, { "epoch": 0.17629557877257318, "grad_norm": 0.2019299864768982, "learning_rate": 1e-05, "loss": 0.5687, "step": 637 }, { "epoch": 0.17657233792292257, "grad_norm": 0.17828074097633362, "learning_rate": 1e-05, "loss": 0.6002, "step": 638 }, { "epoch": 0.17684909707327198, "grad_norm": 0.18352048099040985, "learning_rate": 1e-05, "loss": 0.5676, "step": 639 }, { "epoch": 0.1771258562236214, "grad_norm": 0.1852838099002838, "learning_rate": 1e-05, "loss": 0.5887, "step": 640 }, { "epoch": 0.1774026153739708, "grad_norm": 0.23459641635417938, "learning_rate": 1e-05, "loss": 0.5814, "step": 641 }, { "epoch": 0.1776793745243202, "grad_norm": 0.24283069372177124, "learning_rate": 1e-05, "loss": 0.6, "step": 642 }, { "epoch": 0.17795613367466961, "grad_norm": 0.24392537772655487, "learning_rate": 1e-05, "loss": 0.607, "step": 643 }, { "epoch": 0.17823289282501903, "grad_norm": 0.3690929114818573, "learning_rate": 1e-05, "loss": 0.5867, "step": 644 }, { "epoch": 0.17850965197536844, "grad_norm": 0.21599270403385162, "learning_rate": 1e-05, "loss": 0.6051, "step": 645 }, { "epoch": 0.17878641112571783, "grad_norm": 0.19567495584487915, "learning_rate": 1e-05, "loss": 0.612, "step": 646 }, { "epoch": 0.17906317027606725, "grad_norm": 0.1907491832971573, "learning_rate": 1e-05, "loss": 0.5786, "step": 647 }, { "epoch": 0.17933992942641666, "grad_norm": 0.20501020550727844, "learning_rate": 1e-05, "loss": 0.6041, "step": 648 }, { "epoch": 0.17961668857676608, "grad_norm": 0.19061295688152313, "learning_rate": 1e-05, "loss": 0.5874, "step": 649 }, { "epoch": 0.1798934477271155, "grad_norm": 0.18138563632965088, "learning_rate": 1e-05, "loss": 0.5224, "step": 650 }, { "epoch": 0.18017020687746488, "grad_norm": 0.18018782138824463, "learning_rate": 1e-05, "loss": 0.5635, "step": 651 }, { "epoch": 0.1804469660278143, "grad_norm": 0.22037748992443085, "learning_rate": 1e-05, "loss": 0.595, "step": 652 }, { "epoch": 0.1807237251781637, "grad_norm": 0.17819008231163025, "learning_rate": 1e-05, "loss": 0.5696, "step": 653 }, { "epoch": 0.18100048432851312, "grad_norm": 0.2292039543390274, "learning_rate": 1e-05, "loss": 0.6074, "step": 654 }, { "epoch": 0.1812772434788625, "grad_norm": 0.19042913615703583, "learning_rate": 1e-05, "loss": 0.586, "step": 655 }, { "epoch": 0.18155400262921192, "grad_norm": 0.19912151992321014, "learning_rate": 1e-05, "loss": 0.5926, "step": 656 }, { "epoch": 0.18183076177956134, "grad_norm": 0.183335542678833, "learning_rate": 1e-05, "loss": 0.586, "step": 657 }, { "epoch": 0.18210752092991075, "grad_norm": 0.1976127177476883, "learning_rate": 1e-05, "loss": 0.5965, "step": 658 }, { "epoch": 0.18238428008026014, "grad_norm": 0.18121112883090973, "learning_rate": 1e-05, "loss": 0.5646, "step": 659 }, { "epoch": 0.18266103923060956, "grad_norm": 0.18170765042304993, "learning_rate": 1e-05, "loss": 0.5875, "step": 660 }, { "epoch": 0.18293779838095897, "grad_norm": 0.18556669354438782, "learning_rate": 1e-05, "loss": 0.6145, "step": 661 }, { "epoch": 0.1832145575313084, "grad_norm": 0.1879902482032776, "learning_rate": 1e-05, "loss": 0.5916, "step": 662 }, { "epoch": 0.1834913166816578, "grad_norm": 0.17723216116428375, "learning_rate": 1e-05, "loss": 0.5789, "step": 663 }, { "epoch": 0.1837680758320072, "grad_norm": 0.2062070518732071, "learning_rate": 1e-05, "loss": 0.6163, "step": 664 }, { "epoch": 0.1840448349823566, "grad_norm": 0.19314047694206238, "learning_rate": 1e-05, "loss": 0.5636, "step": 665 }, { "epoch": 0.18432159413270602, "grad_norm": 0.17998869717121124, "learning_rate": 1e-05, "loss": 0.5916, "step": 666 }, { "epoch": 0.18459835328305543, "grad_norm": 0.1841798722743988, "learning_rate": 1e-05, "loss": 0.6123, "step": 667 }, { "epoch": 0.18487511243340482, "grad_norm": 0.18516206741333008, "learning_rate": 1e-05, "loss": 0.604, "step": 668 }, { "epoch": 0.18515187158375424, "grad_norm": 0.1783057451248169, "learning_rate": 1e-05, "loss": 0.6295, "step": 669 }, { "epoch": 0.18542863073410365, "grad_norm": 0.18766267597675323, "learning_rate": 1e-05, "loss": 0.6096, "step": 670 }, { "epoch": 0.18570538988445306, "grad_norm": 0.1826966255903244, "learning_rate": 1e-05, "loss": 0.6062, "step": 671 }, { "epoch": 0.18598214903480245, "grad_norm": 0.17895442247390747, "learning_rate": 1e-05, "loss": 0.5718, "step": 672 }, { "epoch": 0.18625890818515187, "grad_norm": 0.1950952112674713, "learning_rate": 1e-05, "loss": 0.5812, "step": 673 }, { "epoch": 0.18653566733550128, "grad_norm": 0.1864253133535385, "learning_rate": 1e-05, "loss": 0.6064, "step": 674 }, { "epoch": 0.1868124264858507, "grad_norm": 0.17804020643234253, "learning_rate": 1e-05, "loss": 0.6128, "step": 675 }, { "epoch": 0.18708918563620008, "grad_norm": 0.1820497065782547, "learning_rate": 1e-05, "loss": 0.5719, "step": 676 }, { "epoch": 0.1873659447865495, "grad_norm": 0.18617863953113556, "learning_rate": 1e-05, "loss": 0.601, "step": 677 }, { "epoch": 0.1876427039368989, "grad_norm": 0.18955466151237488, "learning_rate": 1e-05, "loss": 0.6386, "step": 678 }, { "epoch": 0.18791946308724833, "grad_norm": 0.17780135571956635, "learning_rate": 1e-05, "loss": 0.6047, "step": 679 }, { "epoch": 0.18819622223759774, "grad_norm": 0.1745823621749878, "learning_rate": 1e-05, "loss": 0.5908, "step": 680 }, { "epoch": 0.18847298138794713, "grad_norm": 0.2052975744009018, "learning_rate": 1e-05, "loss": 0.5903, "step": 681 }, { "epoch": 0.18874974053829655, "grad_norm": 0.1906580924987793, "learning_rate": 1e-05, "loss": 0.5696, "step": 682 }, { "epoch": 0.18902649968864596, "grad_norm": 0.19474008679389954, "learning_rate": 1e-05, "loss": 0.6093, "step": 683 }, { "epoch": 0.18930325883899538, "grad_norm": 0.19493243098258972, "learning_rate": 1e-05, "loss": 0.6077, "step": 684 }, { "epoch": 0.18958001798934476, "grad_norm": 0.18819427490234375, "learning_rate": 1e-05, "loss": 0.6141, "step": 685 }, { "epoch": 0.18985677713969418, "grad_norm": 0.17513544857501984, "learning_rate": 1e-05, "loss": 0.5773, "step": 686 }, { "epoch": 0.1901335362900436, "grad_norm": 0.19603599607944489, "learning_rate": 1e-05, "loss": 0.6018, "step": 687 }, { "epoch": 0.190410295440393, "grad_norm": 0.18084825575351715, "learning_rate": 1e-05, "loss": 0.5945, "step": 688 }, { "epoch": 0.1906870545907424, "grad_norm": 0.19107845425605774, "learning_rate": 1e-05, "loss": 0.6098, "step": 689 }, { "epoch": 0.1909638137410918, "grad_norm": 0.17983633279800415, "learning_rate": 1e-05, "loss": 0.5843, "step": 690 }, { "epoch": 0.19124057289144122, "grad_norm": 0.1856430619955063, "learning_rate": 1e-05, "loss": 0.6096, "step": 691 }, { "epoch": 0.19151733204179064, "grad_norm": 0.1811629682779312, "learning_rate": 1e-05, "loss": 0.6211, "step": 692 }, { "epoch": 0.19179409119214005, "grad_norm": 0.19463561475276947, "learning_rate": 1e-05, "loss": 0.5714, "step": 693 }, { "epoch": 0.19207085034248944, "grad_norm": 0.19207587838172913, "learning_rate": 1e-05, "loss": 0.6096, "step": 694 }, { "epoch": 0.19234760949283886, "grad_norm": 0.19731508195400238, "learning_rate": 1e-05, "loss": 0.5851, "step": 695 }, { "epoch": 0.19262436864318827, "grad_norm": 0.19325734674930573, "learning_rate": 1e-05, "loss": 0.5761, "step": 696 }, { "epoch": 0.19290112779353769, "grad_norm": 0.1881234496831894, "learning_rate": 1e-05, "loss": 0.5532, "step": 697 }, { "epoch": 0.19317788694388707, "grad_norm": 0.19624708592891693, "learning_rate": 1e-05, "loss": 0.5814, "step": 698 }, { "epoch": 0.1934546460942365, "grad_norm": 0.20489643514156342, "learning_rate": 1e-05, "loss": 0.5699, "step": 699 }, { "epoch": 0.1937314052445859, "grad_norm": 0.19450590014457703, "learning_rate": 1e-05, "loss": 0.646, "step": 700 }, { "epoch": 0.19400816439493532, "grad_norm": 0.21459051966667175, "learning_rate": 1e-05, "loss": 0.5993, "step": 701 }, { "epoch": 0.1942849235452847, "grad_norm": 0.1794232726097107, "learning_rate": 1e-05, "loss": 0.5772, "step": 702 }, { "epoch": 0.19456168269563412, "grad_norm": 0.1696508526802063, "learning_rate": 1e-05, "loss": 0.5707, "step": 703 }, { "epoch": 0.19483844184598353, "grad_norm": 0.19712381064891815, "learning_rate": 1e-05, "loss": 0.5808, "step": 704 }, { "epoch": 0.19511520099633295, "grad_norm": 0.1869281381368637, "learning_rate": 1e-05, "loss": 0.5878, "step": 705 }, { "epoch": 0.19539196014668234, "grad_norm": 0.17912180721759796, "learning_rate": 1e-05, "loss": 0.5892, "step": 706 }, { "epoch": 0.19566871929703175, "grad_norm": 0.20578134059906006, "learning_rate": 1e-05, "loss": 0.5783, "step": 707 }, { "epoch": 0.19594547844738117, "grad_norm": 0.1926405131816864, "learning_rate": 1e-05, "loss": 0.5838, "step": 708 }, { "epoch": 0.19622223759773058, "grad_norm": 0.1833278089761734, "learning_rate": 1e-05, "loss": 0.5894, "step": 709 }, { "epoch": 0.19649899674808, "grad_norm": 0.16988970339298248, "learning_rate": 1e-05, "loss": 0.5907, "step": 710 }, { "epoch": 0.19677575589842938, "grad_norm": 0.19566982984542847, "learning_rate": 1e-05, "loss": 0.6061, "step": 711 }, { "epoch": 0.1970525150487788, "grad_norm": 0.18078204989433289, "learning_rate": 1e-05, "loss": 0.557, "step": 712 }, { "epoch": 0.1973292741991282, "grad_norm": 0.1887751817703247, "learning_rate": 1e-05, "loss": 0.6015, "step": 713 }, { "epoch": 0.19760603334947763, "grad_norm": 0.18374057114124298, "learning_rate": 1e-05, "loss": 0.6259, "step": 714 }, { "epoch": 0.19788279249982701, "grad_norm": 0.19418522715568542, "learning_rate": 1e-05, "loss": 0.6075, "step": 715 }, { "epoch": 0.19815955165017643, "grad_norm": 0.18870128691196442, "learning_rate": 1e-05, "loss": 0.587, "step": 716 }, { "epoch": 0.19843631080052584, "grad_norm": 0.18979854881763458, "learning_rate": 1e-05, "loss": 0.5878, "step": 717 }, { "epoch": 0.19871306995087526, "grad_norm": 0.18741941452026367, "learning_rate": 1e-05, "loss": 0.5681, "step": 718 }, { "epoch": 0.19898982910122465, "grad_norm": 0.1813717633485794, "learning_rate": 1e-05, "loss": 0.6053, "step": 719 }, { "epoch": 0.19926658825157406, "grad_norm": 0.1801595538854599, "learning_rate": 1e-05, "loss": 0.5824, "step": 720 }, { "epoch": 0.19954334740192348, "grad_norm": 0.1840808242559433, "learning_rate": 1e-05, "loss": 0.6071, "step": 721 }, { "epoch": 0.1998201065522729, "grad_norm": 0.18150578439235687, "learning_rate": 1e-05, "loss": 0.5639, "step": 722 }, { "epoch": 0.2000968657026223, "grad_norm": 0.19364473223686218, "learning_rate": 1e-05, "loss": 0.5954, "step": 723 }, { "epoch": 0.2003736248529717, "grad_norm": 0.18236809968948364, "learning_rate": 1e-05, "loss": 0.5847, "step": 724 }, { "epoch": 0.2006503840033211, "grad_norm": 0.19149993360042572, "learning_rate": 1e-05, "loss": 0.6059, "step": 725 }, { "epoch": 0.20092714315367052, "grad_norm": 0.17820106446743011, "learning_rate": 1e-05, "loss": 0.5744, "step": 726 }, { "epoch": 0.20120390230401994, "grad_norm": 0.19103458523750305, "learning_rate": 1e-05, "loss": 0.58, "step": 727 }, { "epoch": 0.20148066145436933, "grad_norm": 0.19411242008209229, "learning_rate": 1e-05, "loss": 0.5677, "step": 728 }, { "epoch": 0.20175742060471874, "grad_norm": 0.18748793005943298, "learning_rate": 1e-05, "loss": 0.5898, "step": 729 }, { "epoch": 0.20203417975506816, "grad_norm": 0.17295505106449127, "learning_rate": 1e-05, "loss": 0.5478, "step": 730 }, { "epoch": 0.20231093890541757, "grad_norm": 0.1901765614748001, "learning_rate": 1e-05, "loss": 0.6062, "step": 731 }, { "epoch": 0.20258769805576696, "grad_norm": 0.1956249624490738, "learning_rate": 1e-05, "loss": 0.5688, "step": 732 }, { "epoch": 0.20286445720611637, "grad_norm": 0.182984858751297, "learning_rate": 1e-05, "loss": 0.5977, "step": 733 }, { "epoch": 0.2031412163564658, "grad_norm": 0.17999690771102905, "learning_rate": 1e-05, "loss": 0.561, "step": 734 }, { "epoch": 0.2034179755068152, "grad_norm": 0.1889934092760086, "learning_rate": 1e-05, "loss": 0.5506, "step": 735 }, { "epoch": 0.2036947346571646, "grad_norm": 0.17668218910694122, "learning_rate": 1e-05, "loss": 0.579, "step": 736 }, { "epoch": 0.203971493807514, "grad_norm": 0.18725812435150146, "learning_rate": 1e-05, "loss": 0.5798, "step": 737 }, { "epoch": 0.20424825295786342, "grad_norm": 0.18451805412769318, "learning_rate": 1e-05, "loss": 0.5877, "step": 738 }, { "epoch": 0.20452501210821283, "grad_norm": 0.17329992353916168, "learning_rate": 1e-05, "loss": 0.5637, "step": 739 }, { "epoch": 0.20480177125856225, "grad_norm": 0.1794307678937912, "learning_rate": 1e-05, "loss": 0.5961, "step": 740 }, { "epoch": 0.20507853040891164, "grad_norm": 0.17594371736049652, "learning_rate": 1e-05, "loss": 0.5923, "step": 741 }, { "epoch": 0.20535528955926105, "grad_norm": 0.1836751252412796, "learning_rate": 1e-05, "loss": 0.5899, "step": 742 }, { "epoch": 0.20563204870961047, "grad_norm": 0.1916855275630951, "learning_rate": 1e-05, "loss": 0.5705, "step": 743 }, { "epoch": 0.20590880785995988, "grad_norm": 0.19249284267425537, "learning_rate": 1e-05, "loss": 0.605, "step": 744 }, { "epoch": 0.20618556701030927, "grad_norm": 0.17222891747951508, "learning_rate": 1e-05, "loss": 0.5994, "step": 745 }, { "epoch": 0.20646232616065868, "grad_norm": 0.18759506940841675, "learning_rate": 1e-05, "loss": 0.6015, "step": 746 }, { "epoch": 0.2067390853110081, "grad_norm": 0.20155805349349976, "learning_rate": 1e-05, "loss": 0.6027, "step": 747 }, { "epoch": 0.2070158444613575, "grad_norm": 0.195786714553833, "learning_rate": 1e-05, "loss": 0.5626, "step": 748 }, { "epoch": 0.2072926036117069, "grad_norm": 0.178785502910614, "learning_rate": 1e-05, "loss": 0.6043, "step": 749 }, { "epoch": 0.20756936276205631, "grad_norm": 0.17319819331169128, "learning_rate": 1e-05, "loss": 0.5825, "step": 750 }, { "epoch": 0.20784612191240573, "grad_norm": 0.19927729666233063, "learning_rate": 1e-05, "loss": 0.599, "step": 751 }, { "epoch": 0.20812288106275514, "grad_norm": 0.1771668642759323, "learning_rate": 1e-05, "loss": 0.5558, "step": 752 }, { "epoch": 0.20839964021310456, "grad_norm": 0.17616140842437744, "learning_rate": 1e-05, "loss": 0.5699, "step": 753 }, { "epoch": 0.20867639936345395, "grad_norm": 0.1749933958053589, "learning_rate": 1e-05, "loss": 0.5944, "step": 754 }, { "epoch": 0.20895315851380336, "grad_norm": 0.17741666734218597, "learning_rate": 1e-05, "loss": 0.5807, "step": 755 }, { "epoch": 0.20922991766415278, "grad_norm": 0.1812400370836258, "learning_rate": 1e-05, "loss": 0.5677, "step": 756 }, { "epoch": 0.2095066768145022, "grad_norm": 0.1813507080078125, "learning_rate": 1e-05, "loss": 0.5486, "step": 757 }, { "epoch": 0.20978343596485158, "grad_norm": 0.1814769208431244, "learning_rate": 1e-05, "loss": 0.5726, "step": 758 }, { "epoch": 0.210060195115201, "grad_norm": 0.17289644479751587, "learning_rate": 1e-05, "loss": 0.6077, "step": 759 }, { "epoch": 0.2103369542655504, "grad_norm": 0.18788468837738037, "learning_rate": 1e-05, "loss": 0.607, "step": 760 }, { "epoch": 0.21061371341589982, "grad_norm": 0.1812230944633484, "learning_rate": 1e-05, "loss": 0.5771, "step": 761 }, { "epoch": 0.2108904725662492, "grad_norm": 0.1761406511068344, "learning_rate": 1e-05, "loss": 0.5987, "step": 762 }, { "epoch": 0.21116723171659862, "grad_norm": 0.18188636004924774, "learning_rate": 1e-05, "loss": 0.5702, "step": 763 }, { "epoch": 0.21144399086694804, "grad_norm": 0.1833881139755249, "learning_rate": 1e-05, "loss": 0.6038, "step": 764 }, { "epoch": 0.21172075001729745, "grad_norm": 0.17766286432743073, "learning_rate": 1e-05, "loss": 0.5592, "step": 765 }, { "epoch": 0.21199750916764684, "grad_norm": 0.19013483822345734, "learning_rate": 1e-05, "loss": 0.5356, "step": 766 }, { "epoch": 0.21227426831799626, "grad_norm": 0.19557508826255798, "learning_rate": 1e-05, "loss": 0.6049, "step": 767 }, { "epoch": 0.21255102746834567, "grad_norm": 0.18069817125797272, "learning_rate": 1e-05, "loss": 0.5709, "step": 768 }, { "epoch": 0.2128277866186951, "grad_norm": 0.17254115641117096, "learning_rate": 1e-05, "loss": 0.5837, "step": 769 }, { "epoch": 0.2131045457690445, "grad_norm": 0.18449369072914124, "learning_rate": 1e-05, "loss": 0.6, "step": 770 }, { "epoch": 0.2133813049193939, "grad_norm": 0.18022947013378143, "learning_rate": 1e-05, "loss": 0.5598, "step": 771 }, { "epoch": 0.2136580640697433, "grad_norm": 0.19413843750953674, "learning_rate": 1e-05, "loss": 0.5586, "step": 772 }, { "epoch": 0.21393482322009272, "grad_norm": 0.1795695722103119, "learning_rate": 1e-05, "loss": 0.5936, "step": 773 }, { "epoch": 0.21421158237044213, "grad_norm": 0.18220441043376923, "learning_rate": 1e-05, "loss": 0.5696, "step": 774 }, { "epoch": 0.21448834152079152, "grad_norm": 0.18868398666381836, "learning_rate": 1e-05, "loss": 0.5978, "step": 775 }, { "epoch": 0.21476510067114093, "grad_norm": 0.18825623393058777, "learning_rate": 1e-05, "loss": 0.6316, "step": 776 }, { "epoch": 0.21504185982149035, "grad_norm": 0.1780831515789032, "learning_rate": 1e-05, "loss": 0.5665, "step": 777 }, { "epoch": 0.21531861897183976, "grad_norm": 0.18033717572689056, "learning_rate": 1e-05, "loss": 0.5626, "step": 778 }, { "epoch": 0.21559537812218915, "grad_norm": 0.18761968612670898, "learning_rate": 1e-05, "loss": 0.5859, "step": 779 }, { "epoch": 0.21587213727253857, "grad_norm": 0.18536265194416046, "learning_rate": 1e-05, "loss": 0.6178, "step": 780 }, { "epoch": 0.21614889642288798, "grad_norm": 0.18817423284053802, "learning_rate": 1e-05, "loss": 0.5886, "step": 781 }, { "epoch": 0.2164256555732374, "grad_norm": 0.20119550824165344, "learning_rate": 1e-05, "loss": 0.5879, "step": 782 }, { "epoch": 0.2167024147235868, "grad_norm": 0.18785789608955383, "learning_rate": 1e-05, "loss": 0.5772, "step": 783 }, { "epoch": 0.2169791738739362, "grad_norm": 0.1994706094264984, "learning_rate": 1e-05, "loss": 0.6099, "step": 784 }, { "epoch": 0.2172559330242856, "grad_norm": 0.19113539159297943, "learning_rate": 1e-05, "loss": 0.5788, "step": 785 }, { "epoch": 0.21753269217463503, "grad_norm": 0.1884009838104248, "learning_rate": 1e-05, "loss": 0.6072, "step": 786 }, { "epoch": 0.21780945132498444, "grad_norm": 0.18653731048107147, "learning_rate": 1e-05, "loss": 0.6114, "step": 787 }, { "epoch": 0.21808621047533383, "grad_norm": 0.18242208659648895, "learning_rate": 1e-05, "loss": 0.5577, "step": 788 }, { "epoch": 0.21836296962568325, "grad_norm": 0.19419406354427338, "learning_rate": 1e-05, "loss": 0.6077, "step": 789 }, { "epoch": 0.21863972877603266, "grad_norm": 0.18686731159687042, "learning_rate": 1e-05, "loss": 0.5618, "step": 790 }, { "epoch": 0.21891648792638208, "grad_norm": 0.18065311014652252, "learning_rate": 1e-05, "loss": 0.5981, "step": 791 }, { "epoch": 0.21919324707673146, "grad_norm": 0.18132862448692322, "learning_rate": 1e-05, "loss": 0.5741, "step": 792 }, { "epoch": 0.21947000622708088, "grad_norm": 0.1766415387392044, "learning_rate": 1e-05, "loss": 0.571, "step": 793 }, { "epoch": 0.2197467653774303, "grad_norm": 0.18362002074718475, "learning_rate": 1e-05, "loss": 0.6035, "step": 794 }, { "epoch": 0.2200235245277797, "grad_norm": 0.18916165828704834, "learning_rate": 1e-05, "loss": 0.5831, "step": 795 }, { "epoch": 0.22030028367812912, "grad_norm": 0.18922927975654602, "learning_rate": 1e-05, "loss": 0.6156, "step": 796 }, { "epoch": 0.2205770428284785, "grad_norm": 0.17428332567214966, "learning_rate": 1e-05, "loss": 0.5477, "step": 797 }, { "epoch": 0.22085380197882792, "grad_norm": 0.18591585755348206, "learning_rate": 1e-05, "loss": 0.5607, "step": 798 }, { "epoch": 0.22113056112917734, "grad_norm": 0.21150419116020203, "learning_rate": 1e-05, "loss": 0.5879, "step": 799 }, { "epoch": 0.22140732027952675, "grad_norm": 0.18309351801872253, "learning_rate": 1e-05, "loss": 0.5519, "step": 800 }, { "epoch": 0.22168407942987614, "grad_norm": 0.19577434659004211, "learning_rate": 1e-05, "loss": 0.5864, "step": 801 }, { "epoch": 0.22196083858022556, "grad_norm": 0.17148706316947937, "learning_rate": 1e-05, "loss": 0.5687, "step": 802 }, { "epoch": 0.22223759773057497, "grad_norm": 0.1913692057132721, "learning_rate": 1e-05, "loss": 0.5729, "step": 803 }, { "epoch": 0.22251435688092439, "grad_norm": 0.20137150585651398, "learning_rate": 1e-05, "loss": 0.5814, "step": 804 }, { "epoch": 0.22279111603127377, "grad_norm": 0.18024198710918427, "learning_rate": 1e-05, "loss": 0.6001, "step": 805 }, { "epoch": 0.2230678751816232, "grad_norm": 0.17712028324604034, "learning_rate": 1e-05, "loss": 0.5834, "step": 806 }, { "epoch": 0.2233446343319726, "grad_norm": 0.1830291748046875, "learning_rate": 1e-05, "loss": 0.5842, "step": 807 }, { "epoch": 0.22362139348232202, "grad_norm": 0.1823885142803192, "learning_rate": 1e-05, "loss": 0.5724, "step": 808 }, { "epoch": 0.2238981526326714, "grad_norm": 0.20578309893608093, "learning_rate": 1e-05, "loss": 0.5781, "step": 809 }, { "epoch": 0.22417491178302082, "grad_norm": 0.17908281087875366, "learning_rate": 1e-05, "loss": 0.5895, "step": 810 }, { "epoch": 0.22445167093337023, "grad_norm": 0.1738407462835312, "learning_rate": 1e-05, "loss": 0.547, "step": 811 }, { "epoch": 0.22472843008371965, "grad_norm": 0.17936022579669952, "learning_rate": 1e-05, "loss": 0.6056, "step": 812 }, { "epoch": 0.22500518923406906, "grad_norm": 0.17984023690223694, "learning_rate": 1e-05, "loss": 0.6053, "step": 813 }, { "epoch": 0.22528194838441845, "grad_norm": 0.1993889957666397, "learning_rate": 1e-05, "loss": 0.5816, "step": 814 }, { "epoch": 0.22555870753476787, "grad_norm": 0.1807403415441513, "learning_rate": 1e-05, "loss": 0.614, "step": 815 }, { "epoch": 0.22583546668511728, "grad_norm": 0.17234525084495544, "learning_rate": 1e-05, "loss": 0.5594, "step": 816 }, { "epoch": 0.2261122258354667, "grad_norm": 0.2055816352367401, "learning_rate": 1e-05, "loss": 0.5994, "step": 817 }, { "epoch": 0.22638898498581608, "grad_norm": 0.18701200187206268, "learning_rate": 1e-05, "loss": 0.6032, "step": 818 }, { "epoch": 0.2266657441361655, "grad_norm": 0.18526789546012878, "learning_rate": 1e-05, "loss": 0.5814, "step": 819 }, { "epoch": 0.2269425032865149, "grad_norm": 0.18391597270965576, "learning_rate": 1e-05, "loss": 0.5842, "step": 820 }, { "epoch": 0.22721926243686433, "grad_norm": 0.18416325747966766, "learning_rate": 1e-05, "loss": 0.5904, "step": 821 }, { "epoch": 0.22749602158721371, "grad_norm": 0.19105954468250275, "learning_rate": 1e-05, "loss": 0.5827, "step": 822 }, { "epoch": 0.22777278073756313, "grad_norm": 0.1767473965883255, "learning_rate": 1e-05, "loss": 0.5928, "step": 823 }, { "epoch": 0.22804953988791254, "grad_norm": 0.181941956281662, "learning_rate": 1e-05, "loss": 0.5586, "step": 824 }, { "epoch": 0.22832629903826196, "grad_norm": 0.19598273932933807, "learning_rate": 1e-05, "loss": 0.6033, "step": 825 }, { "epoch": 0.22860305818861137, "grad_norm": 0.17907202243804932, "learning_rate": 1e-05, "loss": 0.5764, "step": 826 }, { "epoch": 0.22887981733896076, "grad_norm": 0.17907939851284027, "learning_rate": 1e-05, "loss": 0.5863, "step": 827 }, { "epoch": 0.22915657648931018, "grad_norm": 0.18627694249153137, "learning_rate": 1e-05, "loss": 0.5873, "step": 828 }, { "epoch": 0.2294333356396596, "grad_norm": 0.18703977763652802, "learning_rate": 1e-05, "loss": 0.5614, "step": 829 }, { "epoch": 0.229710094790009, "grad_norm": 0.18716442584991455, "learning_rate": 1e-05, "loss": 0.586, "step": 830 }, { "epoch": 0.2299868539403584, "grad_norm": 0.17890872061252594, "learning_rate": 1e-05, "loss": 0.5685, "step": 831 }, { "epoch": 0.2302636130907078, "grad_norm": 0.19138532876968384, "learning_rate": 1e-05, "loss": 0.5663, "step": 832 }, { "epoch": 0.23054037224105722, "grad_norm": 0.19666707515716553, "learning_rate": 1e-05, "loss": 0.5961, "step": 833 }, { "epoch": 0.23081713139140664, "grad_norm": 0.18208669126033783, "learning_rate": 1e-05, "loss": 0.5805, "step": 834 }, { "epoch": 0.23109389054175603, "grad_norm": 0.18802709877490997, "learning_rate": 1e-05, "loss": 0.6148, "step": 835 }, { "epoch": 0.23137064969210544, "grad_norm": 0.18616248667240143, "learning_rate": 1e-05, "loss": 0.5787, "step": 836 }, { "epoch": 0.23164740884245485, "grad_norm": 0.20188893377780914, "learning_rate": 1e-05, "loss": 0.5842, "step": 837 }, { "epoch": 0.23192416799280427, "grad_norm": 0.1934157907962799, "learning_rate": 1e-05, "loss": 0.5826, "step": 838 }, { "epoch": 0.23220092714315366, "grad_norm": 0.17691951990127563, "learning_rate": 1e-05, "loss": 0.5651, "step": 839 }, { "epoch": 0.23247768629350307, "grad_norm": 0.17939062416553497, "learning_rate": 1e-05, "loss": 0.5724, "step": 840 }, { "epoch": 0.2327544454438525, "grad_norm": 0.2008800208568573, "learning_rate": 1e-05, "loss": 0.5822, "step": 841 }, { "epoch": 0.2330312045942019, "grad_norm": 0.18061873316764832, "learning_rate": 1e-05, "loss": 0.5864, "step": 842 }, { "epoch": 0.23330796374455132, "grad_norm": 0.20017004013061523, "learning_rate": 1e-05, "loss": 0.5797, "step": 843 }, { "epoch": 0.2335847228949007, "grad_norm": 0.17736564576625824, "learning_rate": 1e-05, "loss": 0.564, "step": 844 }, { "epoch": 0.23386148204525012, "grad_norm": 0.17483185231685638, "learning_rate": 1e-05, "loss": 0.5948, "step": 845 }, { "epoch": 0.23413824119559953, "grad_norm": 0.17849856615066528, "learning_rate": 1e-05, "loss": 0.6105, "step": 846 }, { "epoch": 0.23441500034594895, "grad_norm": 0.18428803980350494, "learning_rate": 1e-05, "loss": 0.5976, "step": 847 }, { "epoch": 0.23469175949629834, "grad_norm": 0.18441441655158997, "learning_rate": 1e-05, "loss": 0.595, "step": 848 }, { "epoch": 0.23496851864664775, "grad_norm": 0.19660663604736328, "learning_rate": 1e-05, "loss": 0.5991, "step": 849 }, { "epoch": 0.23524527779699717, "grad_norm": 0.1800185590982437, "learning_rate": 1e-05, "loss": 0.5802, "step": 850 }, { "epoch": 0.23552203694734658, "grad_norm": 0.20775455236434937, "learning_rate": 1e-05, "loss": 0.5904, "step": 851 }, { "epoch": 0.23579879609769597, "grad_norm": 0.19340160489082336, "learning_rate": 1e-05, "loss": 0.5813, "step": 852 }, { "epoch": 0.23607555524804538, "grad_norm": 0.18583130836486816, "learning_rate": 1e-05, "loss": 0.5755, "step": 853 }, { "epoch": 0.2363523143983948, "grad_norm": 0.18410921096801758, "learning_rate": 1e-05, "loss": 0.5848, "step": 854 }, { "epoch": 0.2366290735487442, "grad_norm": 0.18369296193122864, "learning_rate": 1e-05, "loss": 0.5815, "step": 855 }, { "epoch": 0.23690583269909363, "grad_norm": 0.18070535361766815, "learning_rate": 1e-05, "loss": 0.5664, "step": 856 }, { "epoch": 0.23718259184944301, "grad_norm": 0.18926939368247986, "learning_rate": 1e-05, "loss": 0.5882, "step": 857 }, { "epoch": 0.23745935099979243, "grad_norm": 0.18913449347019196, "learning_rate": 1e-05, "loss": 0.5938, "step": 858 }, { "epoch": 0.23773611015014184, "grad_norm": 0.18055298924446106, "learning_rate": 1e-05, "loss": 0.6068, "step": 859 }, { "epoch": 0.23801286930049126, "grad_norm": 0.2061522901058197, "learning_rate": 1e-05, "loss": 0.5835, "step": 860 }, { "epoch": 0.23828962845084065, "grad_norm": 0.193989098072052, "learning_rate": 1e-05, "loss": 0.5881, "step": 861 }, { "epoch": 0.23856638760119006, "grad_norm": 0.2171832025051117, "learning_rate": 1e-05, "loss": 0.5906, "step": 862 }, { "epoch": 0.23884314675153948, "grad_norm": 0.19690121710300446, "learning_rate": 1e-05, "loss": 0.6229, "step": 863 }, { "epoch": 0.2391199059018889, "grad_norm": 0.18994596600532532, "learning_rate": 1e-05, "loss": 0.6093, "step": 864 }, { "epoch": 0.23939666505223828, "grad_norm": 0.18395178020000458, "learning_rate": 1e-05, "loss": 0.6216, "step": 865 }, { "epoch": 0.2396734242025877, "grad_norm": 0.1762446165084839, "learning_rate": 1e-05, "loss": 0.5738, "step": 866 }, { "epoch": 0.2399501833529371, "grad_norm": 0.19546326994895935, "learning_rate": 1e-05, "loss": 0.6028, "step": 867 }, { "epoch": 0.24022694250328652, "grad_norm": 0.22424151003360748, "learning_rate": 1e-05, "loss": 0.5946, "step": 868 }, { "epoch": 0.2405037016536359, "grad_norm": 0.18802712857723236, "learning_rate": 1e-05, "loss": 0.5621, "step": 869 }, { "epoch": 0.24078046080398532, "grad_norm": 0.18643029034137726, "learning_rate": 1e-05, "loss": 0.566, "step": 870 }, { "epoch": 0.24105721995433474, "grad_norm": 0.18328483402729034, "learning_rate": 1e-05, "loss": 0.5634, "step": 871 }, { "epoch": 0.24133397910468415, "grad_norm": 0.19348381459712982, "learning_rate": 1e-05, "loss": 0.5981, "step": 872 }, { "epoch": 0.24161073825503357, "grad_norm": 0.1845473200082779, "learning_rate": 1e-05, "loss": 0.5782, "step": 873 }, { "epoch": 0.24188749740538296, "grad_norm": 0.1940966248512268, "learning_rate": 1e-05, "loss": 0.581, "step": 874 }, { "epoch": 0.24216425655573237, "grad_norm": 0.17791825532913208, "learning_rate": 1e-05, "loss": 0.5647, "step": 875 }, { "epoch": 0.24244101570608179, "grad_norm": 0.18268145620822906, "learning_rate": 1e-05, "loss": 0.5756, "step": 876 }, { "epoch": 0.2427177748564312, "grad_norm": 0.19041664898395538, "learning_rate": 1e-05, "loss": 0.6025, "step": 877 }, { "epoch": 0.2429945340067806, "grad_norm": 0.17829269170761108, "learning_rate": 1e-05, "loss": 0.5711, "step": 878 }, { "epoch": 0.24327129315713, "grad_norm": 0.18042215704917908, "learning_rate": 1e-05, "loss": 0.5669, "step": 879 }, { "epoch": 0.24354805230747942, "grad_norm": 0.18514114618301392, "learning_rate": 1e-05, "loss": 0.6226, "step": 880 }, { "epoch": 0.24382481145782883, "grad_norm": 0.1798439472913742, "learning_rate": 1e-05, "loss": 0.573, "step": 881 }, { "epoch": 0.24410157060817822, "grad_norm": 0.17944662272930145, "learning_rate": 1e-05, "loss": 0.5513, "step": 882 }, { "epoch": 0.24437832975852763, "grad_norm": 0.19002334773540497, "learning_rate": 1e-05, "loss": 0.6129, "step": 883 }, { "epoch": 0.24465508890887705, "grad_norm": 0.18833497166633606, "learning_rate": 1e-05, "loss": 0.5949, "step": 884 }, { "epoch": 0.24493184805922646, "grad_norm": 0.18663693964481354, "learning_rate": 1e-05, "loss": 0.6, "step": 885 }, { "epoch": 0.24520860720957588, "grad_norm": 0.1757865846157074, "learning_rate": 1e-05, "loss": 0.5535, "step": 886 }, { "epoch": 0.24548536635992527, "grad_norm": 0.18685995042324066, "learning_rate": 1e-05, "loss": 0.576, "step": 887 }, { "epoch": 0.24576212551027468, "grad_norm": 0.17698416113853455, "learning_rate": 1e-05, "loss": 0.5699, "step": 888 }, { "epoch": 0.2460388846606241, "grad_norm": 0.18848729133605957, "learning_rate": 1e-05, "loss": 0.6009, "step": 889 }, { "epoch": 0.2463156438109735, "grad_norm": 0.17977496981620789, "learning_rate": 1e-05, "loss": 0.5911, "step": 890 }, { "epoch": 0.2465924029613229, "grad_norm": 0.17730052769184113, "learning_rate": 1e-05, "loss": 0.5719, "step": 891 }, { "epoch": 0.2468691621116723, "grad_norm": 0.19420959055423737, "learning_rate": 1e-05, "loss": 0.5914, "step": 892 }, { "epoch": 0.24714592126202173, "grad_norm": 0.18489046394824982, "learning_rate": 1e-05, "loss": 0.5939, "step": 893 }, { "epoch": 0.24742268041237114, "grad_norm": 0.17589795589447021, "learning_rate": 1e-05, "loss": 0.5963, "step": 894 }, { "epoch": 0.24769943956272053, "grad_norm": 0.1875644475221634, "learning_rate": 1e-05, "loss": 0.5396, "step": 895 }, { "epoch": 0.24797619871306995, "grad_norm": 0.1843571662902832, "learning_rate": 1e-05, "loss": 0.5691, "step": 896 }, { "epoch": 0.24825295786341936, "grad_norm": 0.18027988076210022, "learning_rate": 1e-05, "loss": 0.5724, "step": 897 }, { "epoch": 0.24852971701376878, "grad_norm": 0.17572636902332306, "learning_rate": 1e-05, "loss": 0.5789, "step": 898 }, { "epoch": 0.2488064761641182, "grad_norm": 0.18842794001102448, "learning_rate": 1e-05, "loss": 0.598, "step": 899 }, { "epoch": 0.24908323531446758, "grad_norm": 0.17492066323757172, "learning_rate": 1e-05, "loss": 0.5731, "step": 900 }, { "epoch": 0.249359994464817, "grad_norm": 0.17378072440624237, "learning_rate": 1e-05, "loss": 0.605, "step": 901 }, { "epoch": 0.2496367536151664, "grad_norm": 0.16985929012298584, "learning_rate": 1e-05, "loss": 0.5832, "step": 902 }, { "epoch": 0.24991351276551582, "grad_norm": 0.18749749660491943, "learning_rate": 1e-05, "loss": 0.5906, "step": 903 }, { "epoch": 0.25019027191586524, "grad_norm": 0.17938637733459473, "learning_rate": 1e-05, "loss": 0.5746, "step": 904 }, { "epoch": 0.25046703106621465, "grad_norm": 0.17738297581672668, "learning_rate": 1e-05, "loss": 0.5669, "step": 905 }, { "epoch": 0.250743790216564, "grad_norm": 0.18687844276428223, "learning_rate": 1e-05, "loss": 0.5866, "step": 906 }, { "epoch": 0.2510205493669134, "grad_norm": 0.18201623857021332, "learning_rate": 1e-05, "loss": 0.5956, "step": 907 }, { "epoch": 0.25129730851726284, "grad_norm": 0.18228094279766083, "learning_rate": 1e-05, "loss": 0.581, "step": 908 }, { "epoch": 0.25157406766761226, "grad_norm": 0.1746271252632141, "learning_rate": 1e-05, "loss": 0.6068, "step": 909 }, { "epoch": 0.25185082681796167, "grad_norm": 0.17434987425804138, "learning_rate": 1e-05, "loss": 0.5503, "step": 910 }, { "epoch": 0.2521275859683111, "grad_norm": 0.17513108253479004, "learning_rate": 1e-05, "loss": 0.5793, "step": 911 }, { "epoch": 0.2524043451186605, "grad_norm": 0.1919221580028534, "learning_rate": 1e-05, "loss": 0.6208, "step": 912 }, { "epoch": 0.2526811042690099, "grad_norm": 0.17483161389827728, "learning_rate": 1e-05, "loss": 0.5765, "step": 913 }, { "epoch": 0.25295786341935933, "grad_norm": 0.172459676861763, "learning_rate": 1e-05, "loss": 0.57, "step": 914 }, { "epoch": 0.2532346225697087, "grad_norm": 0.17995145916938782, "learning_rate": 1e-05, "loss": 0.5848, "step": 915 }, { "epoch": 0.2535113817200581, "grad_norm": 0.18884821236133575, "learning_rate": 1e-05, "loss": 0.5855, "step": 916 }, { "epoch": 0.2537881408704075, "grad_norm": 0.17966264486312866, "learning_rate": 1e-05, "loss": 0.5619, "step": 917 }, { "epoch": 0.25406490002075693, "grad_norm": 0.17384350299835205, "learning_rate": 1e-05, "loss": 0.6011, "step": 918 }, { "epoch": 0.25434165917110635, "grad_norm": 0.18296487629413605, "learning_rate": 1e-05, "loss": 0.5772, "step": 919 }, { "epoch": 0.25461841832145576, "grad_norm": 0.18254128098487854, "learning_rate": 1e-05, "loss": 0.6051, "step": 920 }, { "epoch": 0.2548951774718052, "grad_norm": 0.18677902221679688, "learning_rate": 1e-05, "loss": 0.6064, "step": 921 }, { "epoch": 0.2551719366221546, "grad_norm": 0.18378759920597076, "learning_rate": 1e-05, "loss": 0.5901, "step": 922 }, { "epoch": 0.25544869577250395, "grad_norm": 0.20784322917461395, "learning_rate": 1e-05, "loss": 0.5885, "step": 923 }, { "epoch": 0.25572545492285337, "grad_norm": 0.18712082505226135, "learning_rate": 1e-05, "loss": 0.5974, "step": 924 }, { "epoch": 0.2560022140732028, "grad_norm": 0.17237073183059692, "learning_rate": 1e-05, "loss": 0.5662, "step": 925 }, { "epoch": 0.2562789732235522, "grad_norm": 0.18288351595401764, "learning_rate": 1e-05, "loss": 0.6184, "step": 926 }, { "epoch": 0.2565557323739016, "grad_norm": 0.1708442121744156, "learning_rate": 1e-05, "loss": 0.5523, "step": 927 }, { "epoch": 0.256832491524251, "grad_norm": 0.17170946300029755, "learning_rate": 1e-05, "loss": 0.5571, "step": 928 }, { "epoch": 0.25710925067460044, "grad_norm": 0.184109628200531, "learning_rate": 1e-05, "loss": 0.5796, "step": 929 }, { "epoch": 0.25738600982494986, "grad_norm": 0.1838240921497345, "learning_rate": 1e-05, "loss": 0.6087, "step": 930 }, { "epoch": 0.2576627689752993, "grad_norm": 0.18872253596782684, "learning_rate": 1e-05, "loss": 0.5951, "step": 931 }, { "epoch": 0.25793952812564863, "grad_norm": 0.1861378401517868, "learning_rate": 1e-05, "loss": 0.5492, "step": 932 }, { "epoch": 0.25821628727599805, "grad_norm": 0.17693452537059784, "learning_rate": 1e-05, "loss": 0.5685, "step": 933 }, { "epoch": 0.25849304642634746, "grad_norm": 0.17960022389888763, "learning_rate": 1e-05, "loss": 0.5718, "step": 934 }, { "epoch": 0.2587698055766969, "grad_norm": 0.17303022742271423, "learning_rate": 1e-05, "loss": 0.57, "step": 935 }, { "epoch": 0.2590465647270463, "grad_norm": 0.1804521083831787, "learning_rate": 1e-05, "loss": 0.6031, "step": 936 }, { "epoch": 0.2593233238773957, "grad_norm": 0.17946003377437592, "learning_rate": 1e-05, "loss": 0.584, "step": 937 }, { "epoch": 0.2596000830277451, "grad_norm": 0.18168216943740845, "learning_rate": 1e-05, "loss": 0.5704, "step": 938 }, { "epoch": 0.25987684217809454, "grad_norm": 0.18293409049510956, "learning_rate": 1e-05, "loss": 0.5544, "step": 939 }, { "epoch": 0.2601536013284439, "grad_norm": 0.17378458380699158, "learning_rate": 1e-05, "loss": 0.5673, "step": 940 }, { "epoch": 0.2604303604787933, "grad_norm": 0.1897217184305191, "learning_rate": 1e-05, "loss": 0.5823, "step": 941 }, { "epoch": 0.2607071196291427, "grad_norm": 0.18678317964076996, "learning_rate": 1e-05, "loss": 0.5679, "step": 942 }, { "epoch": 0.26098387877949214, "grad_norm": 0.19269771873950958, "learning_rate": 1e-05, "loss": 0.5734, "step": 943 }, { "epoch": 0.26126063792984155, "grad_norm": 0.19166570901870728, "learning_rate": 1e-05, "loss": 0.5899, "step": 944 }, { "epoch": 0.26153739708019097, "grad_norm": 0.1885523647069931, "learning_rate": 1e-05, "loss": 0.6148, "step": 945 }, { "epoch": 0.2618141562305404, "grad_norm": 0.1889239400625229, "learning_rate": 1e-05, "loss": 0.602, "step": 946 }, { "epoch": 0.2620909153808898, "grad_norm": 0.1757299154996872, "learning_rate": 1e-05, "loss": 0.574, "step": 947 }, { "epoch": 0.2623676745312392, "grad_norm": 0.19537201523780823, "learning_rate": 1e-05, "loss": 0.572, "step": 948 }, { "epoch": 0.2626444336815886, "grad_norm": 0.19311238825321198, "learning_rate": 1e-05, "loss": 0.5799, "step": 949 }, { "epoch": 0.262921192831938, "grad_norm": 0.18534910678863525, "learning_rate": 1e-05, "loss": 0.5714, "step": 950 }, { "epoch": 0.2631979519822874, "grad_norm": 0.18646641075611115, "learning_rate": 1e-05, "loss": 0.5677, "step": 951 }, { "epoch": 0.2634747111326368, "grad_norm": 0.16954070329666138, "learning_rate": 1e-05, "loss": 0.55, "step": 952 }, { "epoch": 0.26375147028298623, "grad_norm": 0.1822720319032669, "learning_rate": 1e-05, "loss": 0.5787, "step": 953 }, { "epoch": 0.26402822943333565, "grad_norm": 0.19210933148860931, "learning_rate": 1e-05, "loss": 0.5834, "step": 954 }, { "epoch": 0.26430498858368506, "grad_norm": 0.16951757669448853, "learning_rate": 1e-05, "loss": 0.5835, "step": 955 }, { "epoch": 0.2645817477340345, "grad_norm": 0.179080992937088, "learning_rate": 1e-05, "loss": 0.5881, "step": 956 }, { "epoch": 0.26485850688438384, "grad_norm": 0.17617705464363098, "learning_rate": 1e-05, "loss": 0.5921, "step": 957 }, { "epoch": 0.26513526603473325, "grad_norm": 0.17818795144557953, "learning_rate": 1e-05, "loss": 0.5579, "step": 958 }, { "epoch": 0.26541202518508267, "grad_norm": 0.19048212468624115, "learning_rate": 1e-05, "loss": 0.602, "step": 959 }, { "epoch": 0.2656887843354321, "grad_norm": 0.18126410245895386, "learning_rate": 1e-05, "loss": 0.6039, "step": 960 }, { "epoch": 0.2659655434857815, "grad_norm": 0.1768074631690979, "learning_rate": 1e-05, "loss": 0.5642, "step": 961 }, { "epoch": 0.2662423026361309, "grad_norm": 0.17839887738227844, "learning_rate": 1e-05, "loss": 0.57, "step": 962 }, { "epoch": 0.2665190617864803, "grad_norm": 0.18618729710578918, "learning_rate": 1e-05, "loss": 0.5808, "step": 963 }, { "epoch": 0.26679582093682974, "grad_norm": 0.1814972162246704, "learning_rate": 1e-05, "loss": 0.5741, "step": 964 }, { "epoch": 0.26707258008717916, "grad_norm": 0.18735013902187347, "learning_rate": 1e-05, "loss": 0.5729, "step": 965 }, { "epoch": 0.2673493392375285, "grad_norm": 0.17813801765441895, "learning_rate": 1e-05, "loss": 0.5736, "step": 966 }, { "epoch": 0.26762609838787793, "grad_norm": 0.18427973985671997, "learning_rate": 1e-05, "loss": 0.5854, "step": 967 }, { "epoch": 0.26790285753822735, "grad_norm": 0.18667268753051758, "learning_rate": 1e-05, "loss": 0.5663, "step": 968 }, { "epoch": 0.26817961668857676, "grad_norm": 0.18905359506607056, "learning_rate": 1e-05, "loss": 0.6167, "step": 969 }, { "epoch": 0.2684563758389262, "grad_norm": 0.19118182361125946, "learning_rate": 1e-05, "loss": 0.5316, "step": 970 }, { "epoch": 0.2687331349892756, "grad_norm": 0.17948895692825317, "learning_rate": 1e-05, "loss": 0.5974, "step": 971 }, { "epoch": 0.269009894139625, "grad_norm": 0.1741398423910141, "learning_rate": 1e-05, "loss": 0.5212, "step": 972 }, { "epoch": 0.2692866532899744, "grad_norm": 0.1720748096704483, "learning_rate": 1e-05, "loss": 0.5838, "step": 973 }, { "epoch": 0.26956341244032384, "grad_norm": 0.18946270644664764, "learning_rate": 1e-05, "loss": 0.5659, "step": 974 }, { "epoch": 0.2698401715906732, "grad_norm": 0.1837378740310669, "learning_rate": 1e-05, "loss": 0.5746, "step": 975 }, { "epoch": 0.2701169307410226, "grad_norm": 0.1841641217470169, "learning_rate": 1e-05, "loss": 0.5856, "step": 976 }, { "epoch": 0.270393689891372, "grad_norm": 0.17379453778266907, "learning_rate": 1e-05, "loss": 0.5698, "step": 977 }, { "epoch": 0.27067044904172144, "grad_norm": 0.17457795143127441, "learning_rate": 1e-05, "loss": 0.5579, "step": 978 }, { "epoch": 0.27094720819207085, "grad_norm": 0.17981652915477753, "learning_rate": 1e-05, "loss": 0.5756, "step": 979 }, { "epoch": 0.27122396734242027, "grad_norm": 0.1777719408273697, "learning_rate": 1e-05, "loss": 0.5795, "step": 980 }, { "epoch": 0.2715007264927697, "grad_norm": 0.18737520277500153, "learning_rate": 1e-05, "loss": 0.6142, "step": 981 }, { "epoch": 0.2717774856431191, "grad_norm": 0.17689403891563416, "learning_rate": 1e-05, "loss": 0.5609, "step": 982 }, { "epoch": 0.27205424479346846, "grad_norm": 0.19571830332279205, "learning_rate": 1e-05, "loss": 0.6158, "step": 983 }, { "epoch": 0.2723310039438179, "grad_norm": 0.19070763885974884, "learning_rate": 1e-05, "loss": 0.5966, "step": 984 }, { "epoch": 0.2726077630941673, "grad_norm": 0.18318849802017212, "learning_rate": 1e-05, "loss": 0.5567, "step": 985 }, { "epoch": 0.2728845222445167, "grad_norm": 0.18415194749832153, "learning_rate": 1e-05, "loss": 0.5935, "step": 986 }, { "epoch": 0.2731612813948661, "grad_norm": 0.18995624780654907, "learning_rate": 1e-05, "loss": 0.5905, "step": 987 }, { "epoch": 0.27343804054521553, "grad_norm": 0.18289156258106232, "learning_rate": 1e-05, "loss": 0.5868, "step": 988 }, { "epoch": 0.27371479969556495, "grad_norm": 0.17886285483837128, "learning_rate": 1e-05, "loss": 0.5772, "step": 989 }, { "epoch": 0.27399155884591436, "grad_norm": 0.19775421917438507, "learning_rate": 1e-05, "loss": 0.5988, "step": 990 }, { "epoch": 0.2742683179962638, "grad_norm": 0.18605543673038483, "learning_rate": 1e-05, "loss": 0.6061, "step": 991 }, { "epoch": 0.27454507714661314, "grad_norm": 0.1762513816356659, "learning_rate": 1e-05, "loss": 0.5659, "step": 992 }, { "epoch": 0.27482183629696255, "grad_norm": 0.17519649863243103, "learning_rate": 1e-05, "loss": 0.5839, "step": 993 }, { "epoch": 0.27509859544731197, "grad_norm": 0.17910659313201904, "learning_rate": 1e-05, "loss": 0.5832, "step": 994 }, { "epoch": 0.2753753545976614, "grad_norm": 0.1774449497461319, "learning_rate": 1e-05, "loss": 0.5674, "step": 995 }, { "epoch": 0.2756521137480108, "grad_norm": 0.18517450988292694, "learning_rate": 1e-05, "loss": 0.5929, "step": 996 }, { "epoch": 0.2759288728983602, "grad_norm": 0.18229565024375916, "learning_rate": 1e-05, "loss": 0.5704, "step": 997 }, { "epoch": 0.2762056320487096, "grad_norm": 0.1844424307346344, "learning_rate": 1e-05, "loss": 0.5861, "step": 998 }, { "epoch": 0.27648239119905904, "grad_norm": 0.1793651133775711, "learning_rate": 1e-05, "loss": 0.5643, "step": 999 }, { "epoch": 0.2767591503494084, "grad_norm": 0.17637398838996887, "learning_rate": 1e-05, "loss": 0.5857, "step": 1000 }, { "epoch": 0.2770359094997578, "grad_norm": 0.17661918699741364, "learning_rate": 1e-05, "loss": 0.5809, "step": 1001 }, { "epoch": 0.27731266865010723, "grad_norm": 0.17832809686660767, "learning_rate": 1e-05, "loss": 0.5449, "step": 1002 }, { "epoch": 0.27758942780045665, "grad_norm": 0.1792573779821396, "learning_rate": 1e-05, "loss": 0.5859, "step": 1003 }, { "epoch": 0.27786618695080606, "grad_norm": 0.1793922632932663, "learning_rate": 1e-05, "loss": 0.6196, "step": 1004 }, { "epoch": 0.2781429461011555, "grad_norm": 0.176763653755188, "learning_rate": 1e-05, "loss": 0.6181, "step": 1005 }, { "epoch": 0.2784197052515049, "grad_norm": 0.1749633252620697, "learning_rate": 1e-05, "loss": 0.616, "step": 1006 }, { "epoch": 0.2786964644018543, "grad_norm": 0.17147277295589447, "learning_rate": 1e-05, "loss": 0.603, "step": 1007 }, { "epoch": 0.2789732235522037, "grad_norm": 0.179981991648674, "learning_rate": 1e-05, "loss": 0.5937, "step": 1008 }, { "epoch": 0.2792499827025531, "grad_norm": 0.2069828063249588, "learning_rate": 1e-05, "loss": 0.6025, "step": 1009 }, { "epoch": 0.2795267418529025, "grad_norm": 0.1753210425376892, "learning_rate": 1e-05, "loss": 0.5664, "step": 1010 }, { "epoch": 0.2798035010032519, "grad_norm": 0.17564964294433594, "learning_rate": 1e-05, "loss": 0.5618, "step": 1011 }, { "epoch": 0.2800802601536013, "grad_norm": 0.17678289115428925, "learning_rate": 1e-05, "loss": 0.5756, "step": 1012 }, { "epoch": 0.28035701930395074, "grad_norm": 0.17752894759178162, "learning_rate": 1e-05, "loss": 0.5836, "step": 1013 }, { "epoch": 0.28063377845430015, "grad_norm": 0.18301749229431152, "learning_rate": 1e-05, "loss": 0.601, "step": 1014 }, { "epoch": 0.28091053760464957, "grad_norm": 0.19982072710990906, "learning_rate": 1e-05, "loss": 0.6025, "step": 1015 }, { "epoch": 0.281187296754999, "grad_norm": 0.18625137209892273, "learning_rate": 1e-05, "loss": 0.6024, "step": 1016 }, { "epoch": 0.2814640559053484, "grad_norm": 0.19172470271587372, "learning_rate": 1e-05, "loss": 0.5703, "step": 1017 }, { "epoch": 0.28174081505569776, "grad_norm": 0.1808132529258728, "learning_rate": 1e-05, "loss": 0.5956, "step": 1018 }, { "epoch": 0.2820175742060472, "grad_norm": 0.17673450708389282, "learning_rate": 1e-05, "loss": 0.576, "step": 1019 }, { "epoch": 0.2822943333563966, "grad_norm": 0.1918935626745224, "learning_rate": 1e-05, "loss": 0.597, "step": 1020 }, { "epoch": 0.282571092506746, "grad_norm": 0.18166574835777283, "learning_rate": 1e-05, "loss": 0.5675, "step": 1021 }, { "epoch": 0.2828478516570954, "grad_norm": 0.18233729898929596, "learning_rate": 1e-05, "loss": 0.581, "step": 1022 }, { "epoch": 0.28312461080744483, "grad_norm": 0.17901746928691864, "learning_rate": 1e-05, "loss": 0.5556, "step": 1023 }, { "epoch": 0.28340136995779425, "grad_norm": 0.18477602303028107, "learning_rate": 1e-05, "loss": 0.5699, "step": 1024 }, { "epoch": 0.28367812910814366, "grad_norm": 0.1813942790031433, "learning_rate": 1e-05, "loss": 0.6015, "step": 1025 }, { "epoch": 0.283954888258493, "grad_norm": 0.18766658008098602, "learning_rate": 1e-05, "loss": 0.5801, "step": 1026 }, { "epoch": 0.28423164740884244, "grad_norm": 0.1741679608821869, "learning_rate": 1e-05, "loss": 0.5656, "step": 1027 }, { "epoch": 0.28450840655919185, "grad_norm": 0.17257945239543915, "learning_rate": 1e-05, "loss": 0.5455, "step": 1028 }, { "epoch": 0.28478516570954127, "grad_norm": 0.1889011710882187, "learning_rate": 1e-05, "loss": 0.5859, "step": 1029 }, { "epoch": 0.2850619248598907, "grad_norm": 0.1742401272058487, "learning_rate": 1e-05, "loss": 0.5735, "step": 1030 }, { "epoch": 0.2853386840102401, "grad_norm": 0.1822703629732132, "learning_rate": 1e-05, "loss": 0.5543, "step": 1031 }, { "epoch": 0.2856154431605895, "grad_norm": 0.18370147049427032, "learning_rate": 1e-05, "loss": 0.5894, "step": 1032 }, { "epoch": 0.2858922023109389, "grad_norm": 0.19705568253993988, "learning_rate": 1e-05, "loss": 0.5975, "step": 1033 }, { "epoch": 0.28616896146128834, "grad_norm": 0.18007409572601318, "learning_rate": 1e-05, "loss": 0.5818, "step": 1034 }, { "epoch": 0.2864457206116377, "grad_norm": 0.18679453432559967, "learning_rate": 1e-05, "loss": 0.5731, "step": 1035 }, { "epoch": 0.2867224797619871, "grad_norm": 0.17323556542396545, "learning_rate": 1e-05, "loss": 0.5524, "step": 1036 }, { "epoch": 0.28699923891233653, "grad_norm": 0.18132244050502777, "learning_rate": 1e-05, "loss": 0.5529, "step": 1037 }, { "epoch": 0.28727599806268594, "grad_norm": 0.18769505620002747, "learning_rate": 1e-05, "loss": 0.6047, "step": 1038 }, { "epoch": 0.28755275721303536, "grad_norm": 0.1835259050130844, "learning_rate": 1e-05, "loss": 0.574, "step": 1039 }, { "epoch": 0.2878295163633848, "grad_norm": 0.1802726536989212, "learning_rate": 1e-05, "loss": 0.5658, "step": 1040 }, { "epoch": 0.2881062755137342, "grad_norm": 0.18035642802715302, "learning_rate": 1e-05, "loss": 0.5725, "step": 1041 }, { "epoch": 0.2883830346640836, "grad_norm": 0.193642258644104, "learning_rate": 1e-05, "loss": 0.583, "step": 1042 }, { "epoch": 0.28865979381443296, "grad_norm": 0.18086113035678864, "learning_rate": 1e-05, "loss": 0.5912, "step": 1043 }, { "epoch": 0.2889365529647824, "grad_norm": 0.17617881298065186, "learning_rate": 1e-05, "loss": 0.5536, "step": 1044 }, { "epoch": 0.2892133121151318, "grad_norm": 0.18363653123378754, "learning_rate": 1e-05, "loss": 0.5952, "step": 1045 }, { "epoch": 0.2894900712654812, "grad_norm": 0.18193766474723816, "learning_rate": 1e-05, "loss": 0.5855, "step": 1046 }, { "epoch": 0.2897668304158306, "grad_norm": 0.18974941968917847, "learning_rate": 1e-05, "loss": 0.5958, "step": 1047 }, { "epoch": 0.29004358956618004, "grad_norm": 0.18445879220962524, "learning_rate": 1e-05, "loss": 0.5826, "step": 1048 }, { "epoch": 0.29032034871652945, "grad_norm": 0.18348853290081024, "learning_rate": 1e-05, "loss": 0.5951, "step": 1049 }, { "epoch": 0.29059710786687887, "grad_norm": 0.17932230234146118, "learning_rate": 1e-05, "loss": 0.5798, "step": 1050 }, { "epoch": 0.2908738670172283, "grad_norm": 0.21349772810935974, "learning_rate": 1e-05, "loss": 0.5261, "step": 1051 }, { "epoch": 0.29115062616757764, "grad_norm": 0.17543964087963104, "learning_rate": 1e-05, "loss": 0.593, "step": 1052 }, { "epoch": 0.29142738531792706, "grad_norm": 0.16986985504627228, "learning_rate": 1e-05, "loss": 0.6106, "step": 1053 }, { "epoch": 0.29170414446827647, "grad_norm": 0.17245244979858398, "learning_rate": 1e-05, "loss": 0.5584, "step": 1054 }, { "epoch": 0.2919809036186259, "grad_norm": 0.18379727005958557, "learning_rate": 1e-05, "loss": 0.5954, "step": 1055 }, { "epoch": 0.2922576627689753, "grad_norm": 0.18073637783527374, "learning_rate": 1e-05, "loss": 0.5712, "step": 1056 }, { "epoch": 0.2925344219193247, "grad_norm": 0.1836814433336258, "learning_rate": 1e-05, "loss": 0.5773, "step": 1057 }, { "epoch": 0.29281118106967413, "grad_norm": 0.18536868691444397, "learning_rate": 1e-05, "loss": 0.5807, "step": 1058 }, { "epoch": 0.29308794022002355, "grad_norm": 0.18933309614658356, "learning_rate": 1e-05, "loss": 0.5883, "step": 1059 }, { "epoch": 0.2933646993703729, "grad_norm": 0.1700764149427414, "learning_rate": 1e-05, "loss": 0.557, "step": 1060 }, { "epoch": 0.2936414585207223, "grad_norm": 0.17293184995651245, "learning_rate": 1e-05, "loss": 0.5679, "step": 1061 }, { "epoch": 0.29391821767107174, "grad_norm": 0.18131504952907562, "learning_rate": 1e-05, "loss": 0.5835, "step": 1062 }, { "epoch": 0.29419497682142115, "grad_norm": 0.1820629984140396, "learning_rate": 1e-05, "loss": 0.5787, "step": 1063 }, { "epoch": 0.29447173597177057, "grad_norm": 0.19175072014331818, "learning_rate": 1e-05, "loss": 0.6192, "step": 1064 }, { "epoch": 0.29474849512212, "grad_norm": 0.17718973755836487, "learning_rate": 1e-05, "loss": 0.5886, "step": 1065 }, { "epoch": 0.2950252542724694, "grad_norm": 0.17941828072071075, "learning_rate": 1e-05, "loss": 0.5666, "step": 1066 }, { "epoch": 0.2953020134228188, "grad_norm": 0.17432193458080292, "learning_rate": 1e-05, "loss": 0.5697, "step": 1067 }, { "epoch": 0.2955787725731682, "grad_norm": 0.18473881483078003, "learning_rate": 1e-05, "loss": 0.5784, "step": 1068 }, { "epoch": 0.2958555317235176, "grad_norm": 0.1914503276348114, "learning_rate": 1e-05, "loss": 0.5818, "step": 1069 }, { "epoch": 0.296132290873867, "grad_norm": 0.1753094494342804, "learning_rate": 1e-05, "loss": 0.6026, "step": 1070 }, { "epoch": 0.2964090500242164, "grad_norm": 0.1724114865064621, "learning_rate": 1e-05, "loss": 0.6103, "step": 1071 }, { "epoch": 0.29668580917456583, "grad_norm": 0.18317840993404388, "learning_rate": 1e-05, "loss": 0.5648, "step": 1072 }, { "epoch": 0.29696256832491524, "grad_norm": 0.1785741001367569, "learning_rate": 1e-05, "loss": 0.5646, "step": 1073 }, { "epoch": 0.29723932747526466, "grad_norm": 0.1821894645690918, "learning_rate": 1e-05, "loss": 0.5689, "step": 1074 }, { "epoch": 0.2975160866256141, "grad_norm": 0.1802983433008194, "learning_rate": 1e-05, "loss": 0.5817, "step": 1075 }, { "epoch": 0.2977928457759635, "grad_norm": 0.17680680751800537, "learning_rate": 1e-05, "loss": 0.5983, "step": 1076 }, { "epoch": 0.2980696049263129, "grad_norm": 0.17669305205345154, "learning_rate": 1e-05, "loss": 0.5831, "step": 1077 }, { "epoch": 0.29834636407666226, "grad_norm": 0.1806221306324005, "learning_rate": 1e-05, "loss": 0.6359, "step": 1078 }, { "epoch": 0.2986231232270117, "grad_norm": 0.17773601412773132, "learning_rate": 1e-05, "loss": 0.5569, "step": 1079 }, { "epoch": 0.2988998823773611, "grad_norm": 0.17633716762065887, "learning_rate": 1e-05, "loss": 0.5781, "step": 1080 }, { "epoch": 0.2991766415277105, "grad_norm": 0.17368371784687042, "learning_rate": 1e-05, "loss": 0.5638, "step": 1081 }, { "epoch": 0.2994534006780599, "grad_norm": 0.18027660250663757, "learning_rate": 1e-05, "loss": 0.5673, "step": 1082 }, { "epoch": 0.29973015982840934, "grad_norm": 0.1826440989971161, "learning_rate": 1e-05, "loss": 0.5767, "step": 1083 }, { "epoch": 0.30000691897875875, "grad_norm": 0.18124079704284668, "learning_rate": 1e-05, "loss": 0.5687, "step": 1084 }, { "epoch": 0.30028367812910817, "grad_norm": 0.18164639174938202, "learning_rate": 1e-05, "loss": 0.6048, "step": 1085 }, { "epoch": 0.3005604372794575, "grad_norm": 0.17601098120212555, "learning_rate": 1e-05, "loss": 0.5813, "step": 1086 }, { "epoch": 0.30083719642980694, "grad_norm": 0.16986343264579773, "learning_rate": 1e-05, "loss": 0.5579, "step": 1087 }, { "epoch": 0.30111395558015636, "grad_norm": 0.18357093632221222, "learning_rate": 1e-05, "loss": 0.5734, "step": 1088 }, { "epoch": 0.30139071473050577, "grad_norm": 0.173426553606987, "learning_rate": 1e-05, "loss": 0.5513, "step": 1089 }, { "epoch": 0.3016674738808552, "grad_norm": 0.17810072004795074, "learning_rate": 1e-05, "loss": 0.55, "step": 1090 }, { "epoch": 0.3019442330312046, "grad_norm": 0.17868955433368683, "learning_rate": 1e-05, "loss": 0.5775, "step": 1091 }, { "epoch": 0.302220992181554, "grad_norm": 0.18566736578941345, "learning_rate": 1e-05, "loss": 0.577, "step": 1092 }, { "epoch": 0.30249775133190343, "grad_norm": 0.1801648586988449, "learning_rate": 1e-05, "loss": 0.5687, "step": 1093 }, { "epoch": 0.30277451048225285, "grad_norm": 0.1764914095401764, "learning_rate": 1e-05, "loss": 0.5339, "step": 1094 }, { "epoch": 0.3030512696326022, "grad_norm": 0.17444588243961334, "learning_rate": 1e-05, "loss": 0.5933, "step": 1095 }, { "epoch": 0.3033280287829516, "grad_norm": 0.18400919437408447, "learning_rate": 1e-05, "loss": 0.5743, "step": 1096 }, { "epoch": 0.30360478793330103, "grad_norm": 0.17577490210533142, "learning_rate": 1e-05, "loss": 0.5706, "step": 1097 }, { "epoch": 0.30388154708365045, "grad_norm": 0.18511821329593658, "learning_rate": 1e-05, "loss": 0.591, "step": 1098 }, { "epoch": 0.30415830623399986, "grad_norm": 0.1766117364168167, "learning_rate": 1e-05, "loss": 0.5778, "step": 1099 }, { "epoch": 0.3044350653843493, "grad_norm": 0.1778910905122757, "learning_rate": 1e-05, "loss": 0.5642, "step": 1100 }, { "epoch": 0.3047118245346987, "grad_norm": 0.17834864556789398, "learning_rate": 1e-05, "loss": 0.6084, "step": 1101 }, { "epoch": 0.3049885836850481, "grad_norm": 0.19032251834869385, "learning_rate": 1e-05, "loss": 0.5848, "step": 1102 }, { "epoch": 0.30526534283539747, "grad_norm": 0.18981005251407623, "learning_rate": 1e-05, "loss": 0.5615, "step": 1103 }, { "epoch": 0.3055421019857469, "grad_norm": 0.17867408692836761, "learning_rate": 1e-05, "loss": 0.5618, "step": 1104 }, { "epoch": 0.3058188611360963, "grad_norm": 0.1794937551021576, "learning_rate": 1e-05, "loss": 0.5625, "step": 1105 }, { "epoch": 0.3060956202864457, "grad_norm": 0.17799748480319977, "learning_rate": 1e-05, "loss": 0.5564, "step": 1106 }, { "epoch": 0.30637237943679513, "grad_norm": 0.17635931074619293, "learning_rate": 1e-05, "loss": 0.5817, "step": 1107 }, { "epoch": 0.30664913858714454, "grad_norm": 0.17596270143985748, "learning_rate": 1e-05, "loss": 0.5716, "step": 1108 }, { "epoch": 0.30692589773749396, "grad_norm": 0.17897696793079376, "learning_rate": 1e-05, "loss": 0.5807, "step": 1109 }, { "epoch": 0.3072026568878434, "grad_norm": 0.18864931166172028, "learning_rate": 1e-05, "loss": 0.5802, "step": 1110 }, { "epoch": 0.3074794160381928, "grad_norm": 0.18823905289173126, "learning_rate": 1e-05, "loss": 0.5587, "step": 1111 }, { "epoch": 0.30775617518854215, "grad_norm": 0.17796404659748077, "learning_rate": 1e-05, "loss": 0.5707, "step": 1112 }, { "epoch": 0.30803293433889156, "grad_norm": 0.18339358270168304, "learning_rate": 1e-05, "loss": 0.6325, "step": 1113 }, { "epoch": 0.308309693489241, "grad_norm": 0.17941291630268097, "learning_rate": 1e-05, "loss": 0.5799, "step": 1114 }, { "epoch": 0.3085864526395904, "grad_norm": 0.1800389140844345, "learning_rate": 1e-05, "loss": 0.5787, "step": 1115 }, { "epoch": 0.3088632117899398, "grad_norm": 0.17728674411773682, "learning_rate": 1e-05, "loss": 0.5951, "step": 1116 }, { "epoch": 0.3091399709402892, "grad_norm": 0.1759999692440033, "learning_rate": 1e-05, "loss": 0.5525, "step": 1117 }, { "epoch": 0.30941673009063864, "grad_norm": 0.18928289413452148, "learning_rate": 1e-05, "loss": 0.5833, "step": 1118 }, { "epoch": 0.30969348924098805, "grad_norm": 0.1828695833683014, "learning_rate": 1e-05, "loss": 0.5769, "step": 1119 }, { "epoch": 0.30997024839133747, "grad_norm": 0.17702274024486542, "learning_rate": 1e-05, "loss": 0.604, "step": 1120 }, { "epoch": 0.3102470075416868, "grad_norm": 0.17620177567005157, "learning_rate": 1e-05, "loss": 0.5742, "step": 1121 }, { "epoch": 0.31052376669203624, "grad_norm": 0.18872563540935516, "learning_rate": 1e-05, "loss": 0.594, "step": 1122 }, { "epoch": 0.31080052584238566, "grad_norm": 0.17434361577033997, "learning_rate": 1e-05, "loss": 0.5717, "step": 1123 }, { "epoch": 0.31107728499273507, "grad_norm": 0.18941836059093475, "learning_rate": 1e-05, "loss": 0.5764, "step": 1124 }, { "epoch": 0.3113540441430845, "grad_norm": 0.1784893274307251, "learning_rate": 1e-05, "loss": 0.5594, "step": 1125 }, { "epoch": 0.3116308032934339, "grad_norm": 0.18057741224765778, "learning_rate": 1e-05, "loss": 0.5741, "step": 1126 }, { "epoch": 0.3119075624437833, "grad_norm": 0.17655673623085022, "learning_rate": 1e-05, "loss": 0.5792, "step": 1127 }, { "epoch": 0.31218432159413273, "grad_norm": 0.18612602353096008, "learning_rate": 1e-05, "loss": 0.5864, "step": 1128 }, { "epoch": 0.3124610807444821, "grad_norm": 0.19161878526210785, "learning_rate": 1e-05, "loss": 0.5596, "step": 1129 }, { "epoch": 0.3127378398948315, "grad_norm": 0.18850542604923248, "learning_rate": 1e-05, "loss": 0.5798, "step": 1130 }, { "epoch": 0.3130145990451809, "grad_norm": 0.18133646249771118, "learning_rate": 1e-05, "loss": 0.5927, "step": 1131 }, { "epoch": 0.31329135819553033, "grad_norm": 0.1804470419883728, "learning_rate": 1e-05, "loss": 0.576, "step": 1132 }, { "epoch": 0.31356811734587975, "grad_norm": 0.18346482515335083, "learning_rate": 1e-05, "loss": 0.5596, "step": 1133 }, { "epoch": 0.31384487649622916, "grad_norm": 0.17838411033153534, "learning_rate": 1e-05, "loss": 0.5887, "step": 1134 }, { "epoch": 0.3141216356465786, "grad_norm": 0.18570253252983093, "learning_rate": 1e-05, "loss": 0.586, "step": 1135 }, { "epoch": 0.314398394796928, "grad_norm": 0.18732324242591858, "learning_rate": 1e-05, "loss": 0.5943, "step": 1136 }, { "epoch": 0.3146751539472774, "grad_norm": 0.17309147119522095, "learning_rate": 1e-05, "loss": 0.5581, "step": 1137 }, { "epoch": 0.31495191309762677, "grad_norm": 0.1828528791666031, "learning_rate": 1e-05, "loss": 0.5673, "step": 1138 }, { "epoch": 0.3152286722479762, "grad_norm": 0.17879748344421387, "learning_rate": 1e-05, "loss": 0.5561, "step": 1139 }, { "epoch": 0.3155054313983256, "grad_norm": 0.17504341900348663, "learning_rate": 1e-05, "loss": 0.5749, "step": 1140 }, { "epoch": 0.315782190548675, "grad_norm": 0.17164064943790436, "learning_rate": 1e-05, "loss": 0.5703, "step": 1141 }, { "epoch": 0.3160589496990244, "grad_norm": 0.1700817048549652, "learning_rate": 1e-05, "loss": 0.5624, "step": 1142 }, { "epoch": 0.31633570884937384, "grad_norm": 0.19575674831867218, "learning_rate": 1e-05, "loss": 0.5819, "step": 1143 }, { "epoch": 0.31661246799972326, "grad_norm": 0.18458488583564758, "learning_rate": 1e-05, "loss": 0.5915, "step": 1144 }, { "epoch": 0.31688922715007267, "grad_norm": 0.18464747071266174, "learning_rate": 1e-05, "loss": 0.57, "step": 1145 }, { "epoch": 0.31716598630042203, "grad_norm": 0.18992480635643005, "learning_rate": 1e-05, "loss": 0.5941, "step": 1146 }, { "epoch": 0.31744274545077145, "grad_norm": 0.18105408549308777, "learning_rate": 1e-05, "loss": 0.5497, "step": 1147 }, { "epoch": 0.31771950460112086, "grad_norm": 0.17819786071777344, "learning_rate": 1e-05, "loss": 0.5869, "step": 1148 }, { "epoch": 0.3179962637514703, "grad_norm": 0.18036797642707825, "learning_rate": 1e-05, "loss": 0.5904, "step": 1149 }, { "epoch": 0.3182730229018197, "grad_norm": 0.18872028589248657, "learning_rate": 1e-05, "loss": 0.59, "step": 1150 }, { "epoch": 0.3185497820521691, "grad_norm": 0.17893821001052856, "learning_rate": 1e-05, "loss": 0.5752, "step": 1151 }, { "epoch": 0.3188265412025185, "grad_norm": 0.18501116335391998, "learning_rate": 1e-05, "loss": 0.5953, "step": 1152 }, { "epoch": 0.31910330035286794, "grad_norm": 0.1788380742073059, "learning_rate": 1e-05, "loss": 0.5529, "step": 1153 }, { "epoch": 0.31938005950321735, "grad_norm": 0.1809559464454651, "learning_rate": 1e-05, "loss": 0.5654, "step": 1154 }, { "epoch": 0.3196568186535667, "grad_norm": 0.18129530549049377, "learning_rate": 1e-05, "loss": 0.5735, "step": 1155 }, { "epoch": 0.3199335778039161, "grad_norm": 0.17159980535507202, "learning_rate": 1e-05, "loss": 0.557, "step": 1156 }, { "epoch": 0.32021033695426554, "grad_norm": 0.17021486163139343, "learning_rate": 1e-05, "loss": 0.5565, "step": 1157 }, { "epoch": 0.32048709610461495, "grad_norm": 0.1720835566520691, "learning_rate": 1e-05, "loss": 0.5391, "step": 1158 }, { "epoch": 0.32076385525496437, "grad_norm": 0.18818922340869904, "learning_rate": 1e-05, "loss": 0.577, "step": 1159 }, { "epoch": 0.3210406144053138, "grad_norm": 0.17404362559318542, "learning_rate": 1e-05, "loss": 0.569, "step": 1160 }, { "epoch": 0.3213173735556632, "grad_norm": 0.17887726426124573, "learning_rate": 1e-05, "loss": 0.6116, "step": 1161 }, { "epoch": 0.3215941327060126, "grad_norm": 0.17872290313243866, "learning_rate": 1e-05, "loss": 0.5783, "step": 1162 }, { "epoch": 0.321870891856362, "grad_norm": 0.1773424595594406, "learning_rate": 1e-05, "loss": 0.5626, "step": 1163 }, { "epoch": 0.3221476510067114, "grad_norm": 0.1804758757352829, "learning_rate": 1e-05, "loss": 0.5623, "step": 1164 }, { "epoch": 0.3224244101570608, "grad_norm": 0.1810540109872818, "learning_rate": 1e-05, "loss": 0.5718, "step": 1165 }, { "epoch": 0.3227011693074102, "grad_norm": 0.18068596720695496, "learning_rate": 1e-05, "loss": 0.5398, "step": 1166 }, { "epoch": 0.32297792845775963, "grad_norm": 0.1842799186706543, "learning_rate": 1e-05, "loss": 0.5642, "step": 1167 }, { "epoch": 0.32325468760810905, "grad_norm": 0.1762673705816269, "learning_rate": 1e-05, "loss": 0.6069, "step": 1168 }, { "epoch": 0.32353144675845846, "grad_norm": 0.18561576306819916, "learning_rate": 1e-05, "loss": 0.5519, "step": 1169 }, { "epoch": 0.3238082059088079, "grad_norm": 0.17434601485729218, "learning_rate": 1e-05, "loss": 0.5475, "step": 1170 }, { "epoch": 0.3240849650591573, "grad_norm": 0.17691347002983093, "learning_rate": 1e-05, "loss": 0.5934, "step": 1171 }, { "epoch": 0.32436172420950665, "grad_norm": 0.18074361979961395, "learning_rate": 1e-05, "loss": 0.5636, "step": 1172 }, { "epoch": 0.32463848335985607, "grad_norm": 0.18318262696266174, "learning_rate": 1e-05, "loss": 0.5469, "step": 1173 }, { "epoch": 0.3249152425102055, "grad_norm": 0.18467363715171814, "learning_rate": 1e-05, "loss": 0.5508, "step": 1174 }, { "epoch": 0.3251920016605549, "grad_norm": 0.1816830039024353, "learning_rate": 1e-05, "loss": 0.587, "step": 1175 }, { "epoch": 0.3254687608109043, "grad_norm": 0.18347911536693573, "learning_rate": 1e-05, "loss": 0.5791, "step": 1176 }, { "epoch": 0.3257455199612537, "grad_norm": 0.17915275692939758, "learning_rate": 1e-05, "loss": 0.5578, "step": 1177 }, { "epoch": 0.32602227911160314, "grad_norm": 0.1740441471338272, "learning_rate": 1e-05, "loss": 0.5574, "step": 1178 }, { "epoch": 0.32629903826195256, "grad_norm": 0.17013631761074066, "learning_rate": 1e-05, "loss": 0.5882, "step": 1179 }, { "epoch": 0.32657579741230197, "grad_norm": 0.1750791072845459, "learning_rate": 1e-05, "loss": 0.581, "step": 1180 }, { "epoch": 0.32685255656265133, "grad_norm": 0.17421357333660126, "learning_rate": 1e-05, "loss": 0.5822, "step": 1181 }, { "epoch": 0.32712931571300075, "grad_norm": 0.18301914632320404, "learning_rate": 1e-05, "loss": 0.5457, "step": 1182 }, { "epoch": 0.32740607486335016, "grad_norm": 0.18373806774616241, "learning_rate": 1e-05, "loss": 0.5815, "step": 1183 }, { "epoch": 0.3276828340136996, "grad_norm": 0.19292868673801422, "learning_rate": 1e-05, "loss": 0.5985, "step": 1184 }, { "epoch": 0.327959593164049, "grad_norm": 0.1773187220096588, "learning_rate": 1e-05, "loss": 0.5779, "step": 1185 }, { "epoch": 0.3282363523143984, "grad_norm": 0.17676299810409546, "learning_rate": 1e-05, "loss": 0.5785, "step": 1186 }, { "epoch": 0.3285131114647478, "grad_norm": 0.18739758431911469, "learning_rate": 1e-05, "loss": 0.5779, "step": 1187 }, { "epoch": 0.32878987061509723, "grad_norm": 0.17793011665344238, "learning_rate": 1e-05, "loss": 0.5721, "step": 1188 }, { "epoch": 0.3290666297654466, "grad_norm": 0.1792822778224945, "learning_rate": 1e-05, "loss": 0.5547, "step": 1189 }, { "epoch": 0.329343388915796, "grad_norm": 0.17810599505901337, "learning_rate": 1e-05, "loss": 0.5676, "step": 1190 }, { "epoch": 0.3296201480661454, "grad_norm": 0.18236035108566284, "learning_rate": 1e-05, "loss": 0.5671, "step": 1191 }, { "epoch": 0.32989690721649484, "grad_norm": 0.1838209182024002, "learning_rate": 1e-05, "loss": 0.5752, "step": 1192 }, { "epoch": 0.33017366636684425, "grad_norm": 0.17947471141815186, "learning_rate": 1e-05, "loss": 0.5856, "step": 1193 }, { "epoch": 0.33045042551719367, "grad_norm": 0.18528112769126892, "learning_rate": 1e-05, "loss": 0.5872, "step": 1194 }, { "epoch": 0.3307271846675431, "grad_norm": 0.18420414626598358, "learning_rate": 1e-05, "loss": 0.586, "step": 1195 }, { "epoch": 0.3310039438178925, "grad_norm": 0.18042173981666565, "learning_rate": 1e-05, "loss": 0.5864, "step": 1196 }, { "epoch": 0.3312807029682419, "grad_norm": 0.1881732940673828, "learning_rate": 1e-05, "loss": 0.5812, "step": 1197 }, { "epoch": 0.3315574621185913, "grad_norm": 0.1695108413696289, "learning_rate": 1e-05, "loss": 0.5792, "step": 1198 }, { "epoch": 0.3318342212689407, "grad_norm": 0.17240653932094574, "learning_rate": 1e-05, "loss": 0.5648, "step": 1199 }, { "epoch": 0.3321109804192901, "grad_norm": 0.18106268346309662, "learning_rate": 1e-05, "loss": 0.5691, "step": 1200 }, { "epoch": 0.3323877395696395, "grad_norm": 0.18471741676330566, "learning_rate": 1e-05, "loss": 0.6007, "step": 1201 }, { "epoch": 0.33266449871998893, "grad_norm": 0.1808074712753296, "learning_rate": 1e-05, "loss": 0.5946, "step": 1202 }, { "epoch": 0.33294125787033835, "grad_norm": 0.18883252143859863, "learning_rate": 1e-05, "loss": 0.5572, "step": 1203 }, { "epoch": 0.33321801702068776, "grad_norm": 0.1751684546470642, "learning_rate": 1e-05, "loss": 0.5545, "step": 1204 }, { "epoch": 0.3334947761710372, "grad_norm": 0.17845529317855835, "learning_rate": 1e-05, "loss": 0.5768, "step": 1205 }, { "epoch": 0.33377153532138654, "grad_norm": 0.17809702455997467, "learning_rate": 1e-05, "loss": 0.5765, "step": 1206 }, { "epoch": 0.33404829447173595, "grad_norm": 0.18065202236175537, "learning_rate": 1e-05, "loss": 0.5614, "step": 1207 }, { "epoch": 0.33432505362208537, "grad_norm": 0.17171914875507355, "learning_rate": 1e-05, "loss": 0.5784, "step": 1208 }, { "epoch": 0.3346018127724348, "grad_norm": 0.16915443539619446, "learning_rate": 1e-05, "loss": 0.5662, "step": 1209 }, { "epoch": 0.3348785719227842, "grad_norm": 0.18340595066547394, "learning_rate": 1e-05, "loss": 0.5844, "step": 1210 }, { "epoch": 0.3351553310731336, "grad_norm": 0.18861714005470276, "learning_rate": 1e-05, "loss": 0.5637, "step": 1211 }, { "epoch": 0.335432090223483, "grad_norm": 0.1753142774105072, "learning_rate": 1e-05, "loss": 0.5723, "step": 1212 }, { "epoch": 0.33570884937383244, "grad_norm": 0.17367449402809143, "learning_rate": 1e-05, "loss": 0.5518, "step": 1213 }, { "epoch": 0.33598560852418186, "grad_norm": 0.1776447743177414, "learning_rate": 1e-05, "loss": 0.5765, "step": 1214 }, { "epoch": 0.3362623676745312, "grad_norm": 0.17684847116470337, "learning_rate": 1e-05, "loss": 0.5578, "step": 1215 }, { "epoch": 0.33653912682488063, "grad_norm": 0.18362925946712494, "learning_rate": 1e-05, "loss": 0.5518, "step": 1216 }, { "epoch": 0.33681588597523004, "grad_norm": 0.1788303256034851, "learning_rate": 1e-05, "loss": 0.5699, "step": 1217 }, { "epoch": 0.33709264512557946, "grad_norm": 0.18397581577301025, "learning_rate": 1e-05, "loss": 0.5782, "step": 1218 }, { "epoch": 0.3373694042759289, "grad_norm": 0.18542160093784332, "learning_rate": 1e-05, "loss": 0.5864, "step": 1219 }, { "epoch": 0.3376461634262783, "grad_norm": 0.1796381175518036, "learning_rate": 1e-05, "loss": 0.5918, "step": 1220 }, { "epoch": 0.3379229225766277, "grad_norm": 0.1706746369600296, "learning_rate": 1e-05, "loss": 0.5651, "step": 1221 }, { "epoch": 0.3381996817269771, "grad_norm": 0.17457415163516998, "learning_rate": 1e-05, "loss": 0.5524, "step": 1222 }, { "epoch": 0.33847644087732653, "grad_norm": 0.17644447088241577, "learning_rate": 1e-05, "loss": 0.5866, "step": 1223 }, { "epoch": 0.3387532000276759, "grad_norm": 0.1763908416032791, "learning_rate": 1e-05, "loss": 0.591, "step": 1224 }, { "epoch": 0.3390299591780253, "grad_norm": 0.17792542278766632, "learning_rate": 1e-05, "loss": 0.5626, "step": 1225 }, { "epoch": 0.3393067183283747, "grad_norm": 0.17699183523654938, "learning_rate": 1e-05, "loss": 0.5859, "step": 1226 }, { "epoch": 0.33958347747872414, "grad_norm": 0.1986238658428192, "learning_rate": 1e-05, "loss": 0.558, "step": 1227 }, { "epoch": 0.33986023662907355, "grad_norm": 0.17943307757377625, "learning_rate": 1e-05, "loss": 0.6135, "step": 1228 }, { "epoch": 0.34013699577942297, "grad_norm": 0.18312916159629822, "learning_rate": 1e-05, "loss": 0.6019, "step": 1229 }, { "epoch": 0.3404137549297724, "grad_norm": 0.17561538517475128, "learning_rate": 1e-05, "loss": 0.5764, "step": 1230 }, { "epoch": 0.3406905140801218, "grad_norm": 0.19040356576442719, "learning_rate": 1e-05, "loss": 0.5718, "step": 1231 }, { "epoch": 0.34096727323047116, "grad_norm": 0.1986088901758194, "learning_rate": 1e-05, "loss": 0.5945, "step": 1232 }, { "epoch": 0.34124403238082057, "grad_norm": 0.17363159358501434, "learning_rate": 1e-05, "loss": 0.5835, "step": 1233 }, { "epoch": 0.34152079153117, "grad_norm": 0.1730291098356247, "learning_rate": 1e-05, "loss": 0.5988, "step": 1234 }, { "epoch": 0.3417975506815194, "grad_norm": 0.17816130816936493, "learning_rate": 1e-05, "loss": 0.5535, "step": 1235 }, { "epoch": 0.3420743098318688, "grad_norm": 0.18265485763549805, "learning_rate": 1e-05, "loss": 0.5651, "step": 1236 }, { "epoch": 0.34235106898221823, "grad_norm": 0.18635882437229156, "learning_rate": 1e-05, "loss": 0.5747, "step": 1237 }, { "epoch": 0.34262782813256765, "grad_norm": 0.179630845785141, "learning_rate": 1e-05, "loss": 0.5904, "step": 1238 }, { "epoch": 0.34290458728291706, "grad_norm": 0.18926902115345, "learning_rate": 1e-05, "loss": 0.5852, "step": 1239 }, { "epoch": 0.3431813464332665, "grad_norm": 0.19057226181030273, "learning_rate": 1e-05, "loss": 0.6173, "step": 1240 }, { "epoch": 0.34345810558361584, "grad_norm": 0.1787969022989273, "learning_rate": 1e-05, "loss": 0.5625, "step": 1241 }, { "epoch": 0.34373486473396525, "grad_norm": 0.1967000961303711, "learning_rate": 1e-05, "loss": 0.5689, "step": 1242 }, { "epoch": 0.34401162388431467, "grad_norm": 0.18241441249847412, "learning_rate": 1e-05, "loss": 0.5761, "step": 1243 }, { "epoch": 0.3442883830346641, "grad_norm": 0.18552668392658234, "learning_rate": 1e-05, "loss": 0.5863, "step": 1244 }, { "epoch": 0.3445651421850135, "grad_norm": 0.17579784989356995, "learning_rate": 1e-05, "loss": 0.5825, "step": 1245 }, { "epoch": 0.3448419013353629, "grad_norm": 0.17899160087108612, "learning_rate": 1e-05, "loss": 0.5615, "step": 1246 }, { "epoch": 0.3451186604857123, "grad_norm": 0.18136009573936462, "learning_rate": 1e-05, "loss": 0.5906, "step": 1247 }, { "epoch": 0.34539541963606174, "grad_norm": 0.17387574911117554, "learning_rate": 1e-05, "loss": 0.5845, "step": 1248 }, { "epoch": 0.3456721787864111, "grad_norm": 0.18307487666606903, "learning_rate": 1e-05, "loss": 0.5647, "step": 1249 }, { "epoch": 0.3459489379367605, "grad_norm": 0.17705769836902618, "learning_rate": 1e-05, "loss": 0.5442, "step": 1250 }, { "epoch": 0.34622569708710993, "grad_norm": 0.17568045854568481, "learning_rate": 1e-05, "loss": 0.5565, "step": 1251 }, { "epoch": 0.34650245623745934, "grad_norm": 0.1754060983657837, "learning_rate": 1e-05, "loss": 0.5869, "step": 1252 }, { "epoch": 0.34677921538780876, "grad_norm": 0.18490305542945862, "learning_rate": 1e-05, "loss": 0.5959, "step": 1253 }, { "epoch": 0.3470559745381582, "grad_norm": 0.1775590032339096, "learning_rate": 1e-05, "loss": 0.5755, "step": 1254 }, { "epoch": 0.3473327336885076, "grad_norm": 0.17191371321678162, "learning_rate": 1e-05, "loss": 0.5794, "step": 1255 }, { "epoch": 0.347609492838857, "grad_norm": 0.1782778650522232, "learning_rate": 1e-05, "loss": 0.564, "step": 1256 }, { "epoch": 0.3478862519892064, "grad_norm": 0.18097220361232758, "learning_rate": 1e-05, "loss": 0.5664, "step": 1257 }, { "epoch": 0.3481630111395558, "grad_norm": 0.18284441530704498, "learning_rate": 1e-05, "loss": 0.5929, "step": 1258 }, { "epoch": 0.3484397702899052, "grad_norm": 0.1839774250984192, "learning_rate": 1e-05, "loss": 0.5787, "step": 1259 }, { "epoch": 0.3487165294402546, "grad_norm": 0.173128142952919, "learning_rate": 1e-05, "loss": 0.5584, "step": 1260 }, { "epoch": 0.348993288590604, "grad_norm": 0.1813679188489914, "learning_rate": 1e-05, "loss": 0.583, "step": 1261 }, { "epoch": 0.34927004774095344, "grad_norm": 0.17745116353034973, "learning_rate": 1e-05, "loss": 0.5933, "step": 1262 }, { "epoch": 0.34954680689130285, "grad_norm": 0.17124223709106445, "learning_rate": 1e-05, "loss": 0.5876, "step": 1263 }, { "epoch": 0.34982356604165227, "grad_norm": 0.17909519374370575, "learning_rate": 1e-05, "loss": 0.5474, "step": 1264 }, { "epoch": 0.3501003251920017, "grad_norm": 0.18730181455612183, "learning_rate": 1e-05, "loss": 0.5406, "step": 1265 }, { "epoch": 0.35037708434235104, "grad_norm": 0.1790524125099182, "learning_rate": 1e-05, "loss": 0.5641, "step": 1266 }, { "epoch": 0.35065384349270046, "grad_norm": 0.18674589693546295, "learning_rate": 1e-05, "loss": 0.5885, "step": 1267 }, { "epoch": 0.35093060264304987, "grad_norm": 0.183821439743042, "learning_rate": 1e-05, "loss": 0.5708, "step": 1268 }, { "epoch": 0.3512073617933993, "grad_norm": 0.17605483531951904, "learning_rate": 1e-05, "loss": 0.6017, "step": 1269 }, { "epoch": 0.3514841209437487, "grad_norm": 0.18812601268291473, "learning_rate": 1e-05, "loss": 0.6021, "step": 1270 }, { "epoch": 0.3517608800940981, "grad_norm": 0.17811711132526398, "learning_rate": 1e-05, "loss": 0.6042, "step": 1271 }, { "epoch": 0.35203763924444753, "grad_norm": 0.17999190092086792, "learning_rate": 1e-05, "loss": 0.5779, "step": 1272 }, { "epoch": 0.35231439839479695, "grad_norm": 0.173455610871315, "learning_rate": 1e-05, "loss": 0.5913, "step": 1273 }, { "epoch": 0.35259115754514636, "grad_norm": 0.18330033123493195, "learning_rate": 1e-05, "loss": 0.5826, "step": 1274 }, { "epoch": 0.3528679166954957, "grad_norm": 0.17858175933361053, "learning_rate": 1e-05, "loss": 0.5631, "step": 1275 }, { "epoch": 0.35314467584584514, "grad_norm": 0.17316441237926483, "learning_rate": 1e-05, "loss": 0.575, "step": 1276 }, { "epoch": 0.35342143499619455, "grad_norm": 0.1814458966255188, "learning_rate": 1e-05, "loss": 0.5634, "step": 1277 }, { "epoch": 0.35369819414654396, "grad_norm": 0.17839571833610535, "learning_rate": 1e-05, "loss": 0.5837, "step": 1278 }, { "epoch": 0.3539749532968934, "grad_norm": 0.1856490969657898, "learning_rate": 1e-05, "loss": 0.5996, "step": 1279 }, { "epoch": 0.3542517124472428, "grad_norm": 0.17253237962722778, "learning_rate": 1e-05, "loss": 0.5785, "step": 1280 }, { "epoch": 0.3545284715975922, "grad_norm": 0.18440473079681396, "learning_rate": 1e-05, "loss": 0.5778, "step": 1281 }, { "epoch": 0.3548052307479416, "grad_norm": 0.1808757185935974, "learning_rate": 1e-05, "loss": 0.5721, "step": 1282 }, { "epoch": 0.35508198989829104, "grad_norm": 0.18208785355091095, "learning_rate": 1e-05, "loss": 0.6041, "step": 1283 }, { "epoch": 0.3553587490486404, "grad_norm": 0.17686186730861664, "learning_rate": 1e-05, "loss": 0.5825, "step": 1284 }, { "epoch": 0.3556355081989898, "grad_norm": 0.18593569099903107, "learning_rate": 1e-05, "loss": 0.5818, "step": 1285 }, { "epoch": 0.35591226734933923, "grad_norm": 0.17056292295455933, "learning_rate": 1e-05, "loss": 0.5782, "step": 1286 }, { "epoch": 0.35618902649968864, "grad_norm": 0.17556306719779968, "learning_rate": 1e-05, "loss": 0.5478, "step": 1287 }, { "epoch": 0.35646578565003806, "grad_norm": 0.18443161249160767, "learning_rate": 1e-05, "loss": 0.549, "step": 1288 }, { "epoch": 0.3567425448003875, "grad_norm": 0.17349442839622498, "learning_rate": 1e-05, "loss": 0.5192, "step": 1289 }, { "epoch": 0.3570193039507369, "grad_norm": 0.18383587896823883, "learning_rate": 1e-05, "loss": 0.5657, "step": 1290 }, { "epoch": 0.3572960631010863, "grad_norm": 0.18513290584087372, "learning_rate": 1e-05, "loss": 0.5891, "step": 1291 }, { "epoch": 0.35757282225143566, "grad_norm": 0.1750076860189438, "learning_rate": 1e-05, "loss": 0.5613, "step": 1292 }, { "epoch": 0.3578495814017851, "grad_norm": 0.17715036869049072, "learning_rate": 1e-05, "loss": 0.582, "step": 1293 }, { "epoch": 0.3581263405521345, "grad_norm": 0.1738748699426651, "learning_rate": 1e-05, "loss": 0.5583, "step": 1294 }, { "epoch": 0.3584030997024839, "grad_norm": 0.18469014763832092, "learning_rate": 1e-05, "loss": 0.565, "step": 1295 }, { "epoch": 0.3586798588528333, "grad_norm": 0.17423366010189056, "learning_rate": 1e-05, "loss": 0.581, "step": 1296 }, { "epoch": 0.35895661800318274, "grad_norm": 0.1778155267238617, "learning_rate": 1e-05, "loss": 0.5759, "step": 1297 }, { "epoch": 0.35923337715353215, "grad_norm": 0.17402751743793488, "learning_rate": 1e-05, "loss": 0.5428, "step": 1298 }, { "epoch": 0.35951013630388157, "grad_norm": 0.17830702662467957, "learning_rate": 1e-05, "loss": 0.5778, "step": 1299 }, { "epoch": 0.359786895454231, "grad_norm": 0.1705295294523239, "learning_rate": 1e-05, "loss": 0.5754, "step": 1300 }, { "epoch": 0.36006365460458034, "grad_norm": 0.1792924851179123, "learning_rate": 1e-05, "loss": 0.5721, "step": 1301 }, { "epoch": 0.36034041375492976, "grad_norm": 0.18146555125713348, "learning_rate": 1e-05, "loss": 0.5638, "step": 1302 }, { "epoch": 0.36061717290527917, "grad_norm": 0.18029305338859558, "learning_rate": 1e-05, "loss": 0.563, "step": 1303 }, { "epoch": 0.3608939320556286, "grad_norm": 0.17676225304603577, "learning_rate": 1e-05, "loss": 0.5782, "step": 1304 }, { "epoch": 0.361170691205978, "grad_norm": 0.17272453010082245, "learning_rate": 1e-05, "loss": 0.5744, "step": 1305 }, { "epoch": 0.3614474503563274, "grad_norm": 0.17404848337173462, "learning_rate": 1e-05, "loss": 0.5847, "step": 1306 }, { "epoch": 0.36172420950667683, "grad_norm": 0.1776416003704071, "learning_rate": 1e-05, "loss": 0.5878, "step": 1307 }, { "epoch": 0.36200096865702625, "grad_norm": 0.17175796627998352, "learning_rate": 1e-05, "loss": 0.5631, "step": 1308 }, { "epoch": 0.3622777278073756, "grad_norm": 0.18162235617637634, "learning_rate": 1e-05, "loss": 0.5666, "step": 1309 }, { "epoch": 0.362554486957725, "grad_norm": 0.23053984344005585, "learning_rate": 1e-05, "loss": 0.5788, "step": 1310 }, { "epoch": 0.36283124610807443, "grad_norm": 0.20170247554779053, "learning_rate": 1e-05, "loss": 0.6026, "step": 1311 }, { "epoch": 0.36310800525842385, "grad_norm": 0.17960363626480103, "learning_rate": 1e-05, "loss": 0.5517, "step": 1312 }, { "epoch": 0.36338476440877326, "grad_norm": 0.18425993621349335, "learning_rate": 1e-05, "loss": 0.601, "step": 1313 }, { "epoch": 0.3636615235591227, "grad_norm": 0.18029506504535675, "learning_rate": 1e-05, "loss": 0.5657, "step": 1314 }, { "epoch": 0.3639382827094721, "grad_norm": 0.17633673548698425, "learning_rate": 1e-05, "loss": 0.5766, "step": 1315 }, { "epoch": 0.3642150418598215, "grad_norm": 0.18686938285827637, "learning_rate": 1e-05, "loss": 0.5934, "step": 1316 }, { "epoch": 0.3644918010101709, "grad_norm": 0.1868416666984558, "learning_rate": 1e-05, "loss": 0.5783, "step": 1317 }, { "epoch": 0.3647685601605203, "grad_norm": 0.1759416162967682, "learning_rate": 1e-05, "loss": 0.5758, "step": 1318 }, { "epoch": 0.3650453193108697, "grad_norm": 0.18988794088363647, "learning_rate": 1e-05, "loss": 0.5839, "step": 1319 }, { "epoch": 0.3653220784612191, "grad_norm": 0.1702679991722107, "learning_rate": 1e-05, "loss": 0.6025, "step": 1320 }, { "epoch": 0.36559883761156853, "grad_norm": 0.1800035834312439, "learning_rate": 1e-05, "loss": 0.606, "step": 1321 }, { "epoch": 0.36587559676191794, "grad_norm": 0.18977336585521698, "learning_rate": 1e-05, "loss": 0.5805, "step": 1322 }, { "epoch": 0.36615235591226736, "grad_norm": 0.1749512106180191, "learning_rate": 1e-05, "loss": 0.5266, "step": 1323 }, { "epoch": 0.3664291150626168, "grad_norm": 0.17485444247722626, "learning_rate": 1e-05, "loss": 0.5485, "step": 1324 }, { "epoch": 0.3667058742129662, "grad_norm": 0.17978151142597198, "learning_rate": 1e-05, "loss": 0.5658, "step": 1325 }, { "epoch": 0.3669826333633156, "grad_norm": 0.1699856072664261, "learning_rate": 1e-05, "loss": 0.5503, "step": 1326 }, { "epoch": 0.36725939251366496, "grad_norm": 0.18294887244701385, "learning_rate": 1e-05, "loss": 0.588, "step": 1327 }, { "epoch": 0.3675361516640144, "grad_norm": 0.17894300818443298, "learning_rate": 1e-05, "loss": 0.5548, "step": 1328 }, { "epoch": 0.3678129108143638, "grad_norm": 0.17654037475585938, "learning_rate": 1e-05, "loss": 0.6003, "step": 1329 }, { "epoch": 0.3680896699647132, "grad_norm": 0.16807270050048828, "learning_rate": 1e-05, "loss": 0.5305, "step": 1330 }, { "epoch": 0.3683664291150626, "grad_norm": 0.17064695060253143, "learning_rate": 1e-05, "loss": 0.5825, "step": 1331 }, { "epoch": 0.36864318826541204, "grad_norm": 0.1788780242204666, "learning_rate": 1e-05, "loss": 0.551, "step": 1332 }, { "epoch": 0.36891994741576145, "grad_norm": 0.17734064161777496, "learning_rate": 1e-05, "loss": 0.5713, "step": 1333 }, { "epoch": 0.36919670656611087, "grad_norm": 0.1875457763671875, "learning_rate": 1e-05, "loss": 0.5888, "step": 1334 }, { "epoch": 0.3694734657164602, "grad_norm": 0.17750950157642365, "learning_rate": 1e-05, "loss": 0.5286, "step": 1335 }, { "epoch": 0.36975022486680964, "grad_norm": 0.1799815148115158, "learning_rate": 1e-05, "loss": 0.5754, "step": 1336 }, { "epoch": 0.37002698401715906, "grad_norm": 0.17196987569332123, "learning_rate": 1e-05, "loss": 0.5534, "step": 1337 }, { "epoch": 0.37030374316750847, "grad_norm": 0.17997057735919952, "learning_rate": 1e-05, "loss": 0.5702, "step": 1338 }, { "epoch": 0.3705805023178579, "grad_norm": 0.17868083715438843, "learning_rate": 1e-05, "loss": 0.5706, "step": 1339 }, { "epoch": 0.3708572614682073, "grad_norm": 0.18089479207992554, "learning_rate": 1e-05, "loss": 0.5637, "step": 1340 }, { "epoch": 0.3711340206185567, "grad_norm": 0.17298699915409088, "learning_rate": 1e-05, "loss": 0.5728, "step": 1341 }, { "epoch": 0.37141077976890613, "grad_norm": 0.17566537857055664, "learning_rate": 1e-05, "loss": 0.5824, "step": 1342 }, { "epoch": 0.37168753891925554, "grad_norm": 0.1805431842803955, "learning_rate": 1e-05, "loss": 0.6057, "step": 1343 }, { "epoch": 0.3719642980696049, "grad_norm": 0.17862454056739807, "learning_rate": 1e-05, "loss": 0.5811, "step": 1344 }, { "epoch": 0.3722410572199543, "grad_norm": 0.17758965492248535, "learning_rate": 1e-05, "loss": 0.5656, "step": 1345 }, { "epoch": 0.37251781637030373, "grad_norm": 0.1677071452140808, "learning_rate": 1e-05, "loss": 0.5712, "step": 1346 }, { "epoch": 0.37279457552065315, "grad_norm": 0.17777714133262634, "learning_rate": 1e-05, "loss": 0.5778, "step": 1347 }, { "epoch": 0.37307133467100256, "grad_norm": 0.17409195005893707, "learning_rate": 1e-05, "loss": 0.5759, "step": 1348 }, { "epoch": 0.373348093821352, "grad_norm": 0.16907383501529694, "learning_rate": 1e-05, "loss": 0.5735, "step": 1349 }, { "epoch": 0.3736248529717014, "grad_norm": 0.18259595334529877, "learning_rate": 1e-05, "loss": 0.5726, "step": 1350 }, { "epoch": 0.3739016121220508, "grad_norm": 0.1733747124671936, "learning_rate": 1e-05, "loss": 0.5373, "step": 1351 }, { "epoch": 0.37417837127240017, "grad_norm": 0.17612150311470032, "learning_rate": 1e-05, "loss": 0.5559, "step": 1352 }, { "epoch": 0.3744551304227496, "grad_norm": 0.1701641082763672, "learning_rate": 1e-05, "loss": 0.5509, "step": 1353 }, { "epoch": 0.374731889573099, "grad_norm": 0.1735294610261917, "learning_rate": 1e-05, "loss": 0.55, "step": 1354 }, { "epoch": 0.3750086487234484, "grad_norm": 0.17100246250629425, "learning_rate": 1e-05, "loss": 0.5303, "step": 1355 }, { "epoch": 0.3752854078737978, "grad_norm": 0.17124801874160767, "learning_rate": 1e-05, "loss": 0.5821, "step": 1356 }, { "epoch": 0.37556216702414724, "grad_norm": 0.1736711710691452, "learning_rate": 1e-05, "loss": 0.5229, "step": 1357 }, { "epoch": 0.37583892617449666, "grad_norm": 0.17066577076911926, "learning_rate": 1e-05, "loss": 0.5285, "step": 1358 }, { "epoch": 0.37611568532484607, "grad_norm": 0.18859383463859558, "learning_rate": 1e-05, "loss": 0.5887, "step": 1359 }, { "epoch": 0.3763924444751955, "grad_norm": 0.19501204788684845, "learning_rate": 1e-05, "loss": 0.621, "step": 1360 }, { "epoch": 0.37666920362554485, "grad_norm": 0.1812286227941513, "learning_rate": 1e-05, "loss": 0.5617, "step": 1361 }, { "epoch": 0.37694596277589426, "grad_norm": 0.19356560707092285, "learning_rate": 1e-05, "loss": 0.565, "step": 1362 }, { "epoch": 0.3772227219262437, "grad_norm": 0.16654685139656067, "learning_rate": 1e-05, "loss": 0.5434, "step": 1363 }, { "epoch": 0.3774994810765931, "grad_norm": 0.17611536383628845, "learning_rate": 1e-05, "loss": 0.5628, "step": 1364 }, { "epoch": 0.3777762402269425, "grad_norm": 0.17856952548027039, "learning_rate": 1e-05, "loss": 0.5411, "step": 1365 }, { "epoch": 0.3780529993772919, "grad_norm": 0.18374161422252655, "learning_rate": 1e-05, "loss": 0.5942, "step": 1366 }, { "epoch": 0.37832975852764134, "grad_norm": 0.1982612907886505, "learning_rate": 1e-05, "loss": 0.5284, "step": 1367 }, { "epoch": 0.37860651767799075, "grad_norm": 0.18437409400939941, "learning_rate": 1e-05, "loss": 0.56, "step": 1368 }, { "epoch": 0.3788832768283401, "grad_norm": 0.19086268544197083, "learning_rate": 1e-05, "loss": 0.5777, "step": 1369 }, { "epoch": 0.3791600359786895, "grad_norm": 0.16857413947582245, "learning_rate": 1e-05, "loss": 0.55, "step": 1370 }, { "epoch": 0.37943679512903894, "grad_norm": 0.1716468334197998, "learning_rate": 1e-05, "loss": 0.5559, "step": 1371 }, { "epoch": 0.37971355427938835, "grad_norm": 0.18028302490711212, "learning_rate": 1e-05, "loss": 0.5889, "step": 1372 }, { "epoch": 0.37999031342973777, "grad_norm": 0.17792348563671112, "learning_rate": 1e-05, "loss": 0.5674, "step": 1373 }, { "epoch": 0.3802670725800872, "grad_norm": 0.17950235307216644, "learning_rate": 1e-05, "loss": 0.6037, "step": 1374 }, { "epoch": 0.3805438317304366, "grad_norm": 0.18818999826908112, "learning_rate": 1e-05, "loss": 0.5642, "step": 1375 }, { "epoch": 0.380820590880786, "grad_norm": 0.17635668814182281, "learning_rate": 1e-05, "loss": 0.5646, "step": 1376 }, { "epoch": 0.38109735003113543, "grad_norm": 0.18233786523342133, "learning_rate": 1e-05, "loss": 0.5714, "step": 1377 }, { "epoch": 0.3813741091814848, "grad_norm": 0.1851387917995453, "learning_rate": 1e-05, "loss": 0.5683, "step": 1378 }, { "epoch": 0.3816508683318342, "grad_norm": 0.18857987225055695, "learning_rate": 1e-05, "loss": 0.5837, "step": 1379 }, { "epoch": 0.3819276274821836, "grad_norm": 0.1758527159690857, "learning_rate": 1e-05, "loss": 0.5775, "step": 1380 }, { "epoch": 0.38220438663253303, "grad_norm": 0.1781454235315323, "learning_rate": 1e-05, "loss": 0.5968, "step": 1381 }, { "epoch": 0.38248114578288245, "grad_norm": 0.17969922721385956, "learning_rate": 1e-05, "loss": 0.5966, "step": 1382 }, { "epoch": 0.38275790493323186, "grad_norm": 0.17076238989830017, "learning_rate": 1e-05, "loss": 0.5304, "step": 1383 }, { "epoch": 0.3830346640835813, "grad_norm": 0.201168954372406, "learning_rate": 1e-05, "loss": 0.5751, "step": 1384 }, { "epoch": 0.3833114232339307, "grad_norm": 0.18166275322437286, "learning_rate": 1e-05, "loss": 0.6233, "step": 1385 }, { "epoch": 0.3835881823842801, "grad_norm": 0.18137086927890778, "learning_rate": 1e-05, "loss": 0.5918, "step": 1386 }, { "epoch": 0.38386494153462947, "grad_norm": 0.17399217188358307, "learning_rate": 1e-05, "loss": 0.5551, "step": 1387 }, { "epoch": 0.3841417006849789, "grad_norm": 0.1826900690793991, "learning_rate": 1e-05, "loss": 0.5803, "step": 1388 }, { "epoch": 0.3844184598353283, "grad_norm": 0.16425098478794098, "learning_rate": 1e-05, "loss": 0.5689, "step": 1389 }, { "epoch": 0.3846952189856777, "grad_norm": 0.18056906759738922, "learning_rate": 1e-05, "loss": 0.5749, "step": 1390 }, { "epoch": 0.3849719781360271, "grad_norm": 0.16956400871276855, "learning_rate": 1e-05, "loss": 0.5576, "step": 1391 }, { "epoch": 0.38524873728637654, "grad_norm": 0.18156734108924866, "learning_rate": 1e-05, "loss": 0.5646, "step": 1392 }, { "epoch": 0.38552549643672596, "grad_norm": 0.18770842254161835, "learning_rate": 1e-05, "loss": 0.5653, "step": 1393 }, { "epoch": 0.38580225558707537, "grad_norm": 0.17776712775230408, "learning_rate": 1e-05, "loss": 0.5931, "step": 1394 }, { "epoch": 0.38607901473742473, "grad_norm": 0.17808368802070618, "learning_rate": 1e-05, "loss": 0.5777, "step": 1395 }, { "epoch": 0.38635577388777415, "grad_norm": 0.17634107172489166, "learning_rate": 1e-05, "loss": 0.5997, "step": 1396 }, { "epoch": 0.38663253303812356, "grad_norm": 0.1798306703567505, "learning_rate": 1e-05, "loss": 0.5599, "step": 1397 }, { "epoch": 0.386909292188473, "grad_norm": 0.1664106547832489, "learning_rate": 1e-05, "loss": 0.5641, "step": 1398 }, { "epoch": 0.3871860513388224, "grad_norm": 0.1843496561050415, "learning_rate": 1e-05, "loss": 0.5477, "step": 1399 }, { "epoch": 0.3874628104891718, "grad_norm": 0.19204078614711761, "learning_rate": 1e-05, "loss": 0.5806, "step": 1400 }, { "epoch": 0.3877395696395212, "grad_norm": 0.17585329711437225, "learning_rate": 1e-05, "loss": 0.5504, "step": 1401 }, { "epoch": 0.38801632878987063, "grad_norm": 0.19446296989917755, "learning_rate": 1e-05, "loss": 0.6151, "step": 1402 }, { "epoch": 0.38829308794022005, "grad_norm": 0.17197680473327637, "learning_rate": 1e-05, "loss": 0.5842, "step": 1403 }, { "epoch": 0.3885698470905694, "grad_norm": 0.17503522336483002, "learning_rate": 1e-05, "loss": 0.5734, "step": 1404 }, { "epoch": 0.3888466062409188, "grad_norm": 0.1798526644706726, "learning_rate": 1e-05, "loss": 0.5442, "step": 1405 }, { "epoch": 0.38912336539126824, "grad_norm": 0.16771943867206573, "learning_rate": 1e-05, "loss": 0.527, "step": 1406 }, { "epoch": 0.38940012454161765, "grad_norm": 0.17265450954437256, "learning_rate": 1e-05, "loss": 0.5888, "step": 1407 }, { "epoch": 0.38967688369196707, "grad_norm": 0.17885401844978333, "learning_rate": 1e-05, "loss": 0.5823, "step": 1408 }, { "epoch": 0.3899536428423165, "grad_norm": 0.1970880925655365, "learning_rate": 1e-05, "loss": 0.5973, "step": 1409 }, { "epoch": 0.3902304019926659, "grad_norm": 0.17835885286331177, "learning_rate": 1e-05, "loss": 0.5565, "step": 1410 }, { "epoch": 0.3905071611430153, "grad_norm": 0.1702946424484253, "learning_rate": 1e-05, "loss": 0.5891, "step": 1411 }, { "epoch": 0.3907839202933647, "grad_norm": 0.1737968772649765, "learning_rate": 1e-05, "loss": 0.5281, "step": 1412 }, { "epoch": 0.3910606794437141, "grad_norm": 0.17288625240325928, "learning_rate": 1e-05, "loss": 0.5914, "step": 1413 }, { "epoch": 0.3913374385940635, "grad_norm": 0.17397888004779816, "learning_rate": 1e-05, "loss": 0.5669, "step": 1414 }, { "epoch": 0.3916141977444129, "grad_norm": 0.1815626174211502, "learning_rate": 1e-05, "loss": 0.5517, "step": 1415 }, { "epoch": 0.39189095689476233, "grad_norm": 0.1842496246099472, "learning_rate": 1e-05, "loss": 0.6041, "step": 1416 }, { "epoch": 0.39216771604511175, "grad_norm": 0.18435779213905334, "learning_rate": 1e-05, "loss": 0.5767, "step": 1417 }, { "epoch": 0.39244447519546116, "grad_norm": 0.17786039412021637, "learning_rate": 1e-05, "loss": 0.5587, "step": 1418 }, { "epoch": 0.3927212343458106, "grad_norm": 0.17147943377494812, "learning_rate": 1e-05, "loss": 0.558, "step": 1419 }, { "epoch": 0.39299799349616, "grad_norm": 0.1888813078403473, "learning_rate": 1e-05, "loss": 0.5767, "step": 1420 }, { "epoch": 0.39327475264650935, "grad_norm": 0.17537909746170044, "learning_rate": 1e-05, "loss": 0.5683, "step": 1421 }, { "epoch": 0.39355151179685877, "grad_norm": 0.18005721271038055, "learning_rate": 1e-05, "loss": 0.5549, "step": 1422 }, { "epoch": 0.3938282709472082, "grad_norm": 0.17395645380020142, "learning_rate": 1e-05, "loss": 0.5725, "step": 1423 }, { "epoch": 0.3941050300975576, "grad_norm": 0.18527908623218536, "learning_rate": 1e-05, "loss": 0.5886, "step": 1424 }, { "epoch": 0.394381789247907, "grad_norm": 0.1726376712322235, "learning_rate": 1e-05, "loss": 0.5742, "step": 1425 }, { "epoch": 0.3946585483982564, "grad_norm": 0.17705903947353363, "learning_rate": 1e-05, "loss": 0.5863, "step": 1426 }, { "epoch": 0.39493530754860584, "grad_norm": 0.17310704290866852, "learning_rate": 1e-05, "loss": 0.5701, "step": 1427 }, { "epoch": 0.39521206669895526, "grad_norm": 0.17833495140075684, "learning_rate": 1e-05, "loss": 0.5654, "step": 1428 }, { "epoch": 0.3954888258493046, "grad_norm": 0.18563607335090637, "learning_rate": 1e-05, "loss": 0.5765, "step": 1429 }, { "epoch": 0.39576558499965403, "grad_norm": 0.17989401519298553, "learning_rate": 1e-05, "loss": 0.5826, "step": 1430 }, { "epoch": 0.39604234415000344, "grad_norm": 0.18319517374038696, "learning_rate": 1e-05, "loss": 0.5666, "step": 1431 }, { "epoch": 0.39631910330035286, "grad_norm": 0.18624986708164215, "learning_rate": 1e-05, "loss": 0.5731, "step": 1432 }, { "epoch": 0.3965958624507023, "grad_norm": 0.17812664806842804, "learning_rate": 1e-05, "loss": 0.5673, "step": 1433 }, { "epoch": 0.3968726216010517, "grad_norm": 0.17763857543468475, "learning_rate": 1e-05, "loss": 0.5584, "step": 1434 }, { "epoch": 0.3971493807514011, "grad_norm": 0.18279053270816803, "learning_rate": 1e-05, "loss": 0.5861, "step": 1435 }, { "epoch": 0.3974261399017505, "grad_norm": 0.1726977378129959, "learning_rate": 1e-05, "loss": 0.5538, "step": 1436 }, { "epoch": 0.39770289905209993, "grad_norm": 0.1859642118215561, "learning_rate": 1e-05, "loss": 0.5408, "step": 1437 }, { "epoch": 0.3979796582024493, "grad_norm": 0.18366074562072754, "learning_rate": 1e-05, "loss": 0.5646, "step": 1438 }, { "epoch": 0.3982564173527987, "grad_norm": 0.16897962987422943, "learning_rate": 1e-05, "loss": 0.5666, "step": 1439 }, { "epoch": 0.3985331765031481, "grad_norm": 0.18271571397781372, "learning_rate": 1e-05, "loss": 0.5722, "step": 1440 }, { "epoch": 0.39880993565349754, "grad_norm": 0.18231238424777985, "learning_rate": 1e-05, "loss": 0.5824, "step": 1441 }, { "epoch": 0.39908669480384695, "grad_norm": 0.16999293863773346, "learning_rate": 1e-05, "loss": 0.5505, "step": 1442 }, { "epoch": 0.39936345395419637, "grad_norm": 0.1809111386537552, "learning_rate": 1e-05, "loss": 0.6006, "step": 1443 }, { "epoch": 0.3996402131045458, "grad_norm": 0.17395241558551788, "learning_rate": 1e-05, "loss": 0.5605, "step": 1444 }, { "epoch": 0.3999169722548952, "grad_norm": 0.1652216762304306, "learning_rate": 1e-05, "loss": 0.5636, "step": 1445 }, { "epoch": 0.4001937314052446, "grad_norm": 0.1815212368965149, "learning_rate": 1e-05, "loss": 0.5571, "step": 1446 }, { "epoch": 0.40047049055559397, "grad_norm": 0.17290183901786804, "learning_rate": 1e-05, "loss": 0.5882, "step": 1447 }, { "epoch": 0.4007472497059434, "grad_norm": 0.18300454318523407, "learning_rate": 1e-05, "loss": 0.5914, "step": 1448 }, { "epoch": 0.4010240088562928, "grad_norm": 0.18221522867679596, "learning_rate": 1e-05, "loss": 0.5753, "step": 1449 }, { "epoch": 0.4013007680066422, "grad_norm": 0.17019020020961761, "learning_rate": 1e-05, "loss": 0.5838, "step": 1450 }, { "epoch": 0.40157752715699163, "grad_norm": 0.17736484110355377, "learning_rate": 1e-05, "loss": 0.5668, "step": 1451 }, { "epoch": 0.40185428630734105, "grad_norm": 0.1821926385164261, "learning_rate": 1e-05, "loss": 0.5549, "step": 1452 }, { "epoch": 0.40213104545769046, "grad_norm": 0.17712363600730896, "learning_rate": 1e-05, "loss": 0.5523, "step": 1453 }, { "epoch": 0.4024078046080399, "grad_norm": 0.1849120855331421, "learning_rate": 1e-05, "loss": 0.5904, "step": 1454 }, { "epoch": 0.40268456375838924, "grad_norm": 0.17633987963199615, "learning_rate": 1e-05, "loss": 0.5704, "step": 1455 }, { "epoch": 0.40296132290873865, "grad_norm": 0.18164440989494324, "learning_rate": 1e-05, "loss": 0.6011, "step": 1456 }, { "epoch": 0.40323808205908807, "grad_norm": 0.17209288477897644, "learning_rate": 1e-05, "loss": 0.5583, "step": 1457 }, { "epoch": 0.4035148412094375, "grad_norm": 0.18488256633281708, "learning_rate": 1e-05, "loss": 0.5938, "step": 1458 }, { "epoch": 0.4037916003597869, "grad_norm": 0.183616042137146, "learning_rate": 1e-05, "loss": 0.5552, "step": 1459 }, { "epoch": 0.4040683595101363, "grad_norm": 0.17334163188934326, "learning_rate": 1e-05, "loss": 0.5826, "step": 1460 }, { "epoch": 0.4043451186604857, "grad_norm": 0.19412384927272797, "learning_rate": 1e-05, "loss": 0.5703, "step": 1461 }, { "epoch": 0.40462187781083514, "grad_norm": 0.17350444197654724, "learning_rate": 1e-05, "loss": 0.5475, "step": 1462 }, { "epoch": 0.40489863696118455, "grad_norm": 0.18170849978923798, "learning_rate": 1e-05, "loss": 0.5896, "step": 1463 }, { "epoch": 0.4051753961115339, "grad_norm": 0.18459850549697876, "learning_rate": 1e-05, "loss": 0.5593, "step": 1464 }, { "epoch": 0.40545215526188333, "grad_norm": 0.17216643691062927, "learning_rate": 1e-05, "loss": 0.5698, "step": 1465 }, { "epoch": 0.40572891441223274, "grad_norm": 0.17211702466011047, "learning_rate": 1e-05, "loss": 0.5489, "step": 1466 }, { "epoch": 0.40600567356258216, "grad_norm": 0.1699201613664627, "learning_rate": 1e-05, "loss": 0.5823, "step": 1467 }, { "epoch": 0.4062824327129316, "grad_norm": 0.19719649851322174, "learning_rate": 1e-05, "loss": 0.5653, "step": 1468 }, { "epoch": 0.406559191863281, "grad_norm": 0.1761341542005539, "learning_rate": 1e-05, "loss": 0.5742, "step": 1469 }, { "epoch": 0.4068359510136304, "grad_norm": 0.1781761348247528, "learning_rate": 1e-05, "loss": 0.5457, "step": 1470 }, { "epoch": 0.4071127101639798, "grad_norm": 0.1761397421360016, "learning_rate": 1e-05, "loss": 0.5416, "step": 1471 }, { "epoch": 0.4073894693143292, "grad_norm": 0.18230734765529633, "learning_rate": 1e-05, "loss": 0.5716, "step": 1472 }, { "epoch": 0.4076662284646786, "grad_norm": 0.17972059547901154, "learning_rate": 1e-05, "loss": 0.5605, "step": 1473 }, { "epoch": 0.407942987615028, "grad_norm": 0.17695370316505432, "learning_rate": 1e-05, "loss": 0.56, "step": 1474 }, { "epoch": 0.4082197467653774, "grad_norm": 0.18217091262340546, "learning_rate": 1e-05, "loss": 0.5714, "step": 1475 }, { "epoch": 0.40849650591572684, "grad_norm": 0.17418955266475677, "learning_rate": 1e-05, "loss": 0.5536, "step": 1476 }, { "epoch": 0.40877326506607625, "grad_norm": 0.18721500039100647, "learning_rate": 1e-05, "loss": 0.5918, "step": 1477 }, { "epoch": 0.40905002421642567, "grad_norm": 0.17565621435642242, "learning_rate": 1e-05, "loss": 0.5648, "step": 1478 }, { "epoch": 0.4093267833667751, "grad_norm": 0.17939890921115875, "learning_rate": 1e-05, "loss": 0.5539, "step": 1479 }, { "epoch": 0.4096035425171245, "grad_norm": 0.17622576653957367, "learning_rate": 1e-05, "loss": 0.5786, "step": 1480 }, { "epoch": 0.40988030166747386, "grad_norm": 0.17380963265895844, "learning_rate": 1e-05, "loss": 0.5415, "step": 1481 }, { "epoch": 0.41015706081782327, "grad_norm": 0.1826447695493698, "learning_rate": 1e-05, "loss": 0.578, "step": 1482 }, { "epoch": 0.4104338199681727, "grad_norm": 0.1762140542268753, "learning_rate": 1e-05, "loss": 0.5504, "step": 1483 }, { "epoch": 0.4107105791185221, "grad_norm": 0.17979860305786133, "learning_rate": 1e-05, "loss": 0.5516, "step": 1484 }, { "epoch": 0.4109873382688715, "grad_norm": 0.18510496616363525, "learning_rate": 1e-05, "loss": 0.5815, "step": 1485 }, { "epoch": 0.41126409741922093, "grad_norm": 0.18123029172420502, "learning_rate": 1e-05, "loss": 0.5704, "step": 1486 }, { "epoch": 0.41154085656957035, "grad_norm": 0.17769651114940643, "learning_rate": 1e-05, "loss": 0.5427, "step": 1487 }, { "epoch": 0.41181761571991976, "grad_norm": 0.18336544930934906, "learning_rate": 1e-05, "loss": 0.5661, "step": 1488 }, { "epoch": 0.4120943748702692, "grad_norm": 0.18079319596290588, "learning_rate": 1e-05, "loss": 0.5791, "step": 1489 }, { "epoch": 0.41237113402061853, "grad_norm": 0.17120859026908875, "learning_rate": 1e-05, "loss": 0.5727, "step": 1490 }, { "epoch": 0.41264789317096795, "grad_norm": 0.17295418679714203, "learning_rate": 1e-05, "loss": 0.5422, "step": 1491 }, { "epoch": 0.41292465232131736, "grad_norm": 0.199989452958107, "learning_rate": 1e-05, "loss": 0.5697, "step": 1492 }, { "epoch": 0.4132014114716668, "grad_norm": 0.1872192919254303, "learning_rate": 1e-05, "loss": 0.5718, "step": 1493 }, { "epoch": 0.4134781706220162, "grad_norm": 0.1743786334991455, "learning_rate": 1e-05, "loss": 0.5552, "step": 1494 }, { "epoch": 0.4137549297723656, "grad_norm": 0.1793673038482666, "learning_rate": 1e-05, "loss": 0.5694, "step": 1495 }, { "epoch": 0.414031688922715, "grad_norm": 0.18722805380821228, "learning_rate": 1e-05, "loss": 0.5788, "step": 1496 }, { "epoch": 0.41430844807306444, "grad_norm": 0.17903171479701996, "learning_rate": 1e-05, "loss": 0.5862, "step": 1497 }, { "epoch": 0.4145852072234138, "grad_norm": 0.18410106003284454, "learning_rate": 1e-05, "loss": 0.5594, "step": 1498 }, { "epoch": 0.4148619663737632, "grad_norm": 0.18749265372753143, "learning_rate": 1e-05, "loss": 0.5981, "step": 1499 }, { "epoch": 0.41513872552411263, "grad_norm": 0.17177854478359222, "learning_rate": 1e-05, "loss": 0.5439, "step": 1500 }, { "epoch": 0.41541548467446204, "grad_norm": 0.17542406916618347, "learning_rate": 1e-05, "loss": 0.5333, "step": 1501 }, { "epoch": 0.41569224382481146, "grad_norm": 0.17458660900592804, "learning_rate": 1e-05, "loss": 0.595, "step": 1502 }, { "epoch": 0.4159690029751609, "grad_norm": 0.18264062702655792, "learning_rate": 1e-05, "loss": 0.5564, "step": 1503 }, { "epoch": 0.4162457621255103, "grad_norm": 0.1735503375530243, "learning_rate": 1e-05, "loss": 0.5652, "step": 1504 }, { "epoch": 0.4165225212758597, "grad_norm": 0.1699366271495819, "learning_rate": 1e-05, "loss": 0.5596, "step": 1505 }, { "epoch": 0.4167992804262091, "grad_norm": 0.17590561509132385, "learning_rate": 1e-05, "loss": 0.5297, "step": 1506 }, { "epoch": 0.4170760395765585, "grad_norm": 0.17690883576869965, "learning_rate": 1e-05, "loss": 0.5702, "step": 1507 }, { "epoch": 0.4173527987269079, "grad_norm": 0.19152002036571503, "learning_rate": 1e-05, "loss": 0.6022, "step": 1508 }, { "epoch": 0.4176295578772573, "grad_norm": 0.1762019693851471, "learning_rate": 1e-05, "loss": 0.5386, "step": 1509 }, { "epoch": 0.4179063170276067, "grad_norm": 0.16524462401866913, "learning_rate": 1e-05, "loss": 0.5144, "step": 1510 }, { "epoch": 0.41818307617795614, "grad_norm": 0.1828121542930603, "learning_rate": 1e-05, "loss": 0.5669, "step": 1511 }, { "epoch": 0.41845983532830555, "grad_norm": 0.1735844910144806, "learning_rate": 1e-05, "loss": 0.5817, "step": 1512 }, { "epoch": 0.41873659447865497, "grad_norm": 0.1662319153547287, "learning_rate": 1e-05, "loss": 0.5487, "step": 1513 }, { "epoch": 0.4190133536290044, "grad_norm": 0.1689973622560501, "learning_rate": 1e-05, "loss": 0.5597, "step": 1514 }, { "epoch": 0.41929011277935374, "grad_norm": 0.17735256254673004, "learning_rate": 1e-05, "loss": 0.5384, "step": 1515 }, { "epoch": 0.41956687192970316, "grad_norm": 0.1735515594482422, "learning_rate": 1e-05, "loss": 0.5703, "step": 1516 }, { "epoch": 0.41984363108005257, "grad_norm": 0.17015324532985687, "learning_rate": 1e-05, "loss": 0.573, "step": 1517 }, { "epoch": 0.420120390230402, "grad_norm": 0.17541833221912384, "learning_rate": 1e-05, "loss": 0.5666, "step": 1518 }, { "epoch": 0.4203971493807514, "grad_norm": 0.17453157901763916, "learning_rate": 1e-05, "loss": 0.5521, "step": 1519 }, { "epoch": 0.4206739085311008, "grad_norm": 0.1721707284450531, "learning_rate": 1e-05, "loss": 0.5538, "step": 1520 }, { "epoch": 0.42095066768145023, "grad_norm": 0.17448361217975616, "learning_rate": 1e-05, "loss": 0.5814, "step": 1521 }, { "epoch": 0.42122742683179965, "grad_norm": 0.1724463254213333, "learning_rate": 1e-05, "loss": 0.576, "step": 1522 }, { "epoch": 0.42150418598214906, "grad_norm": 0.17132943868637085, "learning_rate": 1e-05, "loss": 0.5591, "step": 1523 }, { "epoch": 0.4217809451324984, "grad_norm": 0.17666466534137726, "learning_rate": 1e-05, "loss": 0.5694, "step": 1524 }, { "epoch": 0.42205770428284783, "grad_norm": 0.1733013242483139, "learning_rate": 1e-05, "loss": 0.5372, "step": 1525 }, { "epoch": 0.42233446343319725, "grad_norm": 0.17638695240020752, "learning_rate": 1e-05, "loss": 0.579, "step": 1526 }, { "epoch": 0.42261122258354666, "grad_norm": 0.17166326940059662, "learning_rate": 1e-05, "loss": 0.5619, "step": 1527 }, { "epoch": 0.4228879817338961, "grad_norm": 0.18457943201065063, "learning_rate": 1e-05, "loss": 0.5701, "step": 1528 }, { "epoch": 0.4231647408842455, "grad_norm": 0.1641901284456253, "learning_rate": 1e-05, "loss": 0.5463, "step": 1529 }, { "epoch": 0.4234415000345949, "grad_norm": 0.18234814703464508, "learning_rate": 1e-05, "loss": 0.5617, "step": 1530 }, { "epoch": 0.4237182591849443, "grad_norm": 0.1766400933265686, "learning_rate": 1e-05, "loss": 0.5595, "step": 1531 }, { "epoch": 0.4239950183352937, "grad_norm": 0.17225763201713562, "learning_rate": 1e-05, "loss": 0.543, "step": 1532 }, { "epoch": 0.4242717774856431, "grad_norm": 0.16967903077602386, "learning_rate": 1e-05, "loss": 0.5679, "step": 1533 }, { "epoch": 0.4245485366359925, "grad_norm": 0.17493566870689392, "learning_rate": 1e-05, "loss": 0.5619, "step": 1534 }, { "epoch": 0.4248252957863419, "grad_norm": 0.17991040647029877, "learning_rate": 1e-05, "loss": 0.5819, "step": 1535 }, { "epoch": 0.42510205493669134, "grad_norm": 0.17257475852966309, "learning_rate": 1e-05, "loss": 0.5659, "step": 1536 }, { "epoch": 0.42537881408704076, "grad_norm": 0.17742152512073517, "learning_rate": 1e-05, "loss": 0.5552, "step": 1537 }, { "epoch": 0.4256555732373902, "grad_norm": 0.18793608248233795, "learning_rate": 1e-05, "loss": 0.5828, "step": 1538 }, { "epoch": 0.4259323323877396, "grad_norm": 0.18162162601947784, "learning_rate": 1e-05, "loss": 0.5952, "step": 1539 }, { "epoch": 0.426209091538089, "grad_norm": 0.17116500437259674, "learning_rate": 1e-05, "loss": 0.5493, "step": 1540 }, { "epoch": 0.42648585068843836, "grad_norm": 0.1830659806728363, "learning_rate": 1e-05, "loss": 0.5633, "step": 1541 }, { "epoch": 0.4267626098387878, "grad_norm": 0.18065834045410156, "learning_rate": 1e-05, "loss": 0.5677, "step": 1542 }, { "epoch": 0.4270393689891372, "grad_norm": 0.16863247752189636, "learning_rate": 1e-05, "loss": 0.5636, "step": 1543 }, { "epoch": 0.4273161281394866, "grad_norm": 0.17335504293441772, "learning_rate": 1e-05, "loss": 0.5645, "step": 1544 }, { "epoch": 0.427592887289836, "grad_norm": 0.18362246453762054, "learning_rate": 1e-05, "loss": 0.5752, "step": 1545 }, { "epoch": 0.42786964644018544, "grad_norm": 0.1736215204000473, "learning_rate": 1e-05, "loss": 0.548, "step": 1546 }, { "epoch": 0.42814640559053485, "grad_norm": 0.17971207201480865, "learning_rate": 1e-05, "loss": 0.5722, "step": 1547 }, { "epoch": 0.42842316474088427, "grad_norm": 0.17062056064605713, "learning_rate": 1e-05, "loss": 0.5843, "step": 1548 }, { "epoch": 0.4286999238912337, "grad_norm": 0.18317000567913055, "learning_rate": 1e-05, "loss": 0.6002, "step": 1549 }, { "epoch": 0.42897668304158304, "grad_norm": 0.17899209260940552, "learning_rate": 1e-05, "loss": 0.5711, "step": 1550 }, { "epoch": 0.42925344219193245, "grad_norm": 0.1850893646478653, "learning_rate": 1e-05, "loss": 0.5831, "step": 1551 }, { "epoch": 0.42953020134228187, "grad_norm": 0.17363053560256958, "learning_rate": 1e-05, "loss": 0.5592, "step": 1552 }, { "epoch": 0.4298069604926313, "grad_norm": 0.16691017150878906, "learning_rate": 1e-05, "loss": 0.5676, "step": 1553 }, { "epoch": 0.4300837196429807, "grad_norm": 0.18741317093372345, "learning_rate": 1e-05, "loss": 0.5507, "step": 1554 }, { "epoch": 0.4303604787933301, "grad_norm": 0.18003351986408234, "learning_rate": 1e-05, "loss": 0.5567, "step": 1555 }, { "epoch": 0.43063723794367953, "grad_norm": 0.17219503223896027, "learning_rate": 1e-05, "loss": 0.5355, "step": 1556 }, { "epoch": 0.43091399709402894, "grad_norm": 0.18381766974925995, "learning_rate": 1e-05, "loss": 0.5693, "step": 1557 }, { "epoch": 0.4311907562443783, "grad_norm": 0.17005233466625214, "learning_rate": 1e-05, "loss": 0.5872, "step": 1558 }, { "epoch": 0.4314675153947277, "grad_norm": 0.17091569304466248, "learning_rate": 1e-05, "loss": 0.5838, "step": 1559 }, { "epoch": 0.43174427454507713, "grad_norm": 0.17087389528751373, "learning_rate": 1e-05, "loss": 0.5612, "step": 1560 }, { "epoch": 0.43202103369542655, "grad_norm": 0.1696418970823288, "learning_rate": 1e-05, "loss": 0.5561, "step": 1561 }, { "epoch": 0.43229779284577596, "grad_norm": 0.17325793206691742, "learning_rate": 1e-05, "loss": 0.5463, "step": 1562 }, { "epoch": 0.4325745519961254, "grad_norm": 0.18232277035713196, "learning_rate": 1e-05, "loss": 0.5936, "step": 1563 }, { "epoch": 0.4328513111464748, "grad_norm": 0.1728023886680603, "learning_rate": 1e-05, "loss": 0.5625, "step": 1564 }, { "epoch": 0.4331280702968242, "grad_norm": 0.17890408635139465, "learning_rate": 1e-05, "loss": 0.5552, "step": 1565 }, { "epoch": 0.4334048294471736, "grad_norm": 0.17302964627742767, "learning_rate": 1e-05, "loss": 0.5321, "step": 1566 }, { "epoch": 0.433681588597523, "grad_norm": 0.1773262321949005, "learning_rate": 1e-05, "loss": 0.5434, "step": 1567 }, { "epoch": 0.4339583477478724, "grad_norm": 0.17568877339363098, "learning_rate": 1e-05, "loss": 0.5626, "step": 1568 }, { "epoch": 0.4342351068982218, "grad_norm": 0.18341971933841705, "learning_rate": 1e-05, "loss": 0.5664, "step": 1569 }, { "epoch": 0.4345118660485712, "grad_norm": 0.17505501210689545, "learning_rate": 1e-05, "loss": 0.5385, "step": 1570 }, { "epoch": 0.43478862519892064, "grad_norm": 0.17028199136257172, "learning_rate": 1e-05, "loss": 0.5384, "step": 1571 }, { "epoch": 0.43506538434927006, "grad_norm": 0.1819106787443161, "learning_rate": 1e-05, "loss": 0.5635, "step": 1572 }, { "epoch": 0.43534214349961947, "grad_norm": 0.17087076604366302, "learning_rate": 1e-05, "loss": 0.5588, "step": 1573 }, { "epoch": 0.4356189026499689, "grad_norm": 0.18436890840530396, "learning_rate": 1e-05, "loss": 0.566, "step": 1574 }, { "epoch": 0.43589566180031825, "grad_norm": 0.18530385196208954, "learning_rate": 1e-05, "loss": 0.5709, "step": 1575 }, { "epoch": 0.43617242095066766, "grad_norm": 0.177824005484581, "learning_rate": 1e-05, "loss": 0.5622, "step": 1576 }, { "epoch": 0.4364491801010171, "grad_norm": 0.17693771421909332, "learning_rate": 1e-05, "loss": 0.5716, "step": 1577 }, { "epoch": 0.4367259392513665, "grad_norm": 0.177291601896286, "learning_rate": 1e-05, "loss": 0.604, "step": 1578 }, { "epoch": 0.4370026984017159, "grad_norm": 0.19050122797489166, "learning_rate": 1e-05, "loss": 0.5648, "step": 1579 }, { "epoch": 0.4372794575520653, "grad_norm": 0.1804661601781845, "learning_rate": 1e-05, "loss": 0.5404, "step": 1580 }, { "epoch": 0.43755621670241474, "grad_norm": 0.17990168929100037, "learning_rate": 1e-05, "loss": 0.5729, "step": 1581 }, { "epoch": 0.43783297585276415, "grad_norm": 0.1790662556886673, "learning_rate": 1e-05, "loss": 0.5517, "step": 1582 }, { "epoch": 0.43810973500311357, "grad_norm": 0.1695900410413742, "learning_rate": 1e-05, "loss": 0.5786, "step": 1583 }, { "epoch": 0.4383864941534629, "grad_norm": 0.171067014336586, "learning_rate": 1e-05, "loss": 0.5759, "step": 1584 }, { "epoch": 0.43866325330381234, "grad_norm": 0.1766362488269806, "learning_rate": 1e-05, "loss": 0.5702, "step": 1585 }, { "epoch": 0.43894001245416175, "grad_norm": 0.17509467899799347, "learning_rate": 1e-05, "loss": 0.5508, "step": 1586 }, { "epoch": 0.43921677160451117, "grad_norm": 0.17684060335159302, "learning_rate": 1e-05, "loss": 0.5863, "step": 1587 }, { "epoch": 0.4394935307548606, "grad_norm": 0.18564896285533905, "learning_rate": 1e-05, "loss": 0.544, "step": 1588 }, { "epoch": 0.43977028990521, "grad_norm": 0.17649570107460022, "learning_rate": 1e-05, "loss": 0.5503, "step": 1589 }, { "epoch": 0.4400470490555594, "grad_norm": 0.1740492880344391, "learning_rate": 1e-05, "loss": 0.5792, "step": 1590 }, { "epoch": 0.44032380820590883, "grad_norm": 0.1766560822725296, "learning_rate": 1e-05, "loss": 0.5726, "step": 1591 }, { "epoch": 0.44060056735625824, "grad_norm": 0.18109986186027527, "learning_rate": 1e-05, "loss": 0.5648, "step": 1592 }, { "epoch": 0.4408773265066076, "grad_norm": 0.1766422837972641, "learning_rate": 1e-05, "loss": 0.5634, "step": 1593 }, { "epoch": 0.441154085656957, "grad_norm": 0.17996059358119965, "learning_rate": 1e-05, "loss": 0.5516, "step": 1594 }, { "epoch": 0.44143084480730643, "grad_norm": 0.16909009218215942, "learning_rate": 1e-05, "loss": 0.5582, "step": 1595 }, { "epoch": 0.44170760395765585, "grad_norm": 0.17388372123241425, "learning_rate": 1e-05, "loss": 0.545, "step": 1596 }, { "epoch": 0.44198436310800526, "grad_norm": 0.16665171086788177, "learning_rate": 1e-05, "loss": 0.5588, "step": 1597 }, { "epoch": 0.4422611222583547, "grad_norm": 0.17686384916305542, "learning_rate": 1e-05, "loss": 0.5726, "step": 1598 }, { "epoch": 0.4425378814087041, "grad_norm": 0.17736327648162842, "learning_rate": 1e-05, "loss": 0.5669, "step": 1599 }, { "epoch": 0.4428146405590535, "grad_norm": 0.1747191697359085, "learning_rate": 1e-05, "loss": 0.546, "step": 1600 }, { "epoch": 0.44309139970940287, "grad_norm": 0.1697254478931427, "learning_rate": 1e-05, "loss": 0.5414, "step": 1601 }, { "epoch": 0.4433681588597523, "grad_norm": 0.1800551563501358, "learning_rate": 1e-05, "loss": 0.6088, "step": 1602 }, { "epoch": 0.4436449180101017, "grad_norm": 0.17082028090953827, "learning_rate": 1e-05, "loss": 0.565, "step": 1603 }, { "epoch": 0.4439216771604511, "grad_norm": 0.1737051159143448, "learning_rate": 1e-05, "loss": 0.5652, "step": 1604 }, { "epoch": 0.4441984363108005, "grad_norm": 0.1835697591304779, "learning_rate": 1e-05, "loss": 0.5766, "step": 1605 }, { "epoch": 0.44447519546114994, "grad_norm": 0.18363744020462036, "learning_rate": 1e-05, "loss": 0.5539, "step": 1606 }, { "epoch": 0.44475195461149936, "grad_norm": 0.17289447784423828, "learning_rate": 1e-05, "loss": 0.5784, "step": 1607 }, { "epoch": 0.44502871376184877, "grad_norm": 0.17156517505645752, "learning_rate": 1e-05, "loss": 0.5722, "step": 1608 }, { "epoch": 0.4453054729121982, "grad_norm": 0.17524364590644836, "learning_rate": 1e-05, "loss": 0.5666, "step": 1609 }, { "epoch": 0.44558223206254755, "grad_norm": 0.1809045374393463, "learning_rate": 1e-05, "loss": 0.59, "step": 1610 }, { "epoch": 0.44585899121289696, "grad_norm": 0.16601110994815826, "learning_rate": 1e-05, "loss": 0.5286, "step": 1611 }, { "epoch": 0.4461357503632464, "grad_norm": 0.18372170627117157, "learning_rate": 1e-05, "loss": 0.5956, "step": 1612 }, { "epoch": 0.4464125095135958, "grad_norm": 0.1851986199617386, "learning_rate": 1e-05, "loss": 0.5569, "step": 1613 }, { "epoch": 0.4466892686639452, "grad_norm": 0.18070034682750702, "learning_rate": 1e-05, "loss": 0.5739, "step": 1614 }, { "epoch": 0.4469660278142946, "grad_norm": 0.1725083738565445, "learning_rate": 1e-05, "loss": 0.539, "step": 1615 }, { "epoch": 0.44724278696464403, "grad_norm": 0.16947266459465027, "learning_rate": 1e-05, "loss": 0.5637, "step": 1616 }, { "epoch": 0.44751954611499345, "grad_norm": 0.17844559252262115, "learning_rate": 1e-05, "loss": 0.5586, "step": 1617 }, { "epoch": 0.4477963052653428, "grad_norm": 0.17910727858543396, "learning_rate": 1e-05, "loss": 0.5793, "step": 1618 }, { "epoch": 0.4480730644156922, "grad_norm": 0.1780283898115158, "learning_rate": 1e-05, "loss": 0.5813, "step": 1619 }, { "epoch": 0.44834982356604164, "grad_norm": 0.18091094493865967, "learning_rate": 1e-05, "loss": 0.5681, "step": 1620 }, { "epoch": 0.44862658271639105, "grad_norm": 0.1857171356678009, "learning_rate": 1e-05, "loss": 0.5833, "step": 1621 }, { "epoch": 0.44890334186674047, "grad_norm": 0.18733692169189453, "learning_rate": 1e-05, "loss": 0.5805, "step": 1622 }, { "epoch": 0.4491801010170899, "grad_norm": 0.17078281939029694, "learning_rate": 1e-05, "loss": 0.5751, "step": 1623 }, { "epoch": 0.4494568601674393, "grad_norm": 0.17413294315338135, "learning_rate": 1e-05, "loss": 0.5553, "step": 1624 }, { "epoch": 0.4497336193177887, "grad_norm": 0.18706223368644714, "learning_rate": 1e-05, "loss": 0.5664, "step": 1625 }, { "epoch": 0.45001037846813813, "grad_norm": 0.16952700912952423, "learning_rate": 1e-05, "loss": 0.5677, "step": 1626 }, { "epoch": 0.4502871376184875, "grad_norm": 0.18147064745426178, "learning_rate": 1e-05, "loss": 0.5714, "step": 1627 }, { "epoch": 0.4505638967688369, "grad_norm": 0.18001917004585266, "learning_rate": 1e-05, "loss": 0.565, "step": 1628 }, { "epoch": 0.4508406559191863, "grad_norm": 0.18313783407211304, "learning_rate": 1e-05, "loss": 0.5857, "step": 1629 }, { "epoch": 0.45111741506953573, "grad_norm": 0.16735799610614777, "learning_rate": 1e-05, "loss": 0.5746, "step": 1630 }, { "epoch": 0.45139417421988515, "grad_norm": 0.17053896188735962, "learning_rate": 1e-05, "loss": 0.5548, "step": 1631 }, { "epoch": 0.45167093337023456, "grad_norm": 0.2094276249408722, "learning_rate": 1e-05, "loss": 0.589, "step": 1632 }, { "epoch": 0.451947692520584, "grad_norm": 0.16227442026138306, "learning_rate": 1e-05, "loss": 0.5422, "step": 1633 }, { "epoch": 0.4522244516709334, "grad_norm": 0.18151967227458954, "learning_rate": 1e-05, "loss": 0.5871, "step": 1634 }, { "epoch": 0.45250121082128275, "grad_norm": 0.18395259976387024, "learning_rate": 1e-05, "loss": 0.5999, "step": 1635 }, { "epoch": 0.45277796997163217, "grad_norm": 0.17296218872070312, "learning_rate": 1e-05, "loss": 0.5626, "step": 1636 }, { "epoch": 0.4530547291219816, "grad_norm": 0.1770433783531189, "learning_rate": 1e-05, "loss": 0.5567, "step": 1637 }, { "epoch": 0.453331488272331, "grad_norm": 0.1718888133764267, "learning_rate": 1e-05, "loss": 0.5767, "step": 1638 }, { "epoch": 0.4536082474226804, "grad_norm": 0.17602650821208954, "learning_rate": 1e-05, "loss": 0.5943, "step": 1639 }, { "epoch": 0.4538850065730298, "grad_norm": 0.17516137659549713, "learning_rate": 1e-05, "loss": 0.5614, "step": 1640 }, { "epoch": 0.45416176572337924, "grad_norm": 0.1802394688129425, "learning_rate": 1e-05, "loss": 0.5605, "step": 1641 }, { "epoch": 0.45443852487372866, "grad_norm": 0.17672830820083618, "learning_rate": 1e-05, "loss": 0.5573, "step": 1642 }, { "epoch": 0.45471528402407807, "grad_norm": 0.17523744702339172, "learning_rate": 1e-05, "loss": 0.5292, "step": 1643 }, { "epoch": 0.45499204317442743, "grad_norm": 0.17210376262664795, "learning_rate": 1e-05, "loss": 0.5335, "step": 1644 }, { "epoch": 0.45526880232477684, "grad_norm": 0.19019120931625366, "learning_rate": 1e-05, "loss": 0.556, "step": 1645 }, { "epoch": 0.45554556147512626, "grad_norm": 0.18290413916110992, "learning_rate": 1e-05, "loss": 0.5278, "step": 1646 }, { "epoch": 0.4558223206254757, "grad_norm": 0.18073764443397522, "learning_rate": 1e-05, "loss": 0.5915, "step": 1647 }, { "epoch": 0.4560990797758251, "grad_norm": 0.17911827564239502, "learning_rate": 1e-05, "loss": 0.531, "step": 1648 }, { "epoch": 0.4563758389261745, "grad_norm": 0.17174343764781952, "learning_rate": 1e-05, "loss": 0.5623, "step": 1649 }, { "epoch": 0.4566525980765239, "grad_norm": 0.18547669053077698, "learning_rate": 1e-05, "loss": 0.5975, "step": 1650 }, { "epoch": 0.45692935722687333, "grad_norm": 0.18203768134117126, "learning_rate": 1e-05, "loss": 0.5608, "step": 1651 }, { "epoch": 0.45720611637722275, "grad_norm": 0.1765459179878235, "learning_rate": 1e-05, "loss": 0.5747, "step": 1652 }, { "epoch": 0.4574828755275721, "grad_norm": 0.17241889238357544, "learning_rate": 1e-05, "loss": 0.553, "step": 1653 }, { "epoch": 0.4577596346779215, "grad_norm": 0.18017670512199402, "learning_rate": 1e-05, "loss": 0.5484, "step": 1654 }, { "epoch": 0.45803639382827094, "grad_norm": 0.1846148669719696, "learning_rate": 1e-05, "loss": 0.5816, "step": 1655 }, { "epoch": 0.45831315297862035, "grad_norm": 0.19108623266220093, "learning_rate": 1e-05, "loss": 0.5606, "step": 1656 }, { "epoch": 0.45858991212896977, "grad_norm": 0.18224917352199554, "learning_rate": 1e-05, "loss": 0.5443, "step": 1657 }, { "epoch": 0.4588666712793192, "grad_norm": 0.1721319705247879, "learning_rate": 1e-05, "loss": 0.5519, "step": 1658 }, { "epoch": 0.4591434304296686, "grad_norm": 0.17901445925235748, "learning_rate": 1e-05, "loss": 0.528, "step": 1659 }, { "epoch": 0.459420189580018, "grad_norm": 0.1825544536113739, "learning_rate": 1e-05, "loss": 0.6119, "step": 1660 }, { "epoch": 0.45969694873036737, "grad_norm": 0.18003007769584656, "learning_rate": 1e-05, "loss": 0.5515, "step": 1661 }, { "epoch": 0.4599737078807168, "grad_norm": 0.17733997106552124, "learning_rate": 1e-05, "loss": 0.5449, "step": 1662 }, { "epoch": 0.4602504670310662, "grad_norm": 0.178132563829422, "learning_rate": 1e-05, "loss": 0.5871, "step": 1663 }, { "epoch": 0.4605272261814156, "grad_norm": 0.17589201033115387, "learning_rate": 1e-05, "loss": 0.5826, "step": 1664 }, { "epoch": 0.46080398533176503, "grad_norm": 0.18389558792114258, "learning_rate": 1e-05, "loss": 0.5441, "step": 1665 }, { "epoch": 0.46108074448211445, "grad_norm": 0.17437689006328583, "learning_rate": 1e-05, "loss": 0.5397, "step": 1666 }, { "epoch": 0.46135750363246386, "grad_norm": 0.18122035264968872, "learning_rate": 1e-05, "loss": 0.551, "step": 1667 }, { "epoch": 0.4616342627828133, "grad_norm": 0.17138026654720306, "learning_rate": 1e-05, "loss": 0.5568, "step": 1668 }, { "epoch": 0.4619110219331627, "grad_norm": 0.18272562325000763, "learning_rate": 1e-05, "loss": 0.6013, "step": 1669 }, { "epoch": 0.46218778108351205, "grad_norm": 0.1831650286912918, "learning_rate": 1e-05, "loss": 0.5626, "step": 1670 }, { "epoch": 0.46246454023386147, "grad_norm": 0.17071384191513062, "learning_rate": 1e-05, "loss": 0.5503, "step": 1671 }, { "epoch": 0.4627412993842109, "grad_norm": 0.1811319887638092, "learning_rate": 1e-05, "loss": 0.586, "step": 1672 }, { "epoch": 0.4630180585345603, "grad_norm": 0.17684495449066162, "learning_rate": 1e-05, "loss": 0.563, "step": 1673 }, { "epoch": 0.4632948176849097, "grad_norm": 0.17563365399837494, "learning_rate": 1e-05, "loss": 0.5463, "step": 1674 }, { "epoch": 0.4635715768352591, "grad_norm": 0.17373308539390564, "learning_rate": 1e-05, "loss": 0.5647, "step": 1675 }, { "epoch": 0.46384833598560854, "grad_norm": 0.17113706469535828, "learning_rate": 1e-05, "loss": 0.5506, "step": 1676 }, { "epoch": 0.46412509513595795, "grad_norm": 0.17056848108768463, "learning_rate": 1e-05, "loss": 0.5641, "step": 1677 }, { "epoch": 0.4644018542863073, "grad_norm": 0.17647698521614075, "learning_rate": 1e-05, "loss": 0.5888, "step": 1678 }, { "epoch": 0.46467861343665673, "grad_norm": 0.1715966910123825, "learning_rate": 1e-05, "loss": 0.5351, "step": 1679 }, { "epoch": 0.46495537258700614, "grad_norm": 0.17820271849632263, "learning_rate": 1e-05, "loss": 0.5498, "step": 1680 }, { "epoch": 0.46523213173735556, "grad_norm": 0.17722539603710175, "learning_rate": 1e-05, "loss": 0.5643, "step": 1681 }, { "epoch": 0.465508890887705, "grad_norm": 0.1735992729663849, "learning_rate": 1e-05, "loss": 0.5679, "step": 1682 }, { "epoch": 0.4657856500380544, "grad_norm": 0.17254845798015594, "learning_rate": 1e-05, "loss": 0.564, "step": 1683 }, { "epoch": 0.4660624091884038, "grad_norm": 0.18572454154491425, "learning_rate": 1e-05, "loss": 0.5981, "step": 1684 }, { "epoch": 0.4663391683387532, "grad_norm": 0.17588786780834198, "learning_rate": 1e-05, "loss": 0.5655, "step": 1685 }, { "epoch": 0.46661592748910263, "grad_norm": 0.16890764236450195, "learning_rate": 1e-05, "loss": 0.5634, "step": 1686 }, { "epoch": 0.466892686639452, "grad_norm": 0.17033745348453522, "learning_rate": 1e-05, "loss": 0.5438, "step": 1687 }, { "epoch": 0.4671694457898014, "grad_norm": 0.1663769632577896, "learning_rate": 1e-05, "loss": 0.5454, "step": 1688 }, { "epoch": 0.4674462049401508, "grad_norm": 0.18532128632068634, "learning_rate": 1e-05, "loss": 0.5703, "step": 1689 }, { "epoch": 0.46772296409050024, "grad_norm": 0.17460991442203522, "learning_rate": 1e-05, "loss": 0.5656, "step": 1690 }, { "epoch": 0.46799972324084965, "grad_norm": 0.1711597442626953, "learning_rate": 1e-05, "loss": 0.5689, "step": 1691 }, { "epoch": 0.46827648239119907, "grad_norm": 0.16234120726585388, "learning_rate": 1e-05, "loss": 0.5468, "step": 1692 }, { "epoch": 0.4685532415415485, "grad_norm": 0.17352654039859772, "learning_rate": 1e-05, "loss": 0.5438, "step": 1693 }, { "epoch": 0.4688300006918979, "grad_norm": 0.17356550693511963, "learning_rate": 1e-05, "loss": 0.5762, "step": 1694 }, { "epoch": 0.4691067598422473, "grad_norm": 0.17950424551963806, "learning_rate": 1e-05, "loss": 0.5684, "step": 1695 }, { "epoch": 0.46938351899259667, "grad_norm": 0.176878422498703, "learning_rate": 1e-05, "loss": 0.532, "step": 1696 }, { "epoch": 0.4696602781429461, "grad_norm": 0.17660969495773315, "learning_rate": 1e-05, "loss": 0.5692, "step": 1697 }, { "epoch": 0.4699370372932955, "grad_norm": 0.17055952548980713, "learning_rate": 1e-05, "loss": 0.5616, "step": 1698 }, { "epoch": 0.4702137964436449, "grad_norm": 0.16822731494903564, "learning_rate": 1e-05, "loss": 0.5462, "step": 1699 }, { "epoch": 0.47049055559399433, "grad_norm": 0.16797775030136108, "learning_rate": 1e-05, "loss": 0.5851, "step": 1700 }, { "epoch": 0.47076731474434375, "grad_norm": 0.17527812719345093, "learning_rate": 1e-05, "loss": 0.5455, "step": 1701 }, { "epoch": 0.47104407389469316, "grad_norm": 0.17318303883075714, "learning_rate": 1e-05, "loss": 0.5435, "step": 1702 }, { "epoch": 0.4713208330450426, "grad_norm": 0.17506814002990723, "learning_rate": 1e-05, "loss": 0.5725, "step": 1703 }, { "epoch": 0.47159759219539193, "grad_norm": 0.16666057705879211, "learning_rate": 1e-05, "loss": 0.5544, "step": 1704 }, { "epoch": 0.47187435134574135, "grad_norm": 0.18437130749225616, "learning_rate": 1e-05, "loss": 0.5806, "step": 1705 }, { "epoch": 0.47215111049609076, "grad_norm": 0.17093202471733093, "learning_rate": 1e-05, "loss": 0.5498, "step": 1706 }, { "epoch": 0.4724278696464402, "grad_norm": 0.16893672943115234, "learning_rate": 1e-05, "loss": 0.5476, "step": 1707 }, { "epoch": 0.4727046287967896, "grad_norm": 0.17855793237686157, "learning_rate": 1e-05, "loss": 0.5609, "step": 1708 }, { "epoch": 0.472981387947139, "grad_norm": 0.18759794533252716, "learning_rate": 1e-05, "loss": 0.559, "step": 1709 }, { "epoch": 0.4732581470974884, "grad_norm": 0.17377862334251404, "learning_rate": 1e-05, "loss": 0.5651, "step": 1710 }, { "epoch": 0.47353490624783784, "grad_norm": 0.17234604060649872, "learning_rate": 1e-05, "loss": 0.5426, "step": 1711 }, { "epoch": 0.47381166539818725, "grad_norm": 0.17712464928627014, "learning_rate": 1e-05, "loss": 0.5555, "step": 1712 }, { "epoch": 0.4740884245485366, "grad_norm": 0.176315575838089, "learning_rate": 1e-05, "loss": 0.5826, "step": 1713 }, { "epoch": 0.47436518369888603, "grad_norm": 0.17369136214256287, "learning_rate": 1e-05, "loss": 0.586, "step": 1714 }, { "epoch": 0.47464194284923544, "grad_norm": 0.17688927054405212, "learning_rate": 1e-05, "loss": 0.564, "step": 1715 }, { "epoch": 0.47491870199958486, "grad_norm": 0.1772143393754959, "learning_rate": 1e-05, "loss": 0.5352, "step": 1716 }, { "epoch": 0.4751954611499343, "grad_norm": 0.17640675604343414, "learning_rate": 1e-05, "loss": 0.5381, "step": 1717 }, { "epoch": 0.4754722203002837, "grad_norm": 0.17039494216442108, "learning_rate": 1e-05, "loss": 0.5569, "step": 1718 }, { "epoch": 0.4757489794506331, "grad_norm": 0.167277991771698, "learning_rate": 1e-05, "loss": 0.5545, "step": 1719 }, { "epoch": 0.4760257386009825, "grad_norm": 0.17274589836597443, "learning_rate": 1e-05, "loss": 0.5551, "step": 1720 }, { "epoch": 0.4763024977513319, "grad_norm": 0.17705455422401428, "learning_rate": 1e-05, "loss": 0.5802, "step": 1721 }, { "epoch": 0.4765792569016813, "grad_norm": 0.1730460822582245, "learning_rate": 1e-05, "loss": 0.556, "step": 1722 }, { "epoch": 0.4768560160520307, "grad_norm": 0.1792718470096588, "learning_rate": 1e-05, "loss": 0.5743, "step": 1723 }, { "epoch": 0.4771327752023801, "grad_norm": 0.17061491310596466, "learning_rate": 1e-05, "loss": 0.5455, "step": 1724 }, { "epoch": 0.47740953435272954, "grad_norm": 0.17682379484176636, "learning_rate": 1e-05, "loss": 0.5695, "step": 1725 }, { "epoch": 0.47768629350307895, "grad_norm": 0.17289216816425323, "learning_rate": 1e-05, "loss": 0.5596, "step": 1726 }, { "epoch": 0.47796305265342837, "grad_norm": 0.17211297154426575, "learning_rate": 1e-05, "loss": 0.5856, "step": 1727 }, { "epoch": 0.4782398118037778, "grad_norm": 0.16927409172058105, "learning_rate": 1e-05, "loss": 0.5553, "step": 1728 }, { "epoch": 0.4785165709541272, "grad_norm": 0.18740873038768768, "learning_rate": 1e-05, "loss": 0.5592, "step": 1729 }, { "epoch": 0.47879333010447656, "grad_norm": 0.17538155615329742, "learning_rate": 1e-05, "loss": 0.5462, "step": 1730 }, { "epoch": 0.47907008925482597, "grad_norm": 0.17656968533992767, "learning_rate": 1e-05, "loss": 0.5723, "step": 1731 }, { "epoch": 0.4793468484051754, "grad_norm": 0.1741635799407959, "learning_rate": 1e-05, "loss": 0.535, "step": 1732 }, { "epoch": 0.4796236075555248, "grad_norm": 0.17929168045520782, "learning_rate": 1e-05, "loss": 0.554, "step": 1733 }, { "epoch": 0.4799003667058742, "grad_norm": 0.19106587767601013, "learning_rate": 1e-05, "loss": 0.5712, "step": 1734 }, { "epoch": 0.48017712585622363, "grad_norm": 0.18078120052814484, "learning_rate": 1e-05, "loss": 0.5731, "step": 1735 }, { "epoch": 0.48045388500657304, "grad_norm": 0.17141808569431305, "learning_rate": 1e-05, "loss": 0.5313, "step": 1736 }, { "epoch": 0.48073064415692246, "grad_norm": 0.1742687076330185, "learning_rate": 1e-05, "loss": 0.5326, "step": 1737 }, { "epoch": 0.4810074033072718, "grad_norm": 0.16922685503959656, "learning_rate": 1e-05, "loss": 0.5599, "step": 1738 }, { "epoch": 0.48128416245762123, "grad_norm": 0.1708962768316269, "learning_rate": 1e-05, "loss": 0.5624, "step": 1739 }, { "epoch": 0.48156092160797065, "grad_norm": 0.17670322954654694, "learning_rate": 1e-05, "loss": 0.5397, "step": 1740 }, { "epoch": 0.48183768075832006, "grad_norm": 0.16551335155963898, "learning_rate": 1e-05, "loss": 0.5499, "step": 1741 }, { "epoch": 0.4821144399086695, "grad_norm": 0.17189240455627441, "learning_rate": 1e-05, "loss": 0.5597, "step": 1742 }, { "epoch": 0.4823911990590189, "grad_norm": 0.18817895650863647, "learning_rate": 1e-05, "loss": 0.5837, "step": 1743 }, { "epoch": 0.4826679582093683, "grad_norm": 0.18372739851474762, "learning_rate": 1e-05, "loss": 0.5697, "step": 1744 }, { "epoch": 0.4829447173597177, "grad_norm": 0.18520177900791168, "learning_rate": 1e-05, "loss": 0.5693, "step": 1745 }, { "epoch": 0.48322147651006714, "grad_norm": 0.18310102820396423, "learning_rate": 1e-05, "loss": 0.585, "step": 1746 }, { "epoch": 0.4834982356604165, "grad_norm": 0.21439889073371887, "learning_rate": 1e-05, "loss": 0.5891, "step": 1747 }, { "epoch": 0.4837749948107659, "grad_norm": 0.1906304955482483, "learning_rate": 1e-05, "loss": 0.5705, "step": 1748 }, { "epoch": 0.4840517539611153, "grad_norm": 0.1663013994693756, "learning_rate": 1e-05, "loss": 0.5579, "step": 1749 }, { "epoch": 0.48432851311146474, "grad_norm": 0.1810798943042755, "learning_rate": 1e-05, "loss": 0.5649, "step": 1750 }, { "epoch": 0.48460527226181416, "grad_norm": 0.1725279688835144, "learning_rate": 1e-05, "loss": 0.534, "step": 1751 }, { "epoch": 0.48488203141216357, "grad_norm": 0.17518499493598938, "learning_rate": 1e-05, "loss": 0.5466, "step": 1752 }, { "epoch": 0.485158790562513, "grad_norm": 0.1836475282907486, "learning_rate": 1e-05, "loss": 0.5764, "step": 1753 }, { "epoch": 0.4854355497128624, "grad_norm": 0.16887721419334412, "learning_rate": 1e-05, "loss": 0.5667, "step": 1754 }, { "epoch": 0.4857123088632118, "grad_norm": 0.1890583634376526, "learning_rate": 1e-05, "loss": 0.5573, "step": 1755 }, { "epoch": 0.4859890680135612, "grad_norm": 0.18042930960655212, "learning_rate": 1e-05, "loss": 0.5546, "step": 1756 }, { "epoch": 0.4862658271639106, "grad_norm": 0.17781801521778107, "learning_rate": 1e-05, "loss": 0.5534, "step": 1757 }, { "epoch": 0.48654258631426, "grad_norm": 0.17853102087974548, "learning_rate": 1e-05, "loss": 0.5676, "step": 1758 }, { "epoch": 0.4868193454646094, "grad_norm": 0.17295239865779877, "learning_rate": 1e-05, "loss": 0.5533, "step": 1759 }, { "epoch": 0.48709610461495884, "grad_norm": 0.17624245584011078, "learning_rate": 1e-05, "loss": 0.5534, "step": 1760 }, { "epoch": 0.48737286376530825, "grad_norm": 0.18068017065525055, "learning_rate": 1e-05, "loss": 0.5581, "step": 1761 }, { "epoch": 0.48764962291565767, "grad_norm": 0.20220975577831268, "learning_rate": 1e-05, "loss": 0.5983, "step": 1762 }, { "epoch": 0.4879263820660071, "grad_norm": 0.16645140945911407, "learning_rate": 1e-05, "loss": 0.5486, "step": 1763 }, { "epoch": 0.48820314121635644, "grad_norm": 0.1697653830051422, "learning_rate": 1e-05, "loss": 0.5573, "step": 1764 }, { "epoch": 0.48847990036670585, "grad_norm": 0.1643758863210678, "learning_rate": 1e-05, "loss": 0.5606, "step": 1765 }, { "epoch": 0.48875665951705527, "grad_norm": 0.17711082100868225, "learning_rate": 1e-05, "loss": 0.5505, "step": 1766 }, { "epoch": 0.4890334186674047, "grad_norm": 0.17717361450195312, "learning_rate": 1e-05, "loss": 0.5565, "step": 1767 }, { "epoch": 0.4893101778177541, "grad_norm": 0.18102526664733887, "learning_rate": 1e-05, "loss": 0.5772, "step": 1768 }, { "epoch": 0.4895869369681035, "grad_norm": 0.1766207218170166, "learning_rate": 1e-05, "loss": 0.5522, "step": 1769 }, { "epoch": 0.48986369611845293, "grad_norm": 0.18471917510032654, "learning_rate": 1e-05, "loss": 0.5351, "step": 1770 }, { "epoch": 0.49014045526880234, "grad_norm": 0.1768186092376709, "learning_rate": 1e-05, "loss": 0.5623, "step": 1771 }, { "epoch": 0.49041721441915176, "grad_norm": 0.16356885433197021, "learning_rate": 1e-05, "loss": 0.5637, "step": 1772 }, { "epoch": 0.4906939735695011, "grad_norm": 0.17159558832645416, "learning_rate": 1e-05, "loss": 0.5743, "step": 1773 }, { "epoch": 0.49097073271985053, "grad_norm": 0.17534011602401733, "learning_rate": 1e-05, "loss": 0.5845, "step": 1774 }, { "epoch": 0.49124749187019995, "grad_norm": 0.17268924415111542, "learning_rate": 1e-05, "loss": 0.5431, "step": 1775 }, { "epoch": 0.49152425102054936, "grad_norm": 0.1761191040277481, "learning_rate": 1e-05, "loss": 0.5499, "step": 1776 }, { "epoch": 0.4918010101708988, "grad_norm": 0.1611316204071045, "learning_rate": 1e-05, "loss": 0.5264, "step": 1777 }, { "epoch": 0.4920777693212482, "grad_norm": 0.17312142252922058, "learning_rate": 1e-05, "loss": 0.5619, "step": 1778 }, { "epoch": 0.4923545284715976, "grad_norm": 0.1729031503200531, "learning_rate": 1e-05, "loss": 0.5898, "step": 1779 }, { "epoch": 0.492631287621947, "grad_norm": 0.1861211657524109, "learning_rate": 1e-05, "loss": 0.5907, "step": 1780 }, { "epoch": 0.4929080467722964, "grad_norm": 0.16766397655010223, "learning_rate": 1e-05, "loss": 0.5438, "step": 1781 }, { "epoch": 0.4931848059226458, "grad_norm": 0.17010802030563354, "learning_rate": 1e-05, "loss": 0.5468, "step": 1782 }, { "epoch": 0.4934615650729952, "grad_norm": 0.18427087366580963, "learning_rate": 1e-05, "loss": 0.5961, "step": 1783 }, { "epoch": 0.4937383242233446, "grad_norm": 0.18200050294399261, "learning_rate": 1e-05, "loss": 0.5626, "step": 1784 }, { "epoch": 0.49401508337369404, "grad_norm": 0.16962210834026337, "learning_rate": 1e-05, "loss": 0.5587, "step": 1785 }, { "epoch": 0.49429184252404346, "grad_norm": 0.17338399589061737, "learning_rate": 1e-05, "loss": 0.5556, "step": 1786 }, { "epoch": 0.49456860167439287, "grad_norm": 0.17211543023586273, "learning_rate": 1e-05, "loss": 0.5574, "step": 1787 }, { "epoch": 0.4948453608247423, "grad_norm": 0.18037846684455872, "learning_rate": 1e-05, "loss": 0.5556, "step": 1788 }, { "epoch": 0.4951221199750917, "grad_norm": 0.17706286907196045, "learning_rate": 1e-05, "loss": 0.5619, "step": 1789 }, { "epoch": 0.49539887912544106, "grad_norm": 0.1762688308954239, "learning_rate": 1e-05, "loss": 0.5726, "step": 1790 }, { "epoch": 0.4956756382757905, "grad_norm": 0.16670124232769012, "learning_rate": 1e-05, "loss": 0.5649, "step": 1791 }, { "epoch": 0.4959523974261399, "grad_norm": 0.17779873311519623, "learning_rate": 1e-05, "loss": 0.5529, "step": 1792 }, { "epoch": 0.4962291565764893, "grad_norm": 0.17980870604515076, "learning_rate": 1e-05, "loss": 0.5822, "step": 1793 }, { "epoch": 0.4965059157268387, "grad_norm": 0.1710050404071808, "learning_rate": 1e-05, "loss": 0.6137, "step": 1794 }, { "epoch": 0.49678267487718814, "grad_norm": 0.18259289860725403, "learning_rate": 1e-05, "loss": 0.594, "step": 1795 }, { "epoch": 0.49705943402753755, "grad_norm": 0.1778843253850937, "learning_rate": 1e-05, "loss": 0.5787, "step": 1796 }, { "epoch": 0.49733619317788696, "grad_norm": 0.16768403351306915, "learning_rate": 1e-05, "loss": 0.5608, "step": 1797 }, { "epoch": 0.4976129523282364, "grad_norm": 0.1849415898323059, "learning_rate": 1e-05, "loss": 0.5931, "step": 1798 }, { "epoch": 0.49788971147858574, "grad_norm": 0.1756805181503296, "learning_rate": 1e-05, "loss": 0.5718, "step": 1799 }, { "epoch": 0.49816647062893515, "grad_norm": 0.1755865514278412, "learning_rate": 1e-05, "loss": 0.55, "step": 1800 }, { "epoch": 0.49844322977928457, "grad_norm": 0.17112800478935242, "learning_rate": 1e-05, "loss": 0.5426, "step": 1801 }, { "epoch": 0.498719988929634, "grad_norm": 0.18143437802791595, "learning_rate": 1e-05, "loss": 0.5547, "step": 1802 }, { "epoch": 0.4989967480799834, "grad_norm": 0.16959276795387268, "learning_rate": 1e-05, "loss": 0.5657, "step": 1803 }, { "epoch": 0.4992735072303328, "grad_norm": 0.17221955955028534, "learning_rate": 1e-05, "loss": 0.5679, "step": 1804 }, { "epoch": 0.49955026638068223, "grad_norm": 0.1821444034576416, "learning_rate": 1e-05, "loss": 0.5607, "step": 1805 }, { "epoch": 0.49982702553103164, "grad_norm": 0.17964321374893188, "learning_rate": 1e-05, "loss": 0.582, "step": 1806 }, { "epoch": 0.5001037846813811, "grad_norm": 0.17995184659957886, "learning_rate": 1e-05, "loss": 0.5855, "step": 1807 }, { "epoch": 0.5001037846813811, "eval_loss": 0.5595284700393677, "eval_runtime": 3251.5493, "eval_samples_per_second": 75.082, "eval_steps_per_second": 2.347, "step": 1807 }, { "epoch": 0.5003805438317305, "grad_norm": 0.18341104686260223, "learning_rate": 1e-05, "loss": 0.5626, "step": 1808 }, { "epoch": 0.5006573029820799, "grad_norm": 0.18131819367408752, "learning_rate": 1e-05, "loss": 0.5963, "step": 1809 }, { "epoch": 0.5009340621324293, "grad_norm": 0.1847371906042099, "learning_rate": 1e-05, "loss": 0.5548, "step": 1810 }, { "epoch": 0.5012108212827786, "grad_norm": 0.18485334515571594, "learning_rate": 1e-05, "loss": 0.5368, "step": 1811 }, { "epoch": 0.501487580433128, "grad_norm": 0.1727416068315506, "learning_rate": 1e-05, "loss": 0.5832, "step": 1812 }, { "epoch": 0.5017643395834774, "grad_norm": 0.17691485583782196, "learning_rate": 1e-05, "loss": 0.5416, "step": 1813 }, { "epoch": 0.5020410987338269, "grad_norm": 0.1847415417432785, "learning_rate": 1e-05, "loss": 0.5478, "step": 1814 }, { "epoch": 0.5023178578841763, "grad_norm": 0.17526231706142426, "learning_rate": 1e-05, "loss": 0.5533, "step": 1815 }, { "epoch": 0.5025946170345257, "grad_norm": 0.1754435896873474, "learning_rate": 1e-05, "loss": 0.5612, "step": 1816 }, { "epoch": 0.5028713761848751, "grad_norm": 0.17412279546260834, "learning_rate": 1e-05, "loss": 0.5449, "step": 1817 }, { "epoch": 0.5031481353352245, "grad_norm": 0.17879047989845276, "learning_rate": 1e-05, "loss": 0.5646, "step": 1818 }, { "epoch": 0.5034248944855739, "grad_norm": 0.1797013282775879, "learning_rate": 1e-05, "loss": 0.5761, "step": 1819 }, { "epoch": 0.5037016536359233, "grad_norm": 0.17554520070552826, "learning_rate": 1e-05, "loss": 0.5498, "step": 1820 }, { "epoch": 0.5039784127862728, "grad_norm": 0.18929488956928253, "learning_rate": 1e-05, "loss": 0.578, "step": 1821 }, { "epoch": 0.5042551719366222, "grad_norm": 0.1801733523607254, "learning_rate": 1e-05, "loss": 0.5654, "step": 1822 }, { "epoch": 0.5045319310869716, "grad_norm": 0.17772534489631653, "learning_rate": 1e-05, "loss": 0.5676, "step": 1823 }, { "epoch": 0.504808690237321, "grad_norm": 0.1746656894683838, "learning_rate": 1e-05, "loss": 0.5243, "step": 1824 }, { "epoch": 0.5050854493876704, "grad_norm": 0.17621803283691406, "learning_rate": 1e-05, "loss": 0.5615, "step": 1825 }, { "epoch": 0.5053622085380198, "grad_norm": 0.1719622164964676, "learning_rate": 1e-05, "loss": 0.5855, "step": 1826 }, { "epoch": 0.5056389676883692, "grad_norm": 0.17831303179264069, "learning_rate": 1e-05, "loss": 0.5633, "step": 1827 }, { "epoch": 0.5059157268387187, "grad_norm": 0.18065378069877625, "learning_rate": 1e-05, "loss": 0.5574, "step": 1828 }, { "epoch": 0.506192485989068, "grad_norm": 0.1788090467453003, "learning_rate": 1e-05, "loss": 0.5623, "step": 1829 }, { "epoch": 0.5064692451394174, "grad_norm": 0.17636029422283173, "learning_rate": 1e-05, "loss": 0.5711, "step": 1830 }, { "epoch": 0.5067460042897668, "grad_norm": 0.1663035750389099, "learning_rate": 1e-05, "loss": 0.5448, "step": 1831 }, { "epoch": 0.5070227634401162, "grad_norm": 0.176080584526062, "learning_rate": 1e-05, "loss": 0.5898, "step": 1832 }, { "epoch": 0.5072995225904656, "grad_norm": 0.16319909691810608, "learning_rate": 1e-05, "loss": 0.5404, "step": 1833 }, { "epoch": 0.507576281740815, "grad_norm": 0.16714419424533844, "learning_rate": 1e-05, "loss": 0.5301, "step": 1834 }, { "epoch": 0.5078530408911645, "grad_norm": 0.17927078902721405, "learning_rate": 1e-05, "loss": 0.5919, "step": 1835 }, { "epoch": 0.5081298000415139, "grad_norm": 0.18276876211166382, "learning_rate": 1e-05, "loss": 0.5751, "step": 1836 }, { "epoch": 0.5084065591918633, "grad_norm": 0.16546949744224548, "learning_rate": 1e-05, "loss": 0.5567, "step": 1837 }, { "epoch": 0.5086833183422127, "grad_norm": 0.17527148127555847, "learning_rate": 1e-05, "loss": 0.5479, "step": 1838 }, { "epoch": 0.5089600774925621, "grad_norm": 0.1761225312948227, "learning_rate": 1e-05, "loss": 0.5799, "step": 1839 }, { "epoch": 0.5092368366429115, "grad_norm": 0.17778868973255157, "learning_rate": 1e-05, "loss": 0.5725, "step": 1840 }, { "epoch": 0.5095135957932609, "grad_norm": 0.17577162384986877, "learning_rate": 1e-05, "loss": 0.5584, "step": 1841 }, { "epoch": 0.5097903549436104, "grad_norm": 0.17961519956588745, "learning_rate": 1e-05, "loss": 0.5685, "step": 1842 }, { "epoch": 0.5100671140939598, "grad_norm": 0.18381065130233765, "learning_rate": 1e-05, "loss": 0.5758, "step": 1843 }, { "epoch": 0.5103438732443092, "grad_norm": 0.17871902883052826, "learning_rate": 1e-05, "loss": 0.5746, "step": 1844 }, { "epoch": 0.5106206323946586, "grad_norm": 0.17064699530601501, "learning_rate": 1e-05, "loss": 0.5616, "step": 1845 }, { "epoch": 0.5108973915450079, "grad_norm": 0.1795235425233841, "learning_rate": 1e-05, "loss": 0.5752, "step": 1846 }, { "epoch": 0.5111741506953573, "grad_norm": 0.24183286726474762, "learning_rate": 1e-05, "loss": 0.5393, "step": 1847 }, { "epoch": 0.5114509098457067, "grad_norm": 0.1794368475675583, "learning_rate": 1e-05, "loss": 0.5588, "step": 1848 }, { "epoch": 0.5117276689960562, "grad_norm": 0.16858817636966705, "learning_rate": 1e-05, "loss": 0.5302, "step": 1849 }, { "epoch": 0.5120044281464056, "grad_norm": 0.17210230231285095, "learning_rate": 1e-05, "loss": 0.5692, "step": 1850 }, { "epoch": 0.512281187296755, "grad_norm": 0.17946870625019073, "learning_rate": 1e-05, "loss": 0.5293, "step": 1851 }, { "epoch": 0.5125579464471044, "grad_norm": 0.18244487047195435, "learning_rate": 1e-05, "loss": 0.56, "step": 1852 }, { "epoch": 0.5128347055974538, "grad_norm": 0.17552122473716736, "learning_rate": 1e-05, "loss": 0.5578, "step": 1853 }, { "epoch": 0.5131114647478032, "grad_norm": 0.16759997606277466, "learning_rate": 1e-05, "loss": 0.5849, "step": 1854 }, { "epoch": 0.5133882238981526, "grad_norm": 0.17421217262744904, "learning_rate": 1e-05, "loss": 0.5564, "step": 1855 }, { "epoch": 0.513664983048502, "grad_norm": 0.17228972911834717, "learning_rate": 1e-05, "loss": 0.5474, "step": 1856 }, { "epoch": 0.5139417421988515, "grad_norm": 0.1695404052734375, "learning_rate": 1e-05, "loss": 0.581, "step": 1857 }, { "epoch": 0.5142185013492009, "grad_norm": 0.1726943701505661, "learning_rate": 1e-05, "loss": 0.5517, "step": 1858 }, { "epoch": 0.5144952604995503, "grad_norm": 0.17808973789215088, "learning_rate": 1e-05, "loss": 0.5601, "step": 1859 }, { "epoch": 0.5147720196498997, "grad_norm": 0.17524869740009308, "learning_rate": 1e-05, "loss": 0.5949, "step": 1860 }, { "epoch": 0.5150487788002491, "grad_norm": 0.1772812455892563, "learning_rate": 1e-05, "loss": 0.5468, "step": 1861 }, { "epoch": 0.5153255379505985, "grad_norm": 0.17820842564105988, "learning_rate": 1e-05, "loss": 0.5546, "step": 1862 }, { "epoch": 0.5156022971009478, "grad_norm": 0.17798510193824768, "learning_rate": 1e-05, "loss": 0.5588, "step": 1863 }, { "epoch": 0.5158790562512973, "grad_norm": 0.18512780964374542, "learning_rate": 1e-05, "loss": 0.5727, "step": 1864 }, { "epoch": 0.5161558154016467, "grad_norm": 0.18065835535526276, "learning_rate": 1e-05, "loss": 0.5853, "step": 1865 }, { "epoch": 0.5164325745519961, "grad_norm": 0.16852979362010956, "learning_rate": 1e-05, "loss": 0.5395, "step": 1866 }, { "epoch": 0.5167093337023455, "grad_norm": 0.1675931215286255, "learning_rate": 1e-05, "loss": 0.5747, "step": 1867 }, { "epoch": 0.5169860928526949, "grad_norm": 0.1834307461977005, "learning_rate": 1e-05, "loss": 0.5702, "step": 1868 }, { "epoch": 0.5172628520030443, "grad_norm": 0.17879396677017212, "learning_rate": 1e-05, "loss": 0.5826, "step": 1869 }, { "epoch": 0.5175396111533938, "grad_norm": 0.17971912026405334, "learning_rate": 1e-05, "loss": 0.5879, "step": 1870 }, { "epoch": 0.5178163703037432, "grad_norm": 0.1706164926290512, "learning_rate": 1e-05, "loss": 0.5523, "step": 1871 }, { "epoch": 0.5180931294540926, "grad_norm": 0.17401783168315887, "learning_rate": 1e-05, "loss": 0.5613, "step": 1872 }, { "epoch": 0.518369888604442, "grad_norm": 0.17353229224681854, "learning_rate": 1e-05, "loss": 0.5704, "step": 1873 }, { "epoch": 0.5186466477547914, "grad_norm": 0.1748887598514557, "learning_rate": 1e-05, "loss": 0.5757, "step": 1874 }, { "epoch": 0.5189234069051408, "grad_norm": 0.17453552782535553, "learning_rate": 1e-05, "loss": 0.5355, "step": 1875 }, { "epoch": 0.5192001660554902, "grad_norm": 0.17343232035636902, "learning_rate": 1e-05, "loss": 0.5412, "step": 1876 }, { "epoch": 0.5194769252058397, "grad_norm": 0.1728099286556244, "learning_rate": 1e-05, "loss": 0.5579, "step": 1877 }, { "epoch": 0.5197536843561891, "grad_norm": 0.17299748957157135, "learning_rate": 1e-05, "loss": 0.5592, "step": 1878 }, { "epoch": 0.5200304435065385, "grad_norm": 0.17102238535881042, "learning_rate": 1e-05, "loss": 0.551, "step": 1879 }, { "epoch": 0.5203072026568878, "grad_norm": 0.16364571452140808, "learning_rate": 1e-05, "loss": 0.5512, "step": 1880 }, { "epoch": 0.5205839618072372, "grad_norm": 0.1746772676706314, "learning_rate": 1e-05, "loss": 0.5721, "step": 1881 }, { "epoch": 0.5208607209575866, "grad_norm": 0.1866730898618698, "learning_rate": 1e-05, "loss": 0.5745, "step": 1882 }, { "epoch": 0.521137480107936, "grad_norm": 0.16450290381908417, "learning_rate": 1e-05, "loss": 0.5462, "step": 1883 }, { "epoch": 0.5214142392582855, "grad_norm": 0.17221908271312714, "learning_rate": 1e-05, "loss": 0.5591, "step": 1884 }, { "epoch": 0.5216909984086349, "grad_norm": 0.17166858911514282, "learning_rate": 1e-05, "loss": 0.5729, "step": 1885 }, { "epoch": 0.5219677575589843, "grad_norm": 0.17161859571933746, "learning_rate": 1e-05, "loss": 0.5942, "step": 1886 }, { "epoch": 0.5222445167093337, "grad_norm": 0.17527657747268677, "learning_rate": 1e-05, "loss": 0.5554, "step": 1887 }, { "epoch": 0.5225212758596831, "grad_norm": 0.17838822305202484, "learning_rate": 1e-05, "loss": 0.5558, "step": 1888 }, { "epoch": 0.5227980350100325, "grad_norm": 0.17619748413562775, "learning_rate": 1e-05, "loss": 0.5553, "step": 1889 }, { "epoch": 0.5230747941603819, "grad_norm": 0.16286523640155792, "learning_rate": 1e-05, "loss": 0.5426, "step": 1890 }, { "epoch": 0.5233515533107314, "grad_norm": 0.17185160517692566, "learning_rate": 1e-05, "loss": 0.5775, "step": 1891 }, { "epoch": 0.5236283124610808, "grad_norm": 0.1803010255098343, "learning_rate": 1e-05, "loss": 0.57, "step": 1892 }, { "epoch": 0.5239050716114302, "grad_norm": 0.16809788346290588, "learning_rate": 1e-05, "loss": 0.5394, "step": 1893 }, { "epoch": 0.5241818307617796, "grad_norm": 0.17608726024627686, "learning_rate": 1e-05, "loss": 0.5912, "step": 1894 }, { "epoch": 0.524458589912129, "grad_norm": 0.17632818222045898, "learning_rate": 1e-05, "loss": 0.5642, "step": 1895 }, { "epoch": 0.5247353490624784, "grad_norm": 0.18664081394672394, "learning_rate": 1e-05, "loss": 0.5609, "step": 1896 }, { "epoch": 0.5250121082128277, "grad_norm": 0.17757000029087067, "learning_rate": 1e-05, "loss": 0.5527, "step": 1897 }, { "epoch": 0.5252888673631771, "grad_norm": 0.17702841758728027, "learning_rate": 1e-05, "loss": 0.5663, "step": 1898 }, { "epoch": 0.5255656265135266, "grad_norm": 0.17925354838371277, "learning_rate": 1e-05, "loss": 0.5411, "step": 1899 }, { "epoch": 0.525842385663876, "grad_norm": 0.17082858085632324, "learning_rate": 1e-05, "loss": 0.5421, "step": 1900 }, { "epoch": 0.5261191448142254, "grad_norm": 0.17776674032211304, "learning_rate": 1e-05, "loss": 0.5597, "step": 1901 }, { "epoch": 0.5263959039645748, "grad_norm": 0.16917771100997925, "learning_rate": 1e-05, "loss": 0.5268, "step": 1902 }, { "epoch": 0.5266726631149242, "grad_norm": 0.17034666240215302, "learning_rate": 1e-05, "loss": 0.5633, "step": 1903 }, { "epoch": 0.5269494222652736, "grad_norm": 0.1744510531425476, "learning_rate": 1e-05, "loss": 0.5562, "step": 1904 }, { "epoch": 0.527226181415623, "grad_norm": 0.17846925556659698, "learning_rate": 1e-05, "loss": 0.5808, "step": 1905 }, { "epoch": 0.5275029405659725, "grad_norm": 0.18262606859207153, "learning_rate": 1e-05, "loss": 0.5406, "step": 1906 }, { "epoch": 0.5277796997163219, "grad_norm": 0.19074484705924988, "learning_rate": 1e-05, "loss": 0.561, "step": 1907 }, { "epoch": 0.5280564588666713, "grad_norm": 0.18225134909152985, "learning_rate": 1e-05, "loss": 0.5772, "step": 1908 }, { "epoch": 0.5283332180170207, "grad_norm": 0.17051243782043457, "learning_rate": 1e-05, "loss": 0.5645, "step": 1909 }, { "epoch": 0.5286099771673701, "grad_norm": 0.1802610605955124, "learning_rate": 1e-05, "loss": 0.5377, "step": 1910 }, { "epoch": 0.5288867363177195, "grad_norm": 0.17941904067993164, "learning_rate": 1e-05, "loss": 0.5677, "step": 1911 }, { "epoch": 0.529163495468069, "grad_norm": 0.1729092001914978, "learning_rate": 1e-05, "loss": 0.5726, "step": 1912 }, { "epoch": 0.5294402546184184, "grad_norm": 0.18662644922733307, "learning_rate": 1e-05, "loss": 0.5859, "step": 1913 }, { "epoch": 0.5297170137687677, "grad_norm": 0.18082694709300995, "learning_rate": 1e-05, "loss": 0.5633, "step": 1914 }, { "epoch": 0.5299937729191171, "grad_norm": 0.1913125365972519, "learning_rate": 1e-05, "loss": 0.5776, "step": 1915 }, { "epoch": 0.5302705320694665, "grad_norm": 0.17779678106307983, "learning_rate": 1e-05, "loss": 0.5489, "step": 1916 }, { "epoch": 0.5305472912198159, "grad_norm": 0.1844724714756012, "learning_rate": 1e-05, "loss": 0.565, "step": 1917 }, { "epoch": 0.5308240503701653, "grad_norm": 0.16838590800762177, "learning_rate": 1e-05, "loss": 0.5461, "step": 1918 }, { "epoch": 0.5311008095205147, "grad_norm": 0.17846958339214325, "learning_rate": 1e-05, "loss": 0.5759, "step": 1919 }, { "epoch": 0.5313775686708642, "grad_norm": 0.17486563324928284, "learning_rate": 1e-05, "loss": 0.5772, "step": 1920 }, { "epoch": 0.5316543278212136, "grad_norm": 0.17742690443992615, "learning_rate": 1e-05, "loss": 0.575, "step": 1921 }, { "epoch": 0.531931086971563, "grad_norm": 0.17716962099075317, "learning_rate": 1e-05, "loss": 0.5828, "step": 1922 }, { "epoch": 0.5322078461219124, "grad_norm": 0.17497248947620392, "learning_rate": 1e-05, "loss": 0.5483, "step": 1923 }, { "epoch": 0.5324846052722618, "grad_norm": 0.17075411975383759, "learning_rate": 1e-05, "loss": 0.5583, "step": 1924 }, { "epoch": 0.5327613644226112, "grad_norm": 0.16964326798915863, "learning_rate": 1e-05, "loss": 0.5569, "step": 1925 }, { "epoch": 0.5330381235729607, "grad_norm": 0.18211594223976135, "learning_rate": 1e-05, "loss": 0.5741, "step": 1926 }, { "epoch": 0.5333148827233101, "grad_norm": 0.16949661076068878, "learning_rate": 1e-05, "loss": 0.5745, "step": 1927 }, { "epoch": 0.5335916418736595, "grad_norm": 0.17628343403339386, "learning_rate": 1e-05, "loss": 0.5638, "step": 1928 }, { "epoch": 0.5338684010240089, "grad_norm": 0.17500793933868408, "learning_rate": 1e-05, "loss": 0.5467, "step": 1929 }, { "epoch": 0.5341451601743583, "grad_norm": 0.1721728891134262, "learning_rate": 1e-05, "loss": 0.5391, "step": 1930 }, { "epoch": 0.5344219193247077, "grad_norm": 0.1720646619796753, "learning_rate": 1e-05, "loss": 0.5608, "step": 1931 }, { "epoch": 0.534698678475057, "grad_norm": 0.16617928445339203, "learning_rate": 1e-05, "loss": 0.5296, "step": 1932 }, { "epoch": 0.5349754376254064, "grad_norm": 0.1674613058567047, "learning_rate": 1e-05, "loss": 0.5814, "step": 1933 }, { "epoch": 0.5352521967757559, "grad_norm": 0.17311443388462067, "learning_rate": 1e-05, "loss": 0.5552, "step": 1934 }, { "epoch": 0.5355289559261053, "grad_norm": 0.1706092655658722, "learning_rate": 1e-05, "loss": 0.5792, "step": 1935 }, { "epoch": 0.5358057150764547, "grad_norm": 0.17671921849250793, "learning_rate": 1e-05, "loss": 0.5558, "step": 1936 }, { "epoch": 0.5360824742268041, "grad_norm": 0.16650119423866272, "learning_rate": 1e-05, "loss": 0.5822, "step": 1937 }, { "epoch": 0.5363592333771535, "grad_norm": 0.1690196692943573, "learning_rate": 1e-05, "loss": 0.5472, "step": 1938 }, { "epoch": 0.5366359925275029, "grad_norm": 0.1744525134563446, "learning_rate": 1e-05, "loss": 0.5838, "step": 1939 }, { "epoch": 0.5369127516778524, "grad_norm": 0.17266049981117249, "learning_rate": 1e-05, "loss": 0.5538, "step": 1940 }, { "epoch": 0.5371895108282018, "grad_norm": 0.1807900071144104, "learning_rate": 1e-05, "loss": 0.5621, "step": 1941 }, { "epoch": 0.5374662699785512, "grad_norm": 0.17149491608142853, "learning_rate": 1e-05, "loss": 0.5577, "step": 1942 }, { "epoch": 0.5377430291289006, "grad_norm": 0.16425803303718567, "learning_rate": 1e-05, "loss": 0.5335, "step": 1943 }, { "epoch": 0.53801978827925, "grad_norm": 0.17277412116527557, "learning_rate": 1e-05, "loss": 0.5748, "step": 1944 }, { "epoch": 0.5382965474295994, "grad_norm": 0.17265592515468597, "learning_rate": 1e-05, "loss": 0.574, "step": 1945 }, { "epoch": 0.5385733065799488, "grad_norm": 0.1688597947359085, "learning_rate": 1e-05, "loss": 0.5543, "step": 1946 }, { "epoch": 0.5388500657302983, "grad_norm": 0.16749568283557892, "learning_rate": 1e-05, "loss": 0.5465, "step": 1947 }, { "epoch": 0.5391268248806477, "grad_norm": 0.1736413985490799, "learning_rate": 1e-05, "loss": 0.5453, "step": 1948 }, { "epoch": 0.539403584030997, "grad_norm": 0.17682130634784698, "learning_rate": 1e-05, "loss": 0.5539, "step": 1949 }, { "epoch": 0.5396803431813464, "grad_norm": 0.17234306037425995, "learning_rate": 1e-05, "loss": 0.5683, "step": 1950 }, { "epoch": 0.5399571023316958, "grad_norm": 0.1776650846004486, "learning_rate": 1e-05, "loss": 0.5594, "step": 1951 }, { "epoch": 0.5402338614820452, "grad_norm": 0.18676172196865082, "learning_rate": 1e-05, "loss": 0.5444, "step": 1952 }, { "epoch": 0.5405106206323946, "grad_norm": 0.17864274978637695, "learning_rate": 1e-05, "loss": 0.5382, "step": 1953 }, { "epoch": 0.540787379782744, "grad_norm": 0.1767602562904358, "learning_rate": 1e-05, "loss": 0.5943, "step": 1954 }, { "epoch": 0.5410641389330935, "grad_norm": 0.17789560556411743, "learning_rate": 1e-05, "loss": 0.5554, "step": 1955 }, { "epoch": 0.5413408980834429, "grad_norm": 0.18271207809448242, "learning_rate": 1e-05, "loss": 0.5691, "step": 1956 }, { "epoch": 0.5416176572337923, "grad_norm": 0.17481467127799988, "learning_rate": 1e-05, "loss": 0.5806, "step": 1957 }, { "epoch": 0.5418944163841417, "grad_norm": 0.18654803931713104, "learning_rate": 1e-05, "loss": 0.5385, "step": 1958 }, { "epoch": 0.5421711755344911, "grad_norm": 0.17761200666427612, "learning_rate": 1e-05, "loss": 0.5398, "step": 1959 }, { "epoch": 0.5424479346848405, "grad_norm": 0.1796535700559616, "learning_rate": 1e-05, "loss": 0.5674, "step": 1960 }, { "epoch": 0.54272469383519, "grad_norm": 0.17934708297252655, "learning_rate": 1e-05, "loss": 0.5746, "step": 1961 }, { "epoch": 0.5430014529855394, "grad_norm": 0.18063750863075256, "learning_rate": 1e-05, "loss": 0.553, "step": 1962 }, { "epoch": 0.5432782121358888, "grad_norm": 0.1762080192565918, "learning_rate": 1e-05, "loss": 0.6049, "step": 1963 }, { "epoch": 0.5435549712862382, "grad_norm": 0.1752541959285736, "learning_rate": 1e-05, "loss": 0.5698, "step": 1964 }, { "epoch": 0.5438317304365876, "grad_norm": 0.1743788719177246, "learning_rate": 1e-05, "loss": 0.563, "step": 1965 }, { "epoch": 0.5441084895869369, "grad_norm": 0.17108964920043945, "learning_rate": 1e-05, "loss": 0.5467, "step": 1966 }, { "epoch": 0.5443852487372863, "grad_norm": 0.18108771741390228, "learning_rate": 1e-05, "loss": 0.5786, "step": 1967 }, { "epoch": 0.5446620078876357, "grad_norm": 0.1814819872379303, "learning_rate": 1e-05, "loss": 0.565, "step": 1968 }, { "epoch": 0.5449387670379852, "grad_norm": 0.17283771932125092, "learning_rate": 1e-05, "loss": 0.5492, "step": 1969 }, { "epoch": 0.5452155261883346, "grad_norm": 0.17483648657798767, "learning_rate": 1e-05, "loss": 0.548, "step": 1970 }, { "epoch": 0.545492285338684, "grad_norm": 0.1680275797843933, "learning_rate": 1e-05, "loss": 0.554, "step": 1971 }, { "epoch": 0.5457690444890334, "grad_norm": 0.18047575652599335, "learning_rate": 1e-05, "loss": 0.5463, "step": 1972 }, { "epoch": 0.5460458036393828, "grad_norm": 0.17099884152412415, "learning_rate": 1e-05, "loss": 0.5684, "step": 1973 }, { "epoch": 0.5463225627897322, "grad_norm": 0.17175069451332092, "learning_rate": 1e-05, "loss": 0.5457, "step": 1974 }, { "epoch": 0.5465993219400817, "grad_norm": 0.1700928658246994, "learning_rate": 1e-05, "loss": 0.5717, "step": 1975 }, { "epoch": 0.5468760810904311, "grad_norm": 0.1739731878042221, "learning_rate": 1e-05, "loss": 0.5727, "step": 1976 }, { "epoch": 0.5471528402407805, "grad_norm": 0.16857987642288208, "learning_rate": 1e-05, "loss": 0.5748, "step": 1977 }, { "epoch": 0.5474295993911299, "grad_norm": 0.17397302389144897, "learning_rate": 1e-05, "loss": 0.5682, "step": 1978 }, { "epoch": 0.5477063585414793, "grad_norm": 0.16993261873722076, "learning_rate": 1e-05, "loss": 0.5478, "step": 1979 }, { "epoch": 0.5479831176918287, "grad_norm": 0.1702284812927246, "learning_rate": 1e-05, "loss": 0.5521, "step": 1980 }, { "epoch": 0.5482598768421781, "grad_norm": 0.17860271036624908, "learning_rate": 1e-05, "loss": 0.5275, "step": 1981 }, { "epoch": 0.5485366359925276, "grad_norm": 0.17465387284755707, "learning_rate": 1e-05, "loss": 0.5405, "step": 1982 }, { "epoch": 0.5488133951428769, "grad_norm": 0.17473247647285461, "learning_rate": 1e-05, "loss": 0.5545, "step": 1983 }, { "epoch": 0.5490901542932263, "grad_norm": 0.17207138240337372, "learning_rate": 1e-05, "loss": 0.5812, "step": 1984 }, { "epoch": 0.5493669134435757, "grad_norm": 0.1717279553413391, "learning_rate": 1e-05, "loss": 0.5505, "step": 1985 }, { "epoch": 0.5496436725939251, "grad_norm": 0.1778380274772644, "learning_rate": 1e-05, "loss": 0.5813, "step": 1986 }, { "epoch": 0.5499204317442745, "grad_norm": 0.18001879751682281, "learning_rate": 1e-05, "loss": 0.5762, "step": 1987 }, { "epoch": 0.5501971908946239, "grad_norm": 0.18487201631069183, "learning_rate": 1e-05, "loss": 0.5583, "step": 1988 }, { "epoch": 0.5504739500449733, "grad_norm": 0.1685303896665573, "learning_rate": 1e-05, "loss": 0.5741, "step": 1989 }, { "epoch": 0.5507507091953228, "grad_norm": 0.18520300090312958, "learning_rate": 1e-05, "loss": 0.5775, "step": 1990 }, { "epoch": 0.5510274683456722, "grad_norm": 0.17470434308052063, "learning_rate": 1e-05, "loss": 0.5478, "step": 1991 }, { "epoch": 0.5513042274960216, "grad_norm": 0.161653071641922, "learning_rate": 1e-05, "loss": 0.504, "step": 1992 }, { "epoch": 0.551580986646371, "grad_norm": 0.1745692491531372, "learning_rate": 1e-05, "loss": 0.5464, "step": 1993 }, { "epoch": 0.5518577457967204, "grad_norm": 0.1744071990251541, "learning_rate": 1e-05, "loss": 0.5792, "step": 1994 }, { "epoch": 0.5521345049470698, "grad_norm": 0.16892141103744507, "learning_rate": 1e-05, "loss": 0.537, "step": 1995 }, { "epoch": 0.5524112640974193, "grad_norm": 0.1733822375535965, "learning_rate": 1e-05, "loss": 0.5669, "step": 1996 }, { "epoch": 0.5526880232477687, "grad_norm": 0.18258444964885712, "learning_rate": 1e-05, "loss": 0.5591, "step": 1997 }, { "epoch": 0.5529647823981181, "grad_norm": 0.16841904819011688, "learning_rate": 1e-05, "loss": 0.5581, "step": 1998 }, { "epoch": 0.5532415415484675, "grad_norm": 0.17288817465305328, "learning_rate": 1e-05, "loss": 0.5774, "step": 1999 }, { "epoch": 0.5535183006988168, "grad_norm": 0.1736564189195633, "learning_rate": 1e-05, "loss": 0.5593, "step": 2000 }, { "epoch": 0.5537950598491662, "grad_norm": 0.17487749457359314, "learning_rate": 1e-05, "loss": 0.55, "step": 2001 }, { "epoch": 0.5540718189995156, "grad_norm": 0.15931639075279236, "learning_rate": 1e-05, "loss": 0.5749, "step": 2002 }, { "epoch": 0.554348578149865, "grad_norm": 0.1716032177209854, "learning_rate": 1e-05, "loss": 0.5436, "step": 2003 }, { "epoch": 0.5546253373002145, "grad_norm": 0.17648661136627197, "learning_rate": 1e-05, "loss": 0.5959, "step": 2004 }, { "epoch": 0.5549020964505639, "grad_norm": 0.1773480325937271, "learning_rate": 1e-05, "loss": 0.5466, "step": 2005 }, { "epoch": 0.5551788556009133, "grad_norm": 0.1653442531824112, "learning_rate": 1e-05, "loss": 0.5597, "step": 2006 }, { "epoch": 0.5554556147512627, "grad_norm": 0.17959707975387573, "learning_rate": 1e-05, "loss": 0.5935, "step": 2007 }, { "epoch": 0.5557323739016121, "grad_norm": 0.17554613947868347, "learning_rate": 1e-05, "loss": 0.5965, "step": 2008 }, { "epoch": 0.5560091330519615, "grad_norm": 0.16751620173454285, "learning_rate": 1e-05, "loss": 0.5618, "step": 2009 }, { "epoch": 0.556285892202311, "grad_norm": 0.1702941507101059, "learning_rate": 1e-05, "loss": 0.5676, "step": 2010 }, { "epoch": 0.5565626513526604, "grad_norm": 0.17548371851444244, "learning_rate": 1e-05, "loss": 0.5587, "step": 2011 }, { "epoch": 0.5568394105030098, "grad_norm": 0.16687129437923431, "learning_rate": 1e-05, "loss": 0.5692, "step": 2012 }, { "epoch": 0.5571161696533592, "grad_norm": 0.1628004014492035, "learning_rate": 1e-05, "loss": 0.528, "step": 2013 }, { "epoch": 0.5573929288037086, "grad_norm": 0.16788747906684875, "learning_rate": 1e-05, "loss": 0.5757, "step": 2014 }, { "epoch": 0.557669687954058, "grad_norm": 0.1762511432170868, "learning_rate": 1e-05, "loss": 0.5699, "step": 2015 }, { "epoch": 0.5579464471044074, "grad_norm": 0.16620859503746033, "learning_rate": 1e-05, "loss": 0.5535, "step": 2016 }, { "epoch": 0.5582232062547567, "grad_norm": 0.1660090833902359, "learning_rate": 1e-05, "loss": 0.5658, "step": 2017 }, { "epoch": 0.5584999654051062, "grad_norm": 0.16680756211280823, "learning_rate": 1e-05, "loss": 0.5568, "step": 2018 }, { "epoch": 0.5587767245554556, "grad_norm": 0.16855204105377197, "learning_rate": 1e-05, "loss": 0.5404, "step": 2019 }, { "epoch": 0.559053483705805, "grad_norm": 0.1740337312221527, "learning_rate": 1e-05, "loss": 0.5491, "step": 2020 }, { "epoch": 0.5593302428561544, "grad_norm": 0.1770401895046234, "learning_rate": 1e-05, "loss": 0.5639, "step": 2021 }, { "epoch": 0.5596070020065038, "grad_norm": 0.17176862061023712, "learning_rate": 1e-05, "loss": 0.5663, "step": 2022 }, { "epoch": 0.5598837611568532, "grad_norm": 0.1763453483581543, "learning_rate": 1e-05, "loss": 0.5725, "step": 2023 }, { "epoch": 0.5601605203072026, "grad_norm": 0.1798967868089676, "learning_rate": 1e-05, "loss": 0.5501, "step": 2024 }, { "epoch": 0.5604372794575521, "grad_norm": 0.170345276594162, "learning_rate": 1e-05, "loss": 0.57, "step": 2025 }, { "epoch": 0.5607140386079015, "grad_norm": 0.18190279603004456, "learning_rate": 1e-05, "loss": 0.5795, "step": 2026 }, { "epoch": 0.5609907977582509, "grad_norm": 0.19019608199596405, "learning_rate": 1e-05, "loss": 0.5994, "step": 2027 }, { "epoch": 0.5612675569086003, "grad_norm": 0.16913969814777374, "learning_rate": 1e-05, "loss": 0.5713, "step": 2028 }, { "epoch": 0.5615443160589497, "grad_norm": 0.17873841524124146, "learning_rate": 1e-05, "loss": 0.5785, "step": 2029 }, { "epoch": 0.5618210752092991, "grad_norm": 0.17697162926197052, "learning_rate": 1e-05, "loss": 0.5429, "step": 2030 }, { "epoch": 0.5620978343596486, "grad_norm": 0.17942242324352264, "learning_rate": 1e-05, "loss": 0.5616, "step": 2031 }, { "epoch": 0.562374593509998, "grad_norm": 0.1771325021982193, "learning_rate": 1e-05, "loss": 0.5652, "step": 2032 }, { "epoch": 0.5626513526603474, "grad_norm": 0.1680523157119751, "learning_rate": 1e-05, "loss": 0.5518, "step": 2033 }, { "epoch": 0.5629281118106968, "grad_norm": 0.17866969108581543, "learning_rate": 1e-05, "loss": 0.5698, "step": 2034 }, { "epoch": 0.5632048709610461, "grad_norm": 0.1802847981452942, "learning_rate": 1e-05, "loss": 0.5462, "step": 2035 }, { "epoch": 0.5634816301113955, "grad_norm": 0.17168819904327393, "learning_rate": 1e-05, "loss": 0.5521, "step": 2036 }, { "epoch": 0.5637583892617449, "grad_norm": 0.18127579987049103, "learning_rate": 1e-05, "loss": 0.5523, "step": 2037 }, { "epoch": 0.5640351484120943, "grad_norm": 0.1787552386522293, "learning_rate": 1e-05, "loss": 0.554, "step": 2038 }, { "epoch": 0.5643119075624438, "grad_norm": 0.1790616363286972, "learning_rate": 1e-05, "loss": 0.5408, "step": 2039 }, { "epoch": 0.5645886667127932, "grad_norm": 0.1703723669052124, "learning_rate": 1e-05, "loss": 0.583, "step": 2040 }, { "epoch": 0.5648654258631426, "grad_norm": 0.17192471027374268, "learning_rate": 1e-05, "loss": 0.5348, "step": 2041 }, { "epoch": 0.565142185013492, "grad_norm": 0.17329983413219452, "learning_rate": 1e-05, "loss": 0.5554, "step": 2042 }, { "epoch": 0.5654189441638414, "grad_norm": 0.1802607774734497, "learning_rate": 1e-05, "loss": 0.5521, "step": 2043 }, { "epoch": 0.5656957033141908, "grad_norm": 0.17309920489788055, "learning_rate": 1e-05, "loss": 0.5525, "step": 2044 }, { "epoch": 0.5659724624645402, "grad_norm": 0.16267180442810059, "learning_rate": 1e-05, "loss": 0.5181, "step": 2045 }, { "epoch": 0.5662492216148897, "grad_norm": 0.1705552190542221, "learning_rate": 1e-05, "loss": 0.5468, "step": 2046 }, { "epoch": 0.5665259807652391, "grad_norm": 0.1725357174873352, "learning_rate": 1e-05, "loss": 0.5294, "step": 2047 }, { "epoch": 0.5668027399155885, "grad_norm": 0.16936737298965454, "learning_rate": 1e-05, "loss": 0.5467, "step": 2048 }, { "epoch": 0.5670794990659379, "grad_norm": 0.1767825335264206, "learning_rate": 1e-05, "loss": 0.5499, "step": 2049 }, { "epoch": 0.5673562582162873, "grad_norm": 0.18184193968772888, "learning_rate": 1e-05, "loss": 0.5715, "step": 2050 }, { "epoch": 0.5676330173666367, "grad_norm": 0.18375426530838013, "learning_rate": 1e-05, "loss": 0.5628, "step": 2051 }, { "epoch": 0.567909776516986, "grad_norm": 0.17622284591197968, "learning_rate": 1e-05, "loss": 0.543, "step": 2052 }, { "epoch": 0.5681865356673355, "grad_norm": 0.1735362857580185, "learning_rate": 1e-05, "loss": 0.5505, "step": 2053 }, { "epoch": 0.5684632948176849, "grad_norm": 0.1714009940624237, "learning_rate": 1e-05, "loss": 0.5864, "step": 2054 }, { "epoch": 0.5687400539680343, "grad_norm": 0.17720085382461548, "learning_rate": 1e-05, "loss": 0.5653, "step": 2055 }, { "epoch": 0.5690168131183837, "grad_norm": 0.1745796948671341, "learning_rate": 1e-05, "loss": 0.536, "step": 2056 }, { "epoch": 0.5692935722687331, "grad_norm": 0.17010918259620667, "learning_rate": 1e-05, "loss": 0.5655, "step": 2057 }, { "epoch": 0.5695703314190825, "grad_norm": 0.17354173958301544, "learning_rate": 1e-05, "loss": 0.5832, "step": 2058 }, { "epoch": 0.569847090569432, "grad_norm": 0.1757994145154953, "learning_rate": 1e-05, "loss": 0.5778, "step": 2059 }, { "epoch": 0.5701238497197814, "grad_norm": 0.17411555349826813, "learning_rate": 1e-05, "loss": 0.554, "step": 2060 }, { "epoch": 0.5704006088701308, "grad_norm": 0.1806405782699585, "learning_rate": 1e-05, "loss": 0.538, "step": 2061 }, { "epoch": 0.5706773680204802, "grad_norm": 0.16916631162166595, "learning_rate": 1e-05, "loss": 0.5268, "step": 2062 }, { "epoch": 0.5709541271708296, "grad_norm": 0.18126483261585236, "learning_rate": 1e-05, "loss": 0.5759, "step": 2063 }, { "epoch": 0.571230886321179, "grad_norm": 0.17963317036628723, "learning_rate": 1e-05, "loss": 0.5505, "step": 2064 }, { "epoch": 0.5715076454715284, "grad_norm": 0.18582755327224731, "learning_rate": 1e-05, "loss": 0.5557, "step": 2065 }, { "epoch": 0.5717844046218779, "grad_norm": 0.1667535901069641, "learning_rate": 1e-05, "loss": 0.5595, "step": 2066 }, { "epoch": 0.5720611637722273, "grad_norm": 0.16520746052265167, "learning_rate": 1e-05, "loss": 0.5814, "step": 2067 }, { "epoch": 0.5723379229225767, "grad_norm": 0.1658456027507782, "learning_rate": 1e-05, "loss": 0.5321, "step": 2068 }, { "epoch": 0.572614682072926, "grad_norm": 0.17846497893333435, "learning_rate": 1e-05, "loss": 0.5507, "step": 2069 }, { "epoch": 0.5728914412232754, "grad_norm": 0.18353776633739471, "learning_rate": 1e-05, "loss": 0.5769, "step": 2070 }, { "epoch": 0.5731682003736248, "grad_norm": 0.17965148389339447, "learning_rate": 1e-05, "loss": 0.5831, "step": 2071 }, { "epoch": 0.5734449595239742, "grad_norm": 0.1684381663799286, "learning_rate": 1e-05, "loss": 0.5428, "step": 2072 }, { "epoch": 0.5737217186743236, "grad_norm": 0.16944573819637299, "learning_rate": 1e-05, "loss": 0.5579, "step": 2073 }, { "epoch": 0.5739984778246731, "grad_norm": 0.181576207280159, "learning_rate": 1e-05, "loss": 0.5716, "step": 2074 }, { "epoch": 0.5742752369750225, "grad_norm": 0.17028898000717163, "learning_rate": 1e-05, "loss": 0.5526, "step": 2075 }, { "epoch": 0.5745519961253719, "grad_norm": 0.16901683807373047, "learning_rate": 1e-05, "loss": 0.5669, "step": 2076 }, { "epoch": 0.5748287552757213, "grad_norm": 0.17243586480617523, "learning_rate": 1e-05, "loss": 0.5675, "step": 2077 }, { "epoch": 0.5751055144260707, "grad_norm": 0.16803386807441711, "learning_rate": 1e-05, "loss": 0.5328, "step": 2078 }, { "epoch": 0.5753822735764201, "grad_norm": 0.1754719614982605, "learning_rate": 1e-05, "loss": 0.5443, "step": 2079 }, { "epoch": 0.5756590327267695, "grad_norm": 0.16857445240020752, "learning_rate": 1e-05, "loss": 0.5597, "step": 2080 }, { "epoch": 0.575935791877119, "grad_norm": 0.17088967561721802, "learning_rate": 1e-05, "loss": 0.5383, "step": 2081 }, { "epoch": 0.5762125510274684, "grad_norm": 0.1708289086818695, "learning_rate": 1e-05, "loss": 0.5411, "step": 2082 }, { "epoch": 0.5764893101778178, "grad_norm": 0.169466033577919, "learning_rate": 1e-05, "loss": 0.5386, "step": 2083 }, { "epoch": 0.5767660693281672, "grad_norm": 0.1719343513250351, "learning_rate": 1e-05, "loss": 0.5602, "step": 2084 }, { "epoch": 0.5770428284785166, "grad_norm": 0.16736432909965515, "learning_rate": 1e-05, "loss": 0.5563, "step": 2085 }, { "epoch": 0.5773195876288659, "grad_norm": 0.16304133832454681, "learning_rate": 1e-05, "loss": 0.5274, "step": 2086 }, { "epoch": 0.5775963467792153, "grad_norm": 0.16483834385871887, "learning_rate": 1e-05, "loss": 0.5373, "step": 2087 }, { "epoch": 0.5778731059295648, "grad_norm": 0.18093787133693695, "learning_rate": 1e-05, "loss": 0.5617, "step": 2088 }, { "epoch": 0.5781498650799142, "grad_norm": 0.16772472858428955, "learning_rate": 1e-05, "loss": 0.5248, "step": 2089 }, { "epoch": 0.5784266242302636, "grad_norm": 0.1775091588497162, "learning_rate": 1e-05, "loss": 0.5613, "step": 2090 }, { "epoch": 0.578703383380613, "grad_norm": 0.18104836344718933, "learning_rate": 1e-05, "loss": 0.5746, "step": 2091 }, { "epoch": 0.5789801425309624, "grad_norm": 0.17702656984329224, "learning_rate": 1e-05, "loss": 0.5523, "step": 2092 }, { "epoch": 0.5792569016813118, "grad_norm": 0.16742070019245148, "learning_rate": 1e-05, "loss": 0.5615, "step": 2093 }, { "epoch": 0.5795336608316612, "grad_norm": 0.1706288605928421, "learning_rate": 1e-05, "loss": 0.5793, "step": 2094 }, { "epoch": 0.5798104199820107, "grad_norm": 0.1732875257730484, "learning_rate": 1e-05, "loss": 0.5732, "step": 2095 }, { "epoch": 0.5800871791323601, "grad_norm": 0.16684779524803162, "learning_rate": 1e-05, "loss": 0.5467, "step": 2096 }, { "epoch": 0.5803639382827095, "grad_norm": 0.16872583329677582, "learning_rate": 1e-05, "loss": 0.5497, "step": 2097 }, { "epoch": 0.5806406974330589, "grad_norm": 0.1725061535835266, "learning_rate": 1e-05, "loss": 0.571, "step": 2098 }, { "epoch": 0.5809174565834083, "grad_norm": 0.17543670535087585, "learning_rate": 1e-05, "loss": 0.5867, "step": 2099 }, { "epoch": 0.5811942157337577, "grad_norm": 0.17526857554912567, "learning_rate": 1e-05, "loss": 0.571, "step": 2100 }, { "epoch": 0.5814709748841072, "grad_norm": 0.1734503209590912, "learning_rate": 1e-05, "loss": 0.5584, "step": 2101 }, { "epoch": 0.5817477340344566, "grad_norm": 0.17935103178024292, "learning_rate": 1e-05, "loss": 0.5491, "step": 2102 }, { "epoch": 0.5820244931848059, "grad_norm": 0.17921899259090424, "learning_rate": 1e-05, "loss": 0.5295, "step": 2103 }, { "epoch": 0.5823012523351553, "grad_norm": 0.17573679983615875, "learning_rate": 1e-05, "loss": 0.5461, "step": 2104 }, { "epoch": 0.5825780114855047, "grad_norm": 0.16823497414588928, "learning_rate": 1e-05, "loss": 0.585, "step": 2105 }, { "epoch": 0.5828547706358541, "grad_norm": 0.16874973475933075, "learning_rate": 1e-05, "loss": 0.5733, "step": 2106 }, { "epoch": 0.5831315297862035, "grad_norm": 0.17818641662597656, "learning_rate": 1e-05, "loss": 0.5688, "step": 2107 }, { "epoch": 0.5834082889365529, "grad_norm": 0.17456184327602386, "learning_rate": 1e-05, "loss": 0.5706, "step": 2108 }, { "epoch": 0.5836850480869024, "grad_norm": 0.16603194177150726, "learning_rate": 1e-05, "loss": 0.5324, "step": 2109 }, { "epoch": 0.5839618072372518, "grad_norm": 0.17713533341884613, "learning_rate": 1e-05, "loss": 0.5742, "step": 2110 }, { "epoch": 0.5842385663876012, "grad_norm": 0.17130924761295319, "learning_rate": 1e-05, "loss": 0.5986, "step": 2111 }, { "epoch": 0.5845153255379506, "grad_norm": 0.1726629137992859, "learning_rate": 1e-05, "loss": 0.5721, "step": 2112 }, { "epoch": 0.5847920846883, "grad_norm": 0.1740168184041977, "learning_rate": 1e-05, "loss": 0.5467, "step": 2113 }, { "epoch": 0.5850688438386494, "grad_norm": 0.16997122764587402, "learning_rate": 1e-05, "loss": 0.532, "step": 2114 }, { "epoch": 0.5853456029889988, "grad_norm": 0.165291890501976, "learning_rate": 1e-05, "loss": 0.5624, "step": 2115 }, { "epoch": 0.5856223621393483, "grad_norm": 0.1740363985300064, "learning_rate": 1e-05, "loss": 0.547, "step": 2116 }, { "epoch": 0.5858991212896977, "grad_norm": 0.1701834499835968, "learning_rate": 1e-05, "loss": 0.5313, "step": 2117 }, { "epoch": 0.5861758804400471, "grad_norm": 0.18175965547561646, "learning_rate": 1e-05, "loss": 0.5418, "step": 2118 }, { "epoch": 0.5864526395903965, "grad_norm": 0.17683249711990356, "learning_rate": 1e-05, "loss": 0.5591, "step": 2119 }, { "epoch": 0.5867293987407458, "grad_norm": 0.17242415249347687, "learning_rate": 1e-05, "loss": 0.5522, "step": 2120 }, { "epoch": 0.5870061578910952, "grad_norm": 0.17345868051052094, "learning_rate": 1e-05, "loss": 0.549, "step": 2121 }, { "epoch": 0.5872829170414446, "grad_norm": 0.17121019959449768, "learning_rate": 1e-05, "loss": 0.5673, "step": 2122 }, { "epoch": 0.5875596761917941, "grad_norm": 0.17043524980545044, "learning_rate": 1e-05, "loss": 0.5653, "step": 2123 }, { "epoch": 0.5878364353421435, "grad_norm": 0.17966413497924805, "learning_rate": 1e-05, "loss": 0.5692, "step": 2124 }, { "epoch": 0.5881131944924929, "grad_norm": 0.18451671302318573, "learning_rate": 1e-05, "loss": 0.5806, "step": 2125 }, { "epoch": 0.5883899536428423, "grad_norm": 0.16826961934566498, "learning_rate": 1e-05, "loss": 0.5389, "step": 2126 }, { "epoch": 0.5886667127931917, "grad_norm": 0.17339061200618744, "learning_rate": 1e-05, "loss": 0.5648, "step": 2127 }, { "epoch": 0.5889434719435411, "grad_norm": 0.17642638087272644, "learning_rate": 1e-05, "loss": 0.543, "step": 2128 }, { "epoch": 0.5892202310938905, "grad_norm": 0.17220439016819, "learning_rate": 1e-05, "loss": 0.573, "step": 2129 }, { "epoch": 0.58949699024424, "grad_norm": 0.16965459287166595, "learning_rate": 1e-05, "loss": 0.5544, "step": 2130 }, { "epoch": 0.5897737493945894, "grad_norm": 0.16725651919841766, "learning_rate": 1e-05, "loss": 0.5885, "step": 2131 }, { "epoch": 0.5900505085449388, "grad_norm": 0.18113112449645996, "learning_rate": 1e-05, "loss": 0.5801, "step": 2132 }, { "epoch": 0.5903272676952882, "grad_norm": 0.18165788054466248, "learning_rate": 1e-05, "loss": 0.5724, "step": 2133 }, { "epoch": 0.5906040268456376, "grad_norm": 0.1793775111436844, "learning_rate": 1e-05, "loss": 0.5466, "step": 2134 }, { "epoch": 0.590880785995987, "grad_norm": 0.17093592882156372, "learning_rate": 1e-05, "loss": 0.5484, "step": 2135 }, { "epoch": 0.5911575451463364, "grad_norm": 0.17342473566532135, "learning_rate": 1e-05, "loss": 0.5513, "step": 2136 }, { "epoch": 0.5914343042966859, "grad_norm": 0.17034098505973816, "learning_rate": 1e-05, "loss": 0.5452, "step": 2137 }, { "epoch": 0.5917110634470352, "grad_norm": 0.17261898517608643, "learning_rate": 1e-05, "loss": 0.5593, "step": 2138 }, { "epoch": 0.5919878225973846, "grad_norm": 0.17335163056850433, "learning_rate": 1e-05, "loss": 0.5737, "step": 2139 }, { "epoch": 0.592264581747734, "grad_norm": 0.17134007811546326, "learning_rate": 1e-05, "loss": 0.5358, "step": 2140 }, { "epoch": 0.5925413408980834, "grad_norm": 0.17210420966148376, "learning_rate": 1e-05, "loss": 0.5612, "step": 2141 }, { "epoch": 0.5928181000484328, "grad_norm": 0.18242530524730682, "learning_rate": 1e-05, "loss": 0.5512, "step": 2142 }, { "epoch": 0.5930948591987822, "grad_norm": 0.17825667560100555, "learning_rate": 1e-05, "loss": 0.5775, "step": 2143 }, { "epoch": 0.5933716183491317, "grad_norm": 0.16905561089515686, "learning_rate": 1e-05, "loss": 0.5677, "step": 2144 }, { "epoch": 0.5936483774994811, "grad_norm": 0.16831351816654205, "learning_rate": 1e-05, "loss": 0.575, "step": 2145 }, { "epoch": 0.5939251366498305, "grad_norm": 0.1700465977191925, "learning_rate": 1e-05, "loss": 0.5666, "step": 2146 }, { "epoch": 0.5942018958001799, "grad_norm": 0.17954117059707642, "learning_rate": 1e-05, "loss": 0.5787, "step": 2147 }, { "epoch": 0.5944786549505293, "grad_norm": 0.17254219949245453, "learning_rate": 1e-05, "loss": 0.5365, "step": 2148 }, { "epoch": 0.5947554141008787, "grad_norm": 0.16157715022563934, "learning_rate": 1e-05, "loss": 0.5438, "step": 2149 }, { "epoch": 0.5950321732512281, "grad_norm": 0.16644948720932007, "learning_rate": 1e-05, "loss": 0.5442, "step": 2150 }, { "epoch": 0.5953089324015776, "grad_norm": 0.16988404095172882, "learning_rate": 1e-05, "loss": 0.5748, "step": 2151 }, { "epoch": 0.595585691551927, "grad_norm": 0.1788991391658783, "learning_rate": 1e-05, "loss": 0.5719, "step": 2152 }, { "epoch": 0.5958624507022764, "grad_norm": 0.19523178040981293, "learning_rate": 1e-05, "loss": 0.5708, "step": 2153 }, { "epoch": 0.5961392098526258, "grad_norm": 0.1833600550889969, "learning_rate": 1e-05, "loss": 0.551, "step": 2154 }, { "epoch": 0.5964159690029751, "grad_norm": 0.1721370816230774, "learning_rate": 1e-05, "loss": 0.5839, "step": 2155 }, { "epoch": 0.5966927281533245, "grad_norm": 0.17942026257514954, "learning_rate": 1e-05, "loss": 0.5521, "step": 2156 }, { "epoch": 0.5969694873036739, "grad_norm": 0.16371065378189087, "learning_rate": 1e-05, "loss": 0.5363, "step": 2157 }, { "epoch": 0.5972462464540234, "grad_norm": 0.17133134603500366, "learning_rate": 1e-05, "loss": 0.5535, "step": 2158 }, { "epoch": 0.5975230056043728, "grad_norm": 0.17163190245628357, "learning_rate": 1e-05, "loss": 0.5479, "step": 2159 }, { "epoch": 0.5977997647547222, "grad_norm": 0.1738308072090149, "learning_rate": 1e-05, "loss": 0.5468, "step": 2160 }, { "epoch": 0.5980765239050716, "grad_norm": 0.16739867627620697, "learning_rate": 1e-05, "loss": 0.5562, "step": 2161 }, { "epoch": 0.598353283055421, "grad_norm": 0.17350149154663086, "learning_rate": 1e-05, "loss": 0.5611, "step": 2162 }, { "epoch": 0.5986300422057704, "grad_norm": 0.17685888707637787, "learning_rate": 1e-05, "loss": 0.5657, "step": 2163 }, { "epoch": 0.5989068013561198, "grad_norm": 0.18136398494243622, "learning_rate": 1e-05, "loss": 0.5801, "step": 2164 }, { "epoch": 0.5991835605064693, "grad_norm": 0.173369899392128, "learning_rate": 1e-05, "loss": 0.541, "step": 2165 }, { "epoch": 0.5994603196568187, "grad_norm": 0.1738273948431015, "learning_rate": 1e-05, "loss": 0.5607, "step": 2166 }, { "epoch": 0.5997370788071681, "grad_norm": 0.16939005255699158, "learning_rate": 1e-05, "loss": 0.5384, "step": 2167 }, { "epoch": 0.6000138379575175, "grad_norm": 0.18012766540050507, "learning_rate": 1e-05, "loss": 0.5344, "step": 2168 }, { "epoch": 0.6002905971078669, "grad_norm": 0.17555253207683563, "learning_rate": 1e-05, "loss": 0.5531, "step": 2169 }, { "epoch": 0.6005673562582163, "grad_norm": 0.17365066707134247, "learning_rate": 1e-05, "loss": 0.5714, "step": 2170 }, { "epoch": 0.6008441154085657, "grad_norm": 0.17033128440380096, "learning_rate": 1e-05, "loss": 0.5625, "step": 2171 }, { "epoch": 0.601120874558915, "grad_norm": 0.17618824541568756, "learning_rate": 1e-05, "loss": 0.5996, "step": 2172 }, { "epoch": 0.6013976337092645, "grad_norm": 0.17415833473205566, "learning_rate": 1e-05, "loss": 0.5741, "step": 2173 }, { "epoch": 0.6016743928596139, "grad_norm": 0.17253704369068146, "learning_rate": 1e-05, "loss": 0.5503, "step": 2174 }, { "epoch": 0.6019511520099633, "grad_norm": 0.16649998724460602, "learning_rate": 1e-05, "loss": 0.5485, "step": 2175 }, { "epoch": 0.6022279111603127, "grad_norm": 0.17095625400543213, "learning_rate": 1e-05, "loss": 0.5453, "step": 2176 }, { "epoch": 0.6025046703106621, "grad_norm": 0.1645006388425827, "learning_rate": 1e-05, "loss": 0.5416, "step": 2177 }, { "epoch": 0.6027814294610115, "grad_norm": 0.1755443811416626, "learning_rate": 1e-05, "loss": 0.5471, "step": 2178 }, { "epoch": 0.603058188611361, "grad_norm": 0.16968074440956116, "learning_rate": 1e-05, "loss": 0.5384, "step": 2179 }, { "epoch": 0.6033349477617104, "grad_norm": 0.1745689958333969, "learning_rate": 1e-05, "loss": 0.5529, "step": 2180 }, { "epoch": 0.6036117069120598, "grad_norm": 0.17780227959156036, "learning_rate": 1e-05, "loss": 0.5495, "step": 2181 }, { "epoch": 0.6038884660624092, "grad_norm": 0.17314665019512177, "learning_rate": 1e-05, "loss": 0.5434, "step": 2182 }, { "epoch": 0.6041652252127586, "grad_norm": 0.1924513429403305, "learning_rate": 1e-05, "loss": 0.5864, "step": 2183 }, { "epoch": 0.604441984363108, "grad_norm": 0.17736908793449402, "learning_rate": 1e-05, "loss": 0.6039, "step": 2184 }, { "epoch": 0.6047187435134574, "grad_norm": 0.16942991316318512, "learning_rate": 1e-05, "loss": 0.5636, "step": 2185 }, { "epoch": 0.6049955026638069, "grad_norm": 0.17920830845832825, "learning_rate": 1e-05, "loss": 0.5713, "step": 2186 }, { "epoch": 0.6052722618141563, "grad_norm": 0.17364738881587982, "learning_rate": 1e-05, "loss": 0.5616, "step": 2187 }, { "epoch": 0.6055490209645057, "grad_norm": 0.16921740770339966, "learning_rate": 1e-05, "loss": 0.5365, "step": 2188 }, { "epoch": 0.605825780114855, "grad_norm": 0.16665342450141907, "learning_rate": 1e-05, "loss": 0.5478, "step": 2189 }, { "epoch": 0.6061025392652044, "grad_norm": 0.17575229704380035, "learning_rate": 1e-05, "loss": 0.563, "step": 2190 }, { "epoch": 0.6063792984155538, "grad_norm": 0.1781122237443924, "learning_rate": 1e-05, "loss": 0.5729, "step": 2191 }, { "epoch": 0.6066560575659032, "grad_norm": 0.18990203738212585, "learning_rate": 1e-05, "loss": 0.5703, "step": 2192 }, { "epoch": 0.6069328167162527, "grad_norm": 0.18138577044010162, "learning_rate": 1e-05, "loss": 0.5737, "step": 2193 }, { "epoch": 0.6072095758666021, "grad_norm": 0.17749270796775818, "learning_rate": 1e-05, "loss": 0.5856, "step": 2194 }, { "epoch": 0.6074863350169515, "grad_norm": 0.1671728640794754, "learning_rate": 1e-05, "loss": 0.5599, "step": 2195 }, { "epoch": 0.6077630941673009, "grad_norm": 0.17486020922660828, "learning_rate": 1e-05, "loss": 0.5644, "step": 2196 }, { "epoch": 0.6080398533176503, "grad_norm": 0.1718221753835678, "learning_rate": 1e-05, "loss": 0.5962, "step": 2197 }, { "epoch": 0.6083166124679997, "grad_norm": 0.17849212884902954, "learning_rate": 1e-05, "loss": 0.5416, "step": 2198 }, { "epoch": 0.6085933716183491, "grad_norm": 0.1707196831703186, "learning_rate": 1e-05, "loss": 0.5765, "step": 2199 }, { "epoch": 0.6088701307686986, "grad_norm": 0.16914144158363342, "learning_rate": 1e-05, "loss": 0.5896, "step": 2200 }, { "epoch": 0.609146889919048, "grad_norm": 0.17838452756404877, "learning_rate": 1e-05, "loss": 0.5692, "step": 2201 }, { "epoch": 0.6094236490693974, "grad_norm": 0.17289431393146515, "learning_rate": 1e-05, "loss": 0.5419, "step": 2202 }, { "epoch": 0.6097004082197468, "grad_norm": 0.1741851270198822, "learning_rate": 1e-05, "loss": 0.5282, "step": 2203 }, { "epoch": 0.6099771673700962, "grad_norm": 0.17579761147499084, "learning_rate": 1e-05, "loss": 0.5527, "step": 2204 }, { "epoch": 0.6102539265204456, "grad_norm": 0.18604359030723572, "learning_rate": 1e-05, "loss": 0.5741, "step": 2205 }, { "epoch": 0.6105306856707949, "grad_norm": 0.17687572538852692, "learning_rate": 1e-05, "loss": 0.5848, "step": 2206 }, { "epoch": 0.6108074448211444, "grad_norm": 0.1685599535703659, "learning_rate": 1e-05, "loss": 0.5633, "step": 2207 }, { "epoch": 0.6110842039714938, "grad_norm": 13.40020751953125, "learning_rate": 1e-05, "loss": 0.539, "step": 2208 }, { "epoch": 0.6113609631218432, "grad_norm": 47.94011306762695, "learning_rate": 1e-05, "loss": 0.5563, "step": 2209 }, { "epoch": 0.6116377222721926, "grad_norm": 0.18288567662239075, "learning_rate": 1e-05, "loss": 0.5625, "step": 2210 }, { "epoch": 0.611914481422542, "grad_norm": 0.20714718103408813, "learning_rate": 1e-05, "loss": 0.576, "step": 2211 }, { "epoch": 0.6121912405728914, "grad_norm": 0.17809490859508514, "learning_rate": 1e-05, "loss": 0.5718, "step": 2212 }, { "epoch": 0.6124679997232408, "grad_norm": 0.16755719482898712, "learning_rate": 1e-05, "loss": 0.5212, "step": 2213 }, { "epoch": 0.6127447588735903, "grad_norm": 0.1742374747991562, "learning_rate": 1e-05, "loss": 0.5567, "step": 2214 }, { "epoch": 0.6130215180239397, "grad_norm": 0.18100954592227936, "learning_rate": 1e-05, "loss": 0.5483, "step": 2215 }, { "epoch": 0.6132982771742891, "grad_norm": 0.17408587038516998, "learning_rate": 1e-05, "loss": 0.5444, "step": 2216 }, { "epoch": 0.6135750363246385, "grad_norm": 0.1698635071516037, "learning_rate": 1e-05, "loss": 0.5574, "step": 2217 }, { "epoch": 0.6138517954749879, "grad_norm": 0.1738133579492569, "learning_rate": 1e-05, "loss": 0.5622, "step": 2218 }, { "epoch": 0.6141285546253373, "grad_norm": 0.17559093236923218, "learning_rate": 1e-05, "loss": 0.543, "step": 2219 }, { "epoch": 0.6144053137756867, "grad_norm": 0.17530594766139984, "learning_rate": 1e-05, "loss": 0.557, "step": 2220 }, { "epoch": 0.6146820729260362, "grad_norm": 0.18112359941005707, "learning_rate": 1e-05, "loss": 0.536, "step": 2221 }, { "epoch": 0.6149588320763856, "grad_norm": 0.17859847843647003, "learning_rate": 1e-05, "loss": 0.5305, "step": 2222 }, { "epoch": 0.6152355912267349, "grad_norm": 0.17502766847610474, "learning_rate": 1e-05, "loss": 0.5451, "step": 2223 }, { "epoch": 0.6155123503770843, "grad_norm": 0.19071777164936066, "learning_rate": 1e-05, "loss": 0.5625, "step": 2224 }, { "epoch": 0.6157891095274337, "grad_norm": 0.18086019158363342, "learning_rate": 1e-05, "loss": 0.5542, "step": 2225 }, { "epoch": 0.6160658686777831, "grad_norm": 0.1795615255832672, "learning_rate": 1e-05, "loss": 0.5253, "step": 2226 }, { "epoch": 0.6163426278281325, "grad_norm": 0.1835160106420517, "learning_rate": 1e-05, "loss": 0.551, "step": 2227 }, { "epoch": 0.616619386978482, "grad_norm": 0.1690492331981659, "learning_rate": 1e-05, "loss": 0.5715, "step": 2228 }, { "epoch": 0.6168961461288314, "grad_norm": 0.17332088947296143, "learning_rate": 1e-05, "loss": 0.5657, "step": 2229 }, { "epoch": 0.6171729052791808, "grad_norm": 0.1872040033340454, "learning_rate": 1e-05, "loss": 0.5477, "step": 2230 }, { "epoch": 0.6174496644295302, "grad_norm": 0.18225915729999542, "learning_rate": 1e-05, "loss": 0.5602, "step": 2231 }, { "epoch": 0.6177264235798796, "grad_norm": 0.17226485908031464, "learning_rate": 1e-05, "loss": 0.5588, "step": 2232 }, { "epoch": 0.618003182730229, "grad_norm": 0.18264999985694885, "learning_rate": 1e-05, "loss": 0.5633, "step": 2233 }, { "epoch": 0.6182799418805784, "grad_norm": 0.18316172063350677, "learning_rate": 1e-05, "loss": 0.581, "step": 2234 }, { "epoch": 0.6185567010309279, "grad_norm": 0.18604029715061188, "learning_rate": 1e-05, "loss": 0.5701, "step": 2235 }, { "epoch": 0.6188334601812773, "grad_norm": 0.18127885460853577, "learning_rate": 1e-05, "loss": 0.5657, "step": 2236 }, { "epoch": 0.6191102193316267, "grad_norm": 0.17335666716098785, "learning_rate": 1e-05, "loss": 0.54, "step": 2237 }, { "epoch": 0.6193869784819761, "grad_norm": 0.1792353093624115, "learning_rate": 1e-05, "loss": 0.5696, "step": 2238 }, { "epoch": 0.6196637376323255, "grad_norm": 0.18520484864711761, "learning_rate": 1e-05, "loss": 0.5886, "step": 2239 }, { "epoch": 0.6199404967826749, "grad_norm": 0.17304681241512299, "learning_rate": 1e-05, "loss": 0.572, "step": 2240 }, { "epoch": 0.6202172559330242, "grad_norm": 0.169776052236557, "learning_rate": 1e-05, "loss": 0.5655, "step": 2241 }, { "epoch": 0.6204940150833737, "grad_norm": 0.16992580890655518, "learning_rate": 1e-05, "loss": 0.5462, "step": 2242 }, { "epoch": 0.6207707742337231, "grad_norm": 0.18688850104808807, "learning_rate": 1e-05, "loss": 0.5587, "step": 2243 }, { "epoch": 0.6210475333840725, "grad_norm": 0.1662135124206543, "learning_rate": 1e-05, "loss": 0.5449, "step": 2244 }, { "epoch": 0.6213242925344219, "grad_norm": 0.18175148963928223, "learning_rate": 1e-05, "loss": 0.5772, "step": 2245 }, { "epoch": 0.6216010516847713, "grad_norm": 0.1691540628671646, "learning_rate": 1e-05, "loss": 0.5563, "step": 2246 }, { "epoch": 0.6218778108351207, "grad_norm": 0.17269812524318695, "learning_rate": 1e-05, "loss": 0.5424, "step": 2247 }, { "epoch": 0.6221545699854701, "grad_norm": 0.1742839813232422, "learning_rate": 1e-05, "loss": 0.5535, "step": 2248 }, { "epoch": 0.6224313291358196, "grad_norm": 0.17605724930763245, "learning_rate": 1e-05, "loss": 0.5514, "step": 2249 }, { "epoch": 0.622708088286169, "grad_norm": 0.1668228954076767, "learning_rate": 1e-05, "loss": 0.5494, "step": 2250 }, { "epoch": 0.6229848474365184, "grad_norm": 0.1727142632007599, "learning_rate": 1e-05, "loss": 0.5549, "step": 2251 }, { "epoch": 0.6232616065868678, "grad_norm": 0.17197689414024353, "learning_rate": 1e-05, "loss": 0.5489, "step": 2252 }, { "epoch": 0.6235383657372172, "grad_norm": 0.16746976971626282, "learning_rate": 1e-05, "loss": 0.558, "step": 2253 }, { "epoch": 0.6238151248875666, "grad_norm": 0.17376808822155, "learning_rate": 1e-05, "loss": 0.5523, "step": 2254 }, { "epoch": 0.624091884037916, "grad_norm": 0.17718665301799774, "learning_rate": 1e-05, "loss": 0.5443, "step": 2255 }, { "epoch": 0.6243686431882655, "grad_norm": 0.1865278035402298, "learning_rate": 1e-05, "loss": 0.5807, "step": 2256 }, { "epoch": 0.6246454023386149, "grad_norm": 0.17716793715953827, "learning_rate": 1e-05, "loss": 0.5551, "step": 2257 }, { "epoch": 0.6249221614889642, "grad_norm": 0.18355108797550201, "learning_rate": 1e-05, "loss": 0.558, "step": 2258 }, { "epoch": 0.6251989206393136, "grad_norm": 0.18572069704532623, "learning_rate": 1e-05, "loss": 0.6042, "step": 2259 }, { "epoch": 0.625475679789663, "grad_norm": 0.16490595042705536, "learning_rate": 1e-05, "loss": 0.5672, "step": 2260 }, { "epoch": 0.6257524389400124, "grad_norm": 0.1639026552438736, "learning_rate": 1e-05, "loss": 0.5334, "step": 2261 }, { "epoch": 0.6260291980903618, "grad_norm": 0.18405069410800934, "learning_rate": 1e-05, "loss": 0.5786, "step": 2262 }, { "epoch": 0.6263059572407113, "grad_norm": 0.1808580905199051, "learning_rate": 1e-05, "loss": 0.5734, "step": 2263 }, { "epoch": 0.6265827163910607, "grad_norm": 0.17772582173347473, "learning_rate": 1e-05, "loss": 0.5697, "step": 2264 }, { "epoch": 0.6268594755414101, "grad_norm": 0.17429611086845398, "learning_rate": 1e-05, "loss": 0.5485, "step": 2265 }, { "epoch": 0.6271362346917595, "grad_norm": 0.1703696846961975, "learning_rate": 1e-05, "loss": 0.5403, "step": 2266 }, { "epoch": 0.6274129938421089, "grad_norm": 0.17865586280822754, "learning_rate": 1e-05, "loss": 0.5951, "step": 2267 }, { "epoch": 0.6276897529924583, "grad_norm": 0.17063015699386597, "learning_rate": 1e-05, "loss": 0.5349, "step": 2268 }, { "epoch": 0.6279665121428077, "grad_norm": 0.174655482172966, "learning_rate": 1e-05, "loss": 0.5749, "step": 2269 }, { "epoch": 0.6282432712931572, "grad_norm": 0.1679481863975525, "learning_rate": 1e-05, "loss": 0.5343, "step": 2270 }, { "epoch": 0.6285200304435066, "grad_norm": 0.17972268164157867, "learning_rate": 1e-05, "loss": 0.5354, "step": 2271 }, { "epoch": 0.628796789593856, "grad_norm": 0.16361911594867706, "learning_rate": 1e-05, "loss": 0.5803, "step": 2272 }, { "epoch": 0.6290735487442054, "grad_norm": 0.18688960373401642, "learning_rate": 1e-05, "loss": 0.5798, "step": 2273 }, { "epoch": 0.6293503078945548, "grad_norm": 0.17233462631702423, "learning_rate": 1e-05, "loss": 0.5561, "step": 2274 }, { "epoch": 0.6296270670449041, "grad_norm": 0.16738928854465485, "learning_rate": 1e-05, "loss": 0.5681, "step": 2275 }, { "epoch": 0.6299038261952535, "grad_norm": 0.18187496066093445, "learning_rate": 1e-05, "loss": 0.5493, "step": 2276 }, { "epoch": 0.630180585345603, "grad_norm": 0.17215397953987122, "learning_rate": 1e-05, "loss": 0.5525, "step": 2277 }, { "epoch": 0.6304573444959524, "grad_norm": 0.17648212611675262, "learning_rate": 1e-05, "loss": 0.5385, "step": 2278 }, { "epoch": 0.6307341036463018, "grad_norm": 0.17061179876327515, "learning_rate": 1e-05, "loss": 0.5579, "step": 2279 }, { "epoch": 0.6310108627966512, "grad_norm": 0.16416840255260468, "learning_rate": 1e-05, "loss": 0.5257, "step": 2280 }, { "epoch": 0.6312876219470006, "grad_norm": 0.17659984529018402, "learning_rate": 1e-05, "loss": 0.5572, "step": 2281 }, { "epoch": 0.63156438109735, "grad_norm": 0.16553713381290436, "learning_rate": 1e-05, "loss": 0.5606, "step": 2282 }, { "epoch": 0.6318411402476994, "grad_norm": 0.16787485778331757, "learning_rate": 1e-05, "loss": 0.5527, "step": 2283 }, { "epoch": 0.6321178993980489, "grad_norm": 0.1752990037202835, "learning_rate": 1e-05, "loss": 0.554, "step": 2284 }, { "epoch": 0.6323946585483983, "grad_norm": 0.1797342747449875, "learning_rate": 1e-05, "loss": 0.532, "step": 2285 }, { "epoch": 0.6326714176987477, "grad_norm": 0.17427347600460052, "learning_rate": 1e-05, "loss": 0.5678, "step": 2286 }, { "epoch": 0.6329481768490971, "grad_norm": 9.78564739227295, "learning_rate": 1e-05, "loss": 0.5779, "step": 2287 }, { "epoch": 0.6332249359994465, "grad_norm": 0.18531662225723267, "learning_rate": 1e-05, "loss": 0.5909, "step": 2288 }, { "epoch": 0.6335016951497959, "grad_norm": 0.17412063479423523, "learning_rate": 1e-05, "loss": 0.5544, "step": 2289 }, { "epoch": 0.6337784543001453, "grad_norm": 0.17431309819221497, "learning_rate": 1e-05, "loss": 0.5491, "step": 2290 }, { "epoch": 0.6340552134504948, "grad_norm": 0.17101170122623444, "learning_rate": 1e-05, "loss": 0.5647, "step": 2291 }, { "epoch": 0.6343319726008441, "grad_norm": 0.17978531122207642, "learning_rate": 1e-05, "loss": 0.5477, "step": 2292 }, { "epoch": 0.6346087317511935, "grad_norm": 0.1690674126148224, "learning_rate": 1e-05, "loss": 0.5575, "step": 2293 }, { "epoch": 0.6348854909015429, "grad_norm": 0.17282354831695557, "learning_rate": 1e-05, "loss": 0.5574, "step": 2294 }, { "epoch": 0.6351622500518923, "grad_norm": 0.16775354743003845, "learning_rate": 1e-05, "loss": 0.5697, "step": 2295 }, { "epoch": 0.6354390092022417, "grad_norm": 0.1717098206281662, "learning_rate": 1e-05, "loss": 0.5649, "step": 2296 }, { "epoch": 0.6357157683525911, "grad_norm": 0.18799751996994019, "learning_rate": 1e-05, "loss": 0.57, "step": 2297 }, { "epoch": 0.6359925275029406, "grad_norm": 0.16942638158798218, "learning_rate": 1e-05, "loss": 0.5504, "step": 2298 }, { "epoch": 0.63626928665329, "grad_norm": 0.1764516532421112, "learning_rate": 1e-05, "loss": 0.5488, "step": 2299 }, { "epoch": 0.6365460458036394, "grad_norm": 0.1712055653333664, "learning_rate": 1e-05, "loss": 0.5738, "step": 2300 }, { "epoch": 0.6368228049539888, "grad_norm": 0.16672347486019135, "learning_rate": 1e-05, "loss": 0.51, "step": 2301 }, { "epoch": 0.6370995641043382, "grad_norm": 0.1785862147808075, "learning_rate": 1e-05, "loss": 0.5637, "step": 2302 }, { "epoch": 0.6373763232546876, "grad_norm": 0.16917099058628082, "learning_rate": 1e-05, "loss": 0.5785, "step": 2303 }, { "epoch": 0.637653082405037, "grad_norm": 0.17539678514003754, "learning_rate": 1e-05, "loss": 0.5759, "step": 2304 }, { "epoch": 0.6379298415553865, "grad_norm": 0.1769489198923111, "learning_rate": 1e-05, "loss": 0.5645, "step": 2305 }, { "epoch": 0.6382066007057359, "grad_norm": 0.16654305160045624, "learning_rate": 1e-05, "loss": 0.5482, "step": 2306 }, { "epoch": 0.6384833598560853, "grad_norm": 0.16434219479560852, "learning_rate": 1e-05, "loss": 0.5141, "step": 2307 }, { "epoch": 0.6387601190064347, "grad_norm": 0.17228522896766663, "learning_rate": 1e-05, "loss": 0.531, "step": 2308 }, { "epoch": 0.639036878156784, "grad_norm": 0.18009042739868164, "learning_rate": 1e-05, "loss": 0.5265, "step": 2309 }, { "epoch": 0.6393136373071334, "grad_norm": 0.17348024249076843, "learning_rate": 1e-05, "loss": 0.5554, "step": 2310 }, { "epoch": 0.6395903964574828, "grad_norm": 0.1672944277524948, "learning_rate": 1e-05, "loss": 0.5411, "step": 2311 }, { "epoch": 0.6398671556078322, "grad_norm": 0.16618874669075012, "learning_rate": 1e-05, "loss": 0.5583, "step": 2312 }, { "epoch": 0.6401439147581817, "grad_norm": 0.1675325483083725, "learning_rate": 1e-05, "loss": 0.5465, "step": 2313 }, { "epoch": 0.6404206739085311, "grad_norm": 0.17146903276443481, "learning_rate": 1e-05, "loss": 0.564, "step": 2314 }, { "epoch": 0.6406974330588805, "grad_norm": 0.17181670665740967, "learning_rate": 1e-05, "loss": 0.5404, "step": 2315 }, { "epoch": 0.6409741922092299, "grad_norm": 0.17358867824077606, "learning_rate": 1e-05, "loss": 0.5463, "step": 2316 }, { "epoch": 0.6412509513595793, "grad_norm": 0.1635688841342926, "learning_rate": 1e-05, "loss": 0.5386, "step": 2317 }, { "epoch": 0.6415277105099287, "grad_norm": 0.17472028732299805, "learning_rate": 1e-05, "loss": 0.5334, "step": 2318 }, { "epoch": 0.6418044696602782, "grad_norm": 0.16743780672550201, "learning_rate": 1e-05, "loss": 0.5302, "step": 2319 }, { "epoch": 0.6420812288106276, "grad_norm": 0.16664181649684906, "learning_rate": 1e-05, "loss": 0.5385, "step": 2320 }, { "epoch": 0.642357987960977, "grad_norm": 0.168562650680542, "learning_rate": 1e-05, "loss": 0.5335, "step": 2321 }, { "epoch": 0.6426347471113264, "grad_norm": 0.17445571720600128, "learning_rate": 1e-05, "loss": 0.5567, "step": 2322 }, { "epoch": 0.6429115062616758, "grad_norm": 0.16327030956745148, "learning_rate": 1e-05, "loss": 0.5385, "step": 2323 }, { "epoch": 0.6431882654120252, "grad_norm": 0.17121312022209167, "learning_rate": 1e-05, "loss": 0.5777, "step": 2324 }, { "epoch": 0.6434650245623746, "grad_norm": 0.17061348259449005, "learning_rate": 1e-05, "loss": 0.5351, "step": 2325 }, { "epoch": 0.643741783712724, "grad_norm": 0.17421287298202515, "learning_rate": 1e-05, "loss": 0.5305, "step": 2326 }, { "epoch": 0.6440185428630734, "grad_norm": 0.1677539348602295, "learning_rate": 1e-05, "loss": 0.567, "step": 2327 }, { "epoch": 0.6442953020134228, "grad_norm": 0.16012750566005707, "learning_rate": 1e-05, "loss": 0.5453, "step": 2328 }, { "epoch": 0.6445720611637722, "grad_norm": 0.171873539686203, "learning_rate": 1e-05, "loss": 0.554, "step": 2329 }, { "epoch": 0.6448488203141216, "grad_norm": 0.1641082763671875, "learning_rate": 1e-05, "loss": 0.5307, "step": 2330 }, { "epoch": 0.645125579464471, "grad_norm": 0.17022587358951569, "learning_rate": 1e-05, "loss": 0.5779, "step": 2331 }, { "epoch": 0.6454023386148204, "grad_norm": 0.17183950543403625, "learning_rate": 1e-05, "loss": 0.5312, "step": 2332 }, { "epoch": 0.6456790977651699, "grad_norm": 0.17365358769893646, "learning_rate": 1e-05, "loss": 0.5123, "step": 2333 }, { "epoch": 0.6459558569155193, "grad_norm": 0.17611977458000183, "learning_rate": 1e-05, "loss": 0.5478, "step": 2334 }, { "epoch": 0.6462326160658687, "grad_norm": 0.16683071851730347, "learning_rate": 1e-05, "loss": 0.592, "step": 2335 }, { "epoch": 0.6465093752162181, "grad_norm": 0.17022867500782013, "learning_rate": 1e-05, "loss": 0.566, "step": 2336 }, { "epoch": 0.6467861343665675, "grad_norm": 0.1822931468486786, "learning_rate": 1e-05, "loss": 0.579, "step": 2337 }, { "epoch": 0.6470628935169169, "grad_norm": 0.17498579621315002, "learning_rate": 1e-05, "loss": 0.6169, "step": 2338 }, { "epoch": 0.6473396526672663, "grad_norm": 0.1768830567598343, "learning_rate": 1e-05, "loss": 0.5304, "step": 2339 }, { "epoch": 0.6476164118176158, "grad_norm": 0.17530149221420288, "learning_rate": 1e-05, "loss": 0.5359, "step": 2340 }, { "epoch": 0.6478931709679652, "grad_norm": 0.17634303867816925, "learning_rate": 1e-05, "loss": 0.5506, "step": 2341 }, { "epoch": 0.6481699301183146, "grad_norm": 0.18382583558559418, "learning_rate": 1e-05, "loss": 0.5478, "step": 2342 }, { "epoch": 0.648446689268664, "grad_norm": 0.1715085208415985, "learning_rate": 1e-05, "loss": 0.542, "step": 2343 }, { "epoch": 0.6487234484190133, "grad_norm": 0.16766496002674103, "learning_rate": 1e-05, "loss": 0.5679, "step": 2344 }, { "epoch": 0.6490002075693627, "grad_norm": 0.18697166442871094, "learning_rate": 1e-05, "loss": 0.605, "step": 2345 }, { "epoch": 0.6492769667197121, "grad_norm": 0.16798271238803864, "learning_rate": 1e-05, "loss": 0.5277, "step": 2346 }, { "epoch": 0.6495537258700615, "grad_norm": 0.17683248221874237, "learning_rate": 1e-05, "loss": 0.5697, "step": 2347 }, { "epoch": 0.649830485020411, "grad_norm": 0.17420506477355957, "learning_rate": 1e-05, "loss": 0.545, "step": 2348 }, { "epoch": 0.6501072441707604, "grad_norm": 0.18444061279296875, "learning_rate": 1e-05, "loss": 0.5606, "step": 2349 }, { "epoch": 0.6503840033211098, "grad_norm": 0.17353719472885132, "learning_rate": 1e-05, "loss": 0.5716, "step": 2350 }, { "epoch": 0.6506607624714592, "grad_norm": 0.1757625788450241, "learning_rate": 1e-05, "loss": 0.5801, "step": 2351 }, { "epoch": 0.6509375216218086, "grad_norm": 0.17824602127075195, "learning_rate": 1e-05, "loss": 0.5704, "step": 2352 }, { "epoch": 0.651214280772158, "grad_norm": 0.17012473940849304, "learning_rate": 1e-05, "loss": 0.5397, "step": 2353 }, { "epoch": 0.6514910399225075, "grad_norm": 0.1656632274389267, "learning_rate": 1e-05, "loss": 0.515, "step": 2354 }, { "epoch": 0.6517677990728569, "grad_norm": 0.16990406811237335, "learning_rate": 1e-05, "loss": 0.5441, "step": 2355 }, { "epoch": 0.6520445582232063, "grad_norm": 0.1712074875831604, "learning_rate": 1e-05, "loss": 0.5332, "step": 2356 }, { "epoch": 0.6523213173735557, "grad_norm": 0.18404150009155273, "learning_rate": 1e-05, "loss": 0.5444, "step": 2357 }, { "epoch": 0.6525980765239051, "grad_norm": 0.18424101173877716, "learning_rate": 1e-05, "loss": 0.5633, "step": 2358 }, { "epoch": 0.6528748356742545, "grad_norm": 0.17656540870666504, "learning_rate": 1e-05, "loss": 0.5854, "step": 2359 }, { "epoch": 0.6531515948246039, "grad_norm": 0.16948096454143524, "learning_rate": 1e-05, "loss": 0.5596, "step": 2360 }, { "epoch": 0.6534283539749532, "grad_norm": 0.17498087882995605, "learning_rate": 1e-05, "loss": 0.5601, "step": 2361 }, { "epoch": 0.6537051131253027, "grad_norm": 0.1733863651752472, "learning_rate": 1e-05, "loss": 0.561, "step": 2362 }, { "epoch": 0.6539818722756521, "grad_norm": 0.17198260128498077, "learning_rate": 1e-05, "loss": 0.5418, "step": 2363 }, { "epoch": 0.6542586314260015, "grad_norm": 0.1675989180803299, "learning_rate": 1e-05, "loss": 0.5797, "step": 2364 }, { "epoch": 0.6545353905763509, "grad_norm": 0.16502287983894348, "learning_rate": 1e-05, "loss": 0.582, "step": 2365 }, { "epoch": 0.6548121497267003, "grad_norm": 0.16958287358283997, "learning_rate": 1e-05, "loss": 0.5492, "step": 2366 }, { "epoch": 0.6550889088770497, "grad_norm": 0.1801491379737854, "learning_rate": 1e-05, "loss": 0.5454, "step": 2367 }, { "epoch": 0.6553656680273992, "grad_norm": 0.16753703355789185, "learning_rate": 1e-05, "loss": 0.5557, "step": 2368 }, { "epoch": 0.6556424271777486, "grad_norm": 0.16774606704711914, "learning_rate": 1e-05, "loss": 0.5223, "step": 2369 }, { "epoch": 0.655919186328098, "grad_norm": 0.18394866585731506, "learning_rate": 1e-05, "loss": 0.6067, "step": 2370 }, { "epoch": 0.6561959454784474, "grad_norm": 0.1793018877506256, "learning_rate": 1e-05, "loss": 0.5911, "step": 2371 }, { "epoch": 0.6564727046287968, "grad_norm": 0.16740640997886658, "learning_rate": 1e-05, "loss": 0.5442, "step": 2372 }, { "epoch": 0.6567494637791462, "grad_norm": 0.16655412316322327, "learning_rate": 1e-05, "loss": 0.5556, "step": 2373 }, { "epoch": 0.6570262229294956, "grad_norm": 0.1823354959487915, "learning_rate": 1e-05, "loss": 0.5976, "step": 2374 }, { "epoch": 0.657302982079845, "grad_norm": 0.18593797087669373, "learning_rate": 1e-05, "loss": 0.5488, "step": 2375 }, { "epoch": 0.6575797412301945, "grad_norm": 0.16904717683792114, "learning_rate": 1e-05, "loss": 0.558, "step": 2376 }, { "epoch": 0.6578565003805439, "grad_norm": 0.17556293308734894, "learning_rate": 1e-05, "loss": 0.5509, "step": 2377 }, { "epoch": 0.6581332595308932, "grad_norm": 0.17902064323425293, "learning_rate": 1e-05, "loss": 0.5544, "step": 2378 }, { "epoch": 0.6584100186812426, "grad_norm": 0.17598816752433777, "learning_rate": 1e-05, "loss": 0.55, "step": 2379 }, { "epoch": 0.658686777831592, "grad_norm": 0.17251358926296234, "learning_rate": 1e-05, "loss": 0.5347, "step": 2380 }, { "epoch": 0.6589635369819414, "grad_norm": 0.16716307401657104, "learning_rate": 1e-05, "loss": 0.5443, "step": 2381 }, { "epoch": 0.6592402961322908, "grad_norm": 0.17984971404075623, "learning_rate": 1e-05, "loss": 0.5708, "step": 2382 }, { "epoch": 0.6595170552826403, "grad_norm": 0.1709076464176178, "learning_rate": 1e-05, "loss": 0.5398, "step": 2383 }, { "epoch": 0.6597938144329897, "grad_norm": 0.1738801896572113, "learning_rate": 1e-05, "loss": 0.5746, "step": 2384 }, { "epoch": 0.6600705735833391, "grad_norm": 0.16422995924949646, "learning_rate": 1e-05, "loss": 0.5266, "step": 2385 }, { "epoch": 0.6603473327336885, "grad_norm": 0.17383776605129242, "learning_rate": 1e-05, "loss": 0.5671, "step": 2386 }, { "epoch": 0.6606240918840379, "grad_norm": 0.1713864952325821, "learning_rate": 1e-05, "loss": 0.5421, "step": 2387 }, { "epoch": 0.6609008510343873, "grad_norm": 0.17740648984909058, "learning_rate": 1e-05, "loss": 0.5427, "step": 2388 }, { "epoch": 0.6611776101847368, "grad_norm": 0.17047543823719025, "learning_rate": 1e-05, "loss": 0.5402, "step": 2389 }, { "epoch": 0.6614543693350862, "grad_norm": 0.18968555331230164, "learning_rate": 1e-05, "loss": 0.5905, "step": 2390 }, { "epoch": 0.6617311284854356, "grad_norm": 0.17500433325767517, "learning_rate": 1e-05, "loss": 0.5683, "step": 2391 }, { "epoch": 0.662007887635785, "grad_norm": 0.16996309161186218, "learning_rate": 1e-05, "loss": 0.5687, "step": 2392 }, { "epoch": 0.6622846467861344, "grad_norm": 0.17576079070568085, "learning_rate": 1e-05, "loss": 0.551, "step": 2393 }, { "epoch": 0.6625614059364838, "grad_norm": 0.16714052855968475, "learning_rate": 1e-05, "loss": 0.5198, "step": 2394 }, { "epoch": 0.6628381650868331, "grad_norm": 0.1779322326183319, "learning_rate": 1e-05, "loss": 0.5516, "step": 2395 }, { "epoch": 0.6631149242371825, "grad_norm": 0.1633656620979309, "learning_rate": 1e-05, "loss": 0.5566, "step": 2396 }, { "epoch": 0.663391683387532, "grad_norm": 0.18291151523590088, "learning_rate": 1e-05, "loss": 0.5924, "step": 2397 }, { "epoch": 0.6636684425378814, "grad_norm": 0.16408570110797882, "learning_rate": 1e-05, "loss": 0.5401, "step": 2398 }, { "epoch": 0.6639452016882308, "grad_norm": 0.16920991241931915, "learning_rate": 1e-05, "loss": 0.5657, "step": 2399 }, { "epoch": 0.6642219608385802, "grad_norm": 0.16991016268730164, "learning_rate": 1e-05, "loss": 0.5608, "step": 2400 }, { "epoch": 0.6644987199889296, "grad_norm": 0.1693553328514099, "learning_rate": 1e-05, "loss": 0.5625, "step": 2401 }, { "epoch": 0.664775479139279, "grad_norm": 0.1787785142660141, "learning_rate": 1e-05, "loss": 0.5308, "step": 2402 }, { "epoch": 0.6650522382896285, "grad_norm": 0.17271067202091217, "learning_rate": 1e-05, "loss": 0.5624, "step": 2403 }, { "epoch": 0.6653289974399779, "grad_norm": 0.1677132248878479, "learning_rate": 1e-05, "loss": 0.5613, "step": 2404 }, { "epoch": 0.6656057565903273, "grad_norm": 0.1718834787607193, "learning_rate": 1e-05, "loss": 0.5575, "step": 2405 }, { "epoch": 0.6658825157406767, "grad_norm": 0.1749771535396576, "learning_rate": 1e-05, "loss": 0.5425, "step": 2406 }, { "epoch": 0.6661592748910261, "grad_norm": 0.18055766820907593, "learning_rate": 1e-05, "loss": 0.5744, "step": 2407 }, { "epoch": 0.6664360340413755, "grad_norm": 0.17513149976730347, "learning_rate": 1e-05, "loss": 0.5559, "step": 2408 }, { "epoch": 0.6667127931917249, "grad_norm": 0.17232979834079742, "learning_rate": 1e-05, "loss": 0.5363, "step": 2409 }, { "epoch": 0.6669895523420744, "grad_norm": 0.17790120840072632, "learning_rate": 1e-05, "loss": 0.5488, "step": 2410 }, { "epoch": 0.6672663114924238, "grad_norm": 0.17243164777755737, "learning_rate": 1e-05, "loss": 0.5675, "step": 2411 }, { "epoch": 0.6675430706427731, "grad_norm": 0.1727181077003479, "learning_rate": 1e-05, "loss": 0.572, "step": 2412 }, { "epoch": 0.6678198297931225, "grad_norm": 0.17203795909881592, "learning_rate": 1e-05, "loss": 0.5565, "step": 2413 }, { "epoch": 0.6680965889434719, "grad_norm": 0.16918988525867462, "learning_rate": 1e-05, "loss": 0.5439, "step": 2414 }, { "epoch": 0.6683733480938213, "grad_norm": 0.16928882896900177, "learning_rate": 1e-05, "loss": 0.5314, "step": 2415 }, { "epoch": 0.6686501072441707, "grad_norm": 0.17905084788799286, "learning_rate": 1e-05, "loss": 0.5612, "step": 2416 }, { "epoch": 0.6689268663945201, "grad_norm": 0.17209160327911377, "learning_rate": 1e-05, "loss": 0.575, "step": 2417 }, { "epoch": 0.6692036255448696, "grad_norm": 0.17365984618663788, "learning_rate": 1e-05, "loss": 0.5231, "step": 2418 }, { "epoch": 0.669480384695219, "grad_norm": 0.2037716656923294, "learning_rate": 1e-05, "loss": 0.5272, "step": 2419 }, { "epoch": 0.6697571438455684, "grad_norm": 0.16994768381118774, "learning_rate": 1e-05, "loss": 0.5498, "step": 2420 }, { "epoch": 0.6700339029959178, "grad_norm": 0.16618239879608154, "learning_rate": 1e-05, "loss": 0.5316, "step": 2421 }, { "epoch": 0.6703106621462672, "grad_norm": 0.17827226221561432, "learning_rate": 1e-05, "loss": 0.5488, "step": 2422 }, { "epoch": 0.6705874212966166, "grad_norm": 0.17900103330612183, "learning_rate": 1e-05, "loss": 0.5664, "step": 2423 }, { "epoch": 0.670864180446966, "grad_norm": 0.17836426198482513, "learning_rate": 1e-05, "loss": 0.5885, "step": 2424 }, { "epoch": 0.6711409395973155, "grad_norm": 0.16661648452281952, "learning_rate": 1e-05, "loss": 0.536, "step": 2425 }, { "epoch": 0.6714176987476649, "grad_norm": 0.1831710785627365, "learning_rate": 1e-05, "loss": 0.5484, "step": 2426 }, { "epoch": 0.6716944578980143, "grad_norm": 0.18034982681274414, "learning_rate": 1e-05, "loss": 0.5278, "step": 2427 }, { "epoch": 0.6719712170483637, "grad_norm": 0.17765159904956818, "learning_rate": 1e-05, "loss": 0.5952, "step": 2428 }, { "epoch": 0.672247976198713, "grad_norm": 0.17423900961875916, "learning_rate": 1e-05, "loss": 0.5662, "step": 2429 }, { "epoch": 0.6725247353490624, "grad_norm": 0.18243153393268585, "learning_rate": 1e-05, "loss": 0.5657, "step": 2430 }, { "epoch": 0.6728014944994118, "grad_norm": 0.16965240240097046, "learning_rate": 1e-05, "loss": 0.5208, "step": 2431 }, { "epoch": 0.6730782536497613, "grad_norm": 0.16740241646766663, "learning_rate": 1e-05, "loss": 0.5175, "step": 2432 }, { "epoch": 0.6733550128001107, "grad_norm": 0.17715315520763397, "learning_rate": 1e-05, "loss": 0.5421, "step": 2433 }, { "epoch": 0.6736317719504601, "grad_norm": 0.18324801325798035, "learning_rate": 1e-05, "loss": 0.5758, "step": 2434 }, { "epoch": 0.6739085311008095, "grad_norm": 0.16913573443889618, "learning_rate": 1e-05, "loss": 0.5544, "step": 2435 }, { "epoch": 0.6741852902511589, "grad_norm": 0.17142808437347412, "learning_rate": 1e-05, "loss": 0.5552, "step": 2436 }, { "epoch": 0.6744620494015083, "grad_norm": 0.17875593900680542, "learning_rate": 1e-05, "loss": 0.5881, "step": 2437 }, { "epoch": 0.6747388085518577, "grad_norm": 0.170981764793396, "learning_rate": 1e-05, "loss": 0.5475, "step": 2438 }, { "epoch": 0.6750155677022072, "grad_norm": 0.17292781174182892, "learning_rate": 1e-05, "loss": 0.5564, "step": 2439 }, { "epoch": 0.6752923268525566, "grad_norm": 0.17641937732696533, "learning_rate": 1e-05, "loss": 0.5742, "step": 2440 }, { "epoch": 0.675569086002906, "grad_norm": 0.16763755679130554, "learning_rate": 1e-05, "loss": 0.5393, "step": 2441 }, { "epoch": 0.6758458451532554, "grad_norm": 0.17410272359848022, "learning_rate": 1e-05, "loss": 0.5801, "step": 2442 }, { "epoch": 0.6761226043036048, "grad_norm": 0.17467771470546722, "learning_rate": 1e-05, "loss": 0.5382, "step": 2443 }, { "epoch": 0.6763993634539542, "grad_norm": 0.17149055004119873, "learning_rate": 1e-05, "loss": 0.5651, "step": 2444 }, { "epoch": 0.6766761226043037, "grad_norm": 0.17333050072193146, "learning_rate": 1e-05, "loss": 0.5477, "step": 2445 }, { "epoch": 0.6769528817546531, "grad_norm": 0.1709410399198532, "learning_rate": 1e-05, "loss": 0.5525, "step": 2446 }, { "epoch": 0.6772296409050024, "grad_norm": 0.1651879847049713, "learning_rate": 1e-05, "loss": 0.5349, "step": 2447 }, { "epoch": 0.6775064000553518, "grad_norm": 0.17753982543945312, "learning_rate": 1e-05, "loss": 0.5647, "step": 2448 }, { "epoch": 0.6777831592057012, "grad_norm": 0.16574494540691376, "learning_rate": 1e-05, "loss": 0.5309, "step": 2449 }, { "epoch": 0.6780599183560506, "grad_norm": 0.17390424013137817, "learning_rate": 1e-05, "loss": 0.5439, "step": 2450 }, { "epoch": 0.6783366775064, "grad_norm": 0.17531675100326538, "learning_rate": 1e-05, "loss": 0.5502, "step": 2451 }, { "epoch": 0.6786134366567494, "grad_norm": 0.17229586839675903, "learning_rate": 1e-05, "loss": 0.5503, "step": 2452 }, { "epoch": 0.6788901958070989, "grad_norm": 0.17495720088481903, "learning_rate": 1e-05, "loss": 0.5626, "step": 2453 }, { "epoch": 0.6791669549574483, "grad_norm": 0.17291875183582306, "learning_rate": 1e-05, "loss": 0.5576, "step": 2454 }, { "epoch": 0.6794437141077977, "grad_norm": 0.1842542588710785, "learning_rate": 1e-05, "loss": 0.6085, "step": 2455 }, { "epoch": 0.6797204732581471, "grad_norm": 0.1741335093975067, "learning_rate": 1e-05, "loss": 0.522, "step": 2456 }, { "epoch": 0.6799972324084965, "grad_norm": 0.1793806105852127, "learning_rate": 1e-05, "loss": 0.5654, "step": 2457 }, { "epoch": 0.6802739915588459, "grad_norm": 0.1682165116071701, "learning_rate": 1e-05, "loss": 0.5713, "step": 2458 }, { "epoch": 0.6805507507091954, "grad_norm": 0.17028851807117462, "learning_rate": 1e-05, "loss": 0.547, "step": 2459 }, { "epoch": 0.6808275098595448, "grad_norm": 0.16575674712657928, "learning_rate": 1e-05, "loss": 0.5689, "step": 2460 }, { "epoch": 0.6811042690098942, "grad_norm": 0.1744442880153656, "learning_rate": 1e-05, "loss": 0.5512, "step": 2461 }, { "epoch": 0.6813810281602436, "grad_norm": 0.1805192530155182, "learning_rate": 1e-05, "loss": 0.5492, "step": 2462 }, { "epoch": 0.681657787310593, "grad_norm": 0.17484335601329803, "learning_rate": 1e-05, "loss": 0.5583, "step": 2463 }, { "epoch": 0.6819345464609423, "grad_norm": 0.16341541707515717, "learning_rate": 1e-05, "loss": 0.5379, "step": 2464 }, { "epoch": 0.6822113056112917, "grad_norm": 0.178728848695755, "learning_rate": 1e-05, "loss": 0.5475, "step": 2465 }, { "epoch": 0.6824880647616411, "grad_norm": 0.17479351162910461, "learning_rate": 1e-05, "loss": 0.5337, "step": 2466 }, { "epoch": 0.6827648239119906, "grad_norm": 0.16967053711414337, "learning_rate": 1e-05, "loss": 0.5605, "step": 2467 }, { "epoch": 0.68304158306234, "grad_norm": 0.17420192062854767, "learning_rate": 1e-05, "loss": 0.5525, "step": 2468 }, { "epoch": 0.6833183422126894, "grad_norm": 0.17308133840560913, "learning_rate": 1e-05, "loss": 0.5415, "step": 2469 }, { "epoch": 0.6835951013630388, "grad_norm": 0.17093750834465027, "learning_rate": 1e-05, "loss": 0.5736, "step": 2470 }, { "epoch": 0.6838718605133882, "grad_norm": 0.1738828718662262, "learning_rate": 1e-05, "loss": 0.5998, "step": 2471 }, { "epoch": 0.6841486196637376, "grad_norm": 0.16596871614456177, "learning_rate": 1e-05, "loss": 0.5659, "step": 2472 }, { "epoch": 0.684425378814087, "grad_norm": 0.1680709719657898, "learning_rate": 1e-05, "loss": 0.5411, "step": 2473 }, { "epoch": 0.6847021379644365, "grad_norm": 0.17609646916389465, "learning_rate": 1e-05, "loss": 0.5746, "step": 2474 }, { "epoch": 0.6849788971147859, "grad_norm": 0.16879843175411224, "learning_rate": 1e-05, "loss": 0.5759, "step": 2475 }, { "epoch": 0.6852556562651353, "grad_norm": 0.18933849036693573, "learning_rate": 1e-05, "loss": 0.5297, "step": 2476 }, { "epoch": 0.6855324154154847, "grad_norm": 0.16831985116004944, "learning_rate": 1e-05, "loss": 0.5503, "step": 2477 }, { "epoch": 0.6858091745658341, "grad_norm": 0.172191321849823, "learning_rate": 1e-05, "loss": 0.5378, "step": 2478 }, { "epoch": 0.6860859337161835, "grad_norm": 0.17245973646640778, "learning_rate": 1e-05, "loss": 0.539, "step": 2479 }, { "epoch": 0.686362692866533, "grad_norm": 0.16562628746032715, "learning_rate": 1e-05, "loss": 0.5169, "step": 2480 }, { "epoch": 0.6866394520168823, "grad_norm": 0.17827321588993073, "learning_rate": 1e-05, "loss": 0.5321, "step": 2481 }, { "epoch": 0.6869162111672317, "grad_norm": 0.1707017719745636, "learning_rate": 1e-05, "loss": 0.5509, "step": 2482 }, { "epoch": 0.6871929703175811, "grad_norm": 0.17408354580402374, "learning_rate": 1e-05, "loss": 0.558, "step": 2483 }, { "epoch": 0.6874697294679305, "grad_norm": 0.1689358949661255, "learning_rate": 1e-05, "loss": 0.5479, "step": 2484 }, { "epoch": 0.6877464886182799, "grad_norm": 0.17397214472293854, "learning_rate": 1e-05, "loss": 0.5332, "step": 2485 }, { "epoch": 0.6880232477686293, "grad_norm": 0.1758936643600464, "learning_rate": 1e-05, "loss": 0.5475, "step": 2486 }, { "epoch": 0.6883000069189787, "grad_norm": 0.17603257298469543, "learning_rate": 1e-05, "loss": 0.544, "step": 2487 }, { "epoch": 0.6885767660693282, "grad_norm": 0.1663367748260498, "learning_rate": 1e-05, "loss": 0.5487, "step": 2488 }, { "epoch": 0.6888535252196776, "grad_norm": 0.16708973050117493, "learning_rate": 1e-05, "loss": 0.5457, "step": 2489 }, { "epoch": 0.689130284370027, "grad_norm": 0.17244328558444977, "learning_rate": 1e-05, "loss": 0.5667, "step": 2490 }, { "epoch": 0.6894070435203764, "grad_norm": 0.17332978546619415, "learning_rate": 1e-05, "loss": 0.5455, "step": 2491 }, { "epoch": 0.6896838026707258, "grad_norm": 0.16720788180828094, "learning_rate": 1e-05, "loss": 0.5635, "step": 2492 }, { "epoch": 0.6899605618210752, "grad_norm": 0.16755704581737518, "learning_rate": 1e-05, "loss": 0.5312, "step": 2493 }, { "epoch": 0.6902373209714247, "grad_norm": 0.17010469734668732, "learning_rate": 1e-05, "loss": 0.5477, "step": 2494 }, { "epoch": 0.6905140801217741, "grad_norm": 0.19122089445590973, "learning_rate": 1e-05, "loss": 0.5235, "step": 2495 }, { "epoch": 0.6907908392721235, "grad_norm": 0.17697674036026, "learning_rate": 1e-05, "loss": 0.5626, "step": 2496 }, { "epoch": 0.6910675984224729, "grad_norm": 0.18062043190002441, "learning_rate": 1e-05, "loss": 0.5532, "step": 2497 }, { "epoch": 0.6913443575728222, "grad_norm": 0.17295041680335999, "learning_rate": 1e-05, "loss": 0.538, "step": 2498 }, { "epoch": 0.6916211167231716, "grad_norm": 0.17192545533180237, "learning_rate": 1e-05, "loss": 0.5422, "step": 2499 }, { "epoch": 0.691897875873521, "grad_norm": 0.18992285430431366, "learning_rate": 1e-05, "loss": 0.5762, "step": 2500 }, { "epoch": 0.6921746350238704, "grad_norm": 0.17216037213802338, "learning_rate": 1e-05, "loss": 0.5746, "step": 2501 }, { "epoch": 0.6924513941742199, "grad_norm": 0.16861023008823395, "learning_rate": 1e-05, "loss": 0.5279, "step": 2502 }, { "epoch": 0.6927281533245693, "grad_norm": 0.17985935509204865, "learning_rate": 1e-05, "loss": 0.5769, "step": 2503 }, { "epoch": 0.6930049124749187, "grad_norm": 0.18899744749069214, "learning_rate": 1e-05, "loss": 0.5484, "step": 2504 }, { "epoch": 0.6932816716252681, "grad_norm": 0.17532838881015778, "learning_rate": 1e-05, "loss": 0.5379, "step": 2505 }, { "epoch": 0.6935584307756175, "grad_norm": 0.17069067060947418, "learning_rate": 1e-05, "loss": 0.5706, "step": 2506 }, { "epoch": 0.6938351899259669, "grad_norm": 0.1702159196138382, "learning_rate": 1e-05, "loss": 0.5209, "step": 2507 }, { "epoch": 0.6941119490763163, "grad_norm": 0.17590180039405823, "learning_rate": 1e-05, "loss": 0.5689, "step": 2508 }, { "epoch": 0.6943887082266658, "grad_norm": 0.20148152112960815, "learning_rate": 1e-05, "loss": 0.5291, "step": 2509 }, { "epoch": 0.6946654673770152, "grad_norm": 0.16719162464141846, "learning_rate": 1e-05, "loss": 0.5707, "step": 2510 }, { "epoch": 0.6949422265273646, "grad_norm": 0.16804806888103485, "learning_rate": 1e-05, "loss": 0.552, "step": 2511 }, { "epoch": 0.695218985677714, "grad_norm": 0.1676834374666214, "learning_rate": 1e-05, "loss": 0.5056, "step": 2512 }, { "epoch": 0.6954957448280634, "grad_norm": 0.17976580560207367, "learning_rate": 1e-05, "loss": 0.5794, "step": 2513 }, { "epoch": 0.6957725039784128, "grad_norm": 0.17572419345378876, "learning_rate": 1e-05, "loss": 0.577, "step": 2514 }, { "epoch": 0.6960492631287621, "grad_norm": 0.17790864408016205, "learning_rate": 1e-05, "loss": 0.611, "step": 2515 }, { "epoch": 0.6963260222791116, "grad_norm": 0.17315144836902618, "learning_rate": 1e-05, "loss": 0.5517, "step": 2516 }, { "epoch": 0.696602781429461, "grad_norm": 0.18765993416309357, "learning_rate": 1e-05, "loss": 0.5477, "step": 2517 }, { "epoch": 0.6968795405798104, "grad_norm": 0.17318803071975708, "learning_rate": 1e-05, "loss": 0.5681, "step": 2518 }, { "epoch": 0.6971562997301598, "grad_norm": 0.18334932625293732, "learning_rate": 1e-05, "loss": 0.565, "step": 2519 }, { "epoch": 0.6974330588805092, "grad_norm": 0.17546527087688446, "learning_rate": 1e-05, "loss": 0.5523, "step": 2520 }, { "epoch": 0.6977098180308586, "grad_norm": 0.17612969875335693, "learning_rate": 1e-05, "loss": 0.5563, "step": 2521 }, { "epoch": 0.697986577181208, "grad_norm": 0.17028747498989105, "learning_rate": 1e-05, "loss": 0.5545, "step": 2522 }, { "epoch": 0.6982633363315575, "grad_norm": 0.17679539322853088, "learning_rate": 1e-05, "loss": 0.5453, "step": 2523 }, { "epoch": 0.6985400954819069, "grad_norm": 0.1785540133714676, "learning_rate": 1e-05, "loss": 0.5688, "step": 2524 }, { "epoch": 0.6988168546322563, "grad_norm": 0.17147010564804077, "learning_rate": 1e-05, "loss": 0.5641, "step": 2525 }, { "epoch": 0.6990936137826057, "grad_norm": 0.17185227572917938, "learning_rate": 1e-05, "loss": 0.5495, "step": 2526 }, { "epoch": 0.6993703729329551, "grad_norm": 0.17096073925495148, "learning_rate": 1e-05, "loss": 0.5376, "step": 2527 }, { "epoch": 0.6996471320833045, "grad_norm": 0.1727747768163681, "learning_rate": 1e-05, "loss": 0.5667, "step": 2528 }, { "epoch": 0.699923891233654, "grad_norm": 0.1777854561805725, "learning_rate": 1e-05, "loss": 0.5796, "step": 2529 }, { "epoch": 0.7002006503840034, "grad_norm": 0.17001378536224365, "learning_rate": 1e-05, "loss": 0.5544, "step": 2530 }, { "epoch": 0.7004774095343528, "grad_norm": 0.17354345321655273, "learning_rate": 1e-05, "loss": 0.5742, "step": 2531 }, { "epoch": 0.7007541686847021, "grad_norm": 0.1710493564605713, "learning_rate": 1e-05, "loss": 0.538, "step": 2532 }, { "epoch": 0.7010309278350515, "grad_norm": 0.18927453458309174, "learning_rate": 1e-05, "loss": 0.5755, "step": 2533 }, { "epoch": 0.7013076869854009, "grad_norm": 0.18137674033641815, "learning_rate": 1e-05, "loss": 0.5496, "step": 2534 }, { "epoch": 0.7015844461357503, "grad_norm": 0.16149738430976868, "learning_rate": 1e-05, "loss": 0.5433, "step": 2535 }, { "epoch": 0.7018612052860997, "grad_norm": 0.18399637937545776, "learning_rate": 1e-05, "loss": 0.5539, "step": 2536 }, { "epoch": 0.7021379644364492, "grad_norm": 0.1774549037218094, "learning_rate": 1e-05, "loss": 0.5563, "step": 2537 }, { "epoch": 0.7024147235867986, "grad_norm": 0.17889751493930817, "learning_rate": 1e-05, "loss": 0.5685, "step": 2538 }, { "epoch": 0.702691482737148, "grad_norm": 0.16987918317317963, "learning_rate": 1e-05, "loss": 0.5638, "step": 2539 }, { "epoch": 0.7029682418874974, "grad_norm": 0.18107753992080688, "learning_rate": 1e-05, "loss": 0.5609, "step": 2540 }, { "epoch": 0.7032450010378468, "grad_norm": 0.16982711851596832, "learning_rate": 1e-05, "loss": 0.5513, "step": 2541 }, { "epoch": 0.7035217601881962, "grad_norm": 0.176279678940773, "learning_rate": 1e-05, "loss": 0.6012, "step": 2542 }, { "epoch": 0.7037985193385456, "grad_norm": 0.1806613951921463, "learning_rate": 1e-05, "loss": 0.5625, "step": 2543 }, { "epoch": 0.7040752784888951, "grad_norm": 0.1681927740573883, "learning_rate": 1e-05, "loss": 0.5011, "step": 2544 }, { "epoch": 0.7043520376392445, "grad_norm": 0.1651466339826584, "learning_rate": 1e-05, "loss": 0.53, "step": 2545 }, { "epoch": 0.7046287967895939, "grad_norm": 0.1696578860282898, "learning_rate": 1e-05, "loss": 0.5743, "step": 2546 }, { "epoch": 0.7049055559399433, "grad_norm": 0.174209326505661, "learning_rate": 1e-05, "loss": 0.5345, "step": 2547 }, { "epoch": 0.7051823150902927, "grad_norm": 0.17239029705524445, "learning_rate": 1e-05, "loss": 0.5664, "step": 2548 }, { "epoch": 0.7054590742406421, "grad_norm": 0.1736888736486435, "learning_rate": 1e-05, "loss": 0.5611, "step": 2549 }, { "epoch": 0.7057358333909914, "grad_norm": 0.1693800687789917, "learning_rate": 1e-05, "loss": 0.5507, "step": 2550 }, { "epoch": 0.7060125925413409, "grad_norm": 0.1765696257352829, "learning_rate": 1e-05, "loss": 0.567, "step": 2551 }, { "epoch": 0.7062893516916903, "grad_norm": 0.18611665070056915, "learning_rate": 1e-05, "loss": 0.5848, "step": 2552 }, { "epoch": 0.7065661108420397, "grad_norm": 0.17167511582374573, "learning_rate": 1e-05, "loss": 0.5578, "step": 2553 }, { "epoch": 0.7068428699923891, "grad_norm": 0.1767633855342865, "learning_rate": 1e-05, "loss": 0.5587, "step": 2554 }, { "epoch": 0.7071196291427385, "grad_norm": 0.17405956983566284, "learning_rate": 1e-05, "loss": 0.5628, "step": 2555 }, { "epoch": 0.7073963882930879, "grad_norm": 0.17261876165866852, "learning_rate": 1e-05, "loss": 0.5654, "step": 2556 }, { "epoch": 0.7076731474434373, "grad_norm": 0.17316703498363495, "learning_rate": 1e-05, "loss": 0.5436, "step": 2557 }, { "epoch": 0.7079499065937868, "grad_norm": 0.1701882779598236, "learning_rate": 1e-05, "loss": 0.5734, "step": 2558 }, { "epoch": 0.7082266657441362, "grad_norm": 0.18085268139839172, "learning_rate": 1e-05, "loss": 0.5426, "step": 2559 }, { "epoch": 0.7085034248944856, "grad_norm": 0.16874133050441742, "learning_rate": 1e-05, "loss": 0.5612, "step": 2560 }, { "epoch": 0.708780184044835, "grad_norm": 0.17006191611289978, "learning_rate": 1e-05, "loss": 0.5383, "step": 2561 }, { "epoch": 0.7090569431951844, "grad_norm": 0.16960468888282776, "learning_rate": 1e-05, "loss": 0.5549, "step": 2562 }, { "epoch": 0.7093337023455338, "grad_norm": 0.17605946958065033, "learning_rate": 1e-05, "loss": 0.5712, "step": 2563 }, { "epoch": 0.7096104614958832, "grad_norm": 0.16641934216022491, "learning_rate": 1e-05, "loss": 0.5388, "step": 2564 }, { "epoch": 0.7098872206462327, "grad_norm": 0.16308023035526276, "learning_rate": 1e-05, "loss": 0.5403, "step": 2565 }, { "epoch": 0.7101639797965821, "grad_norm": 0.1735360324382782, "learning_rate": 1e-05, "loss": 0.5649, "step": 2566 }, { "epoch": 0.7104407389469314, "grad_norm": 0.1732647567987442, "learning_rate": 1e-05, "loss": 0.5555, "step": 2567 }, { "epoch": 0.7107174980972808, "grad_norm": 0.16434557735919952, "learning_rate": 1e-05, "loss": 0.582, "step": 2568 }, { "epoch": 0.7109942572476302, "grad_norm": 0.16929477453231812, "learning_rate": 1e-05, "loss": 0.5817, "step": 2569 }, { "epoch": 0.7112710163979796, "grad_norm": 0.17322155833244324, "learning_rate": 1e-05, "loss": 0.5588, "step": 2570 }, { "epoch": 0.711547775548329, "grad_norm": 0.16520988941192627, "learning_rate": 1e-05, "loss": 0.5453, "step": 2571 }, { "epoch": 0.7118245346986785, "grad_norm": 0.16655662655830383, "learning_rate": 1e-05, "loss": 0.5457, "step": 2572 }, { "epoch": 0.7121012938490279, "grad_norm": 0.16663841903209686, "learning_rate": 1e-05, "loss": 0.5387, "step": 2573 }, { "epoch": 0.7123780529993773, "grad_norm": 0.16859832406044006, "learning_rate": 1e-05, "loss": 0.5577, "step": 2574 }, { "epoch": 0.7126548121497267, "grad_norm": 0.171508327126503, "learning_rate": 1e-05, "loss": 0.5544, "step": 2575 }, { "epoch": 0.7129315713000761, "grad_norm": 0.1714528501033783, "learning_rate": 1e-05, "loss": 0.5491, "step": 2576 }, { "epoch": 0.7132083304504255, "grad_norm": 0.17180050909519196, "learning_rate": 1e-05, "loss": 0.551, "step": 2577 }, { "epoch": 0.713485089600775, "grad_norm": 0.16797484457492828, "learning_rate": 1e-05, "loss": 0.534, "step": 2578 }, { "epoch": 0.7137618487511244, "grad_norm": 0.17195133864879608, "learning_rate": 1e-05, "loss": 0.5501, "step": 2579 }, { "epoch": 0.7140386079014738, "grad_norm": 0.17660686373710632, "learning_rate": 1e-05, "loss": 0.5841, "step": 2580 }, { "epoch": 0.7143153670518232, "grad_norm": 0.17231225967407227, "learning_rate": 1e-05, "loss": 0.5485, "step": 2581 }, { "epoch": 0.7145921262021726, "grad_norm": 0.17298747599124908, "learning_rate": 1e-05, "loss": 0.5559, "step": 2582 }, { "epoch": 0.714868885352522, "grad_norm": 0.1666283905506134, "learning_rate": 1e-05, "loss": 0.5404, "step": 2583 }, { "epoch": 0.7151456445028713, "grad_norm": 0.17399467527866364, "learning_rate": 1e-05, "loss": 0.5765, "step": 2584 }, { "epoch": 0.7154224036532207, "grad_norm": 0.18054620921611786, "learning_rate": 1e-05, "loss": 0.5501, "step": 2585 }, { "epoch": 0.7156991628035702, "grad_norm": 0.17519643902778625, "learning_rate": 1e-05, "loss": 0.5178, "step": 2586 }, { "epoch": 0.7159759219539196, "grad_norm": 0.1765386164188385, "learning_rate": 1e-05, "loss": 0.5829, "step": 2587 }, { "epoch": 0.716252681104269, "grad_norm": 0.16975997388362885, "learning_rate": 1e-05, "loss": 0.5612, "step": 2588 }, { "epoch": 0.7165294402546184, "grad_norm": 0.1811857968568802, "learning_rate": 1e-05, "loss": 0.5438, "step": 2589 }, { "epoch": 0.7168061994049678, "grad_norm": 0.18259629607200623, "learning_rate": 1e-05, "loss": 0.5674, "step": 2590 }, { "epoch": 0.7170829585553172, "grad_norm": 0.17573852837085724, "learning_rate": 1e-05, "loss": 0.5572, "step": 2591 }, { "epoch": 0.7173597177056666, "grad_norm": 0.17237626016139984, "learning_rate": 1e-05, "loss": 0.5492, "step": 2592 }, { "epoch": 0.7176364768560161, "grad_norm": 0.17240233719348907, "learning_rate": 1e-05, "loss": 0.5591, "step": 2593 }, { "epoch": 0.7179132360063655, "grad_norm": 0.17327609658241272, "learning_rate": 1e-05, "loss": 0.5425, "step": 2594 }, { "epoch": 0.7181899951567149, "grad_norm": 0.17408223450183868, "learning_rate": 1e-05, "loss": 0.5565, "step": 2595 }, { "epoch": 0.7184667543070643, "grad_norm": 0.17206327617168427, "learning_rate": 1e-05, "loss": 0.5356, "step": 2596 }, { "epoch": 0.7187435134574137, "grad_norm": 0.17374195158481598, "learning_rate": 1e-05, "loss": 0.5597, "step": 2597 }, { "epoch": 0.7190202726077631, "grad_norm": 0.16870065033435822, "learning_rate": 1e-05, "loss": 0.5439, "step": 2598 }, { "epoch": 0.7192970317581125, "grad_norm": 0.1622265726327896, "learning_rate": 1e-05, "loss": 0.5241, "step": 2599 }, { "epoch": 0.719573790908462, "grad_norm": 0.1734859049320221, "learning_rate": 1e-05, "loss": 0.5818, "step": 2600 }, { "epoch": 0.7198505500588113, "grad_norm": 0.16758079826831818, "learning_rate": 1e-05, "loss": 0.5436, "step": 2601 }, { "epoch": 0.7201273092091607, "grad_norm": 0.16512399911880493, "learning_rate": 1e-05, "loss": 0.5746, "step": 2602 }, { "epoch": 0.7204040683595101, "grad_norm": 0.16672341525554657, "learning_rate": 1e-05, "loss": 0.5529, "step": 2603 }, { "epoch": 0.7206808275098595, "grad_norm": 0.16999410092830658, "learning_rate": 1e-05, "loss": 0.5153, "step": 2604 }, { "epoch": 0.7209575866602089, "grad_norm": 0.1680947095155716, "learning_rate": 1e-05, "loss": 0.5582, "step": 2605 }, { "epoch": 0.7212343458105583, "grad_norm": 0.17282405495643616, "learning_rate": 1e-05, "loss": 0.5692, "step": 2606 }, { "epoch": 0.7215111049609078, "grad_norm": 0.16611438989639282, "learning_rate": 1e-05, "loss": 0.5448, "step": 2607 }, { "epoch": 0.7217878641112572, "grad_norm": 0.1720113903284073, "learning_rate": 1e-05, "loss": 0.5615, "step": 2608 }, { "epoch": 0.7220646232616066, "grad_norm": 0.16694337129592896, "learning_rate": 1e-05, "loss": 0.5418, "step": 2609 }, { "epoch": 0.722341382411956, "grad_norm": 0.17534388601779938, "learning_rate": 1e-05, "loss": 0.5465, "step": 2610 }, { "epoch": 0.7226181415623054, "grad_norm": 0.17481577396392822, "learning_rate": 1e-05, "loss": 0.5676, "step": 2611 }, { "epoch": 0.7228949007126548, "grad_norm": 0.17318053543567657, "learning_rate": 1e-05, "loss": 0.5837, "step": 2612 }, { "epoch": 0.7231716598630042, "grad_norm": 0.16788339614868164, "learning_rate": 1e-05, "loss": 0.5124, "step": 2613 }, { "epoch": 0.7234484190133537, "grad_norm": 0.17035603523254395, "learning_rate": 1e-05, "loss": 0.5676, "step": 2614 }, { "epoch": 0.7237251781637031, "grad_norm": 0.178401380777359, "learning_rate": 1e-05, "loss": 0.5632, "step": 2615 }, { "epoch": 0.7240019373140525, "grad_norm": 0.16571733355522156, "learning_rate": 1e-05, "loss": 0.5435, "step": 2616 }, { "epoch": 0.7242786964644019, "grad_norm": 0.17952771484851837, "learning_rate": 1e-05, "loss": 0.5501, "step": 2617 }, { "epoch": 0.7245554556147512, "grad_norm": 0.17362195253372192, "learning_rate": 1e-05, "loss": 0.5631, "step": 2618 }, { "epoch": 0.7248322147651006, "grad_norm": 0.16449476778507233, "learning_rate": 1e-05, "loss": 0.5213, "step": 2619 }, { "epoch": 0.72510897391545, "grad_norm": 0.17659352719783783, "learning_rate": 1e-05, "loss": 0.5977, "step": 2620 }, { "epoch": 0.7253857330657995, "grad_norm": 0.1712510585784912, "learning_rate": 1e-05, "loss": 0.5277, "step": 2621 }, { "epoch": 0.7256624922161489, "grad_norm": 0.17563383281230927, "learning_rate": 1e-05, "loss": 0.5482, "step": 2622 }, { "epoch": 0.7259392513664983, "grad_norm": 0.1745145171880722, "learning_rate": 1e-05, "loss": 0.5728, "step": 2623 }, { "epoch": 0.7262160105168477, "grad_norm": 0.16659781336784363, "learning_rate": 1e-05, "loss": 0.5581, "step": 2624 }, { "epoch": 0.7264927696671971, "grad_norm": 0.1802118420600891, "learning_rate": 1e-05, "loss": 0.5949, "step": 2625 }, { "epoch": 0.7267695288175465, "grad_norm": 0.1816747784614563, "learning_rate": 1e-05, "loss": 0.5453, "step": 2626 }, { "epoch": 0.7270462879678959, "grad_norm": 0.17069102823734283, "learning_rate": 1e-05, "loss": 0.5765, "step": 2627 }, { "epoch": 0.7273230471182454, "grad_norm": 0.17540834844112396, "learning_rate": 1e-05, "loss": 0.5662, "step": 2628 }, { "epoch": 0.7275998062685948, "grad_norm": 0.1697671264410019, "learning_rate": 1e-05, "loss": 0.5197, "step": 2629 }, { "epoch": 0.7278765654189442, "grad_norm": 0.17897632718086243, "learning_rate": 1e-05, "loss": 0.5218, "step": 2630 }, { "epoch": 0.7281533245692936, "grad_norm": 0.16933588683605194, "learning_rate": 1e-05, "loss": 0.5276, "step": 2631 }, { "epoch": 0.728430083719643, "grad_norm": 0.17354579269886017, "learning_rate": 1e-05, "loss": 0.5834, "step": 2632 }, { "epoch": 0.7287068428699924, "grad_norm": 0.18150770664215088, "learning_rate": 1e-05, "loss": 0.5679, "step": 2633 }, { "epoch": 0.7289836020203418, "grad_norm": 0.1735323965549469, "learning_rate": 1e-05, "loss": 0.5782, "step": 2634 }, { "epoch": 0.7292603611706912, "grad_norm": 0.16566088795661926, "learning_rate": 1e-05, "loss": 0.5408, "step": 2635 }, { "epoch": 0.7295371203210406, "grad_norm": 0.17411789298057556, "learning_rate": 1e-05, "loss": 0.5377, "step": 2636 }, { "epoch": 0.72981387947139, "grad_norm": 0.17933864891529083, "learning_rate": 1e-05, "loss": 0.5373, "step": 2637 }, { "epoch": 0.7300906386217394, "grad_norm": 0.18131740391254425, "learning_rate": 1e-05, "loss": 0.5571, "step": 2638 }, { "epoch": 0.7303673977720888, "grad_norm": 0.16968899965286255, "learning_rate": 1e-05, "loss": 0.5514, "step": 2639 }, { "epoch": 0.7306441569224382, "grad_norm": 0.17414765059947968, "learning_rate": 1e-05, "loss": 0.5614, "step": 2640 }, { "epoch": 0.7309209160727876, "grad_norm": 0.17897607386112213, "learning_rate": 1e-05, "loss": 0.5494, "step": 2641 }, { "epoch": 0.7311976752231371, "grad_norm": 0.18090830743312836, "learning_rate": 1e-05, "loss": 0.5858, "step": 2642 }, { "epoch": 0.7314744343734865, "grad_norm": 0.17389151453971863, "learning_rate": 1e-05, "loss": 0.5592, "step": 2643 }, { "epoch": 0.7317511935238359, "grad_norm": 0.17640206217765808, "learning_rate": 1e-05, "loss": 0.5644, "step": 2644 }, { "epoch": 0.7320279526741853, "grad_norm": 0.17250485718250275, "learning_rate": 1e-05, "loss": 0.5603, "step": 2645 }, { "epoch": 0.7323047118245347, "grad_norm": 0.1665598303079605, "learning_rate": 1e-05, "loss": 0.5368, "step": 2646 }, { "epoch": 0.7325814709748841, "grad_norm": 0.18161830306053162, "learning_rate": 1e-05, "loss": 0.5704, "step": 2647 }, { "epoch": 0.7328582301252335, "grad_norm": 0.1693730503320694, "learning_rate": 1e-05, "loss": 0.5413, "step": 2648 }, { "epoch": 0.733134989275583, "grad_norm": 0.1818326711654663, "learning_rate": 1e-05, "loss": 0.558, "step": 2649 }, { "epoch": 0.7334117484259324, "grad_norm": 0.17105980217456818, "learning_rate": 1e-05, "loss": 0.5641, "step": 2650 }, { "epoch": 0.7336885075762818, "grad_norm": 0.17106394469738007, "learning_rate": 1e-05, "loss": 0.5996, "step": 2651 }, { "epoch": 0.7339652667266312, "grad_norm": 0.17906217277050018, "learning_rate": 1e-05, "loss": 0.5583, "step": 2652 }, { "epoch": 0.7342420258769805, "grad_norm": 0.17083579301834106, "learning_rate": 1e-05, "loss": 0.5273, "step": 2653 }, { "epoch": 0.7345187850273299, "grad_norm": 0.17205701768398285, "learning_rate": 1e-05, "loss": 0.5413, "step": 2654 }, { "epoch": 0.7347955441776793, "grad_norm": 0.1753655970096588, "learning_rate": 1e-05, "loss": 0.547, "step": 2655 }, { "epoch": 0.7350723033280288, "grad_norm": 0.16983498632907867, "learning_rate": 1e-05, "loss": 0.5739, "step": 2656 }, { "epoch": 0.7353490624783782, "grad_norm": 0.17854374647140503, "learning_rate": 1e-05, "loss": 0.5638, "step": 2657 }, { "epoch": 0.7356258216287276, "grad_norm": 0.1793244183063507, "learning_rate": 1e-05, "loss": 0.5525, "step": 2658 }, { "epoch": 0.735902580779077, "grad_norm": 0.17539718747138977, "learning_rate": 1e-05, "loss": 0.5657, "step": 2659 }, { "epoch": 0.7361793399294264, "grad_norm": 0.16506731510162354, "learning_rate": 1e-05, "loss": 0.5417, "step": 2660 }, { "epoch": 0.7364560990797758, "grad_norm": 0.16526377201080322, "learning_rate": 1e-05, "loss": 0.5404, "step": 2661 }, { "epoch": 0.7367328582301252, "grad_norm": 0.17374709248542786, "learning_rate": 1e-05, "loss": 0.5573, "step": 2662 }, { "epoch": 0.7370096173804747, "grad_norm": 0.1704902946949005, "learning_rate": 1e-05, "loss": 0.5689, "step": 2663 }, { "epoch": 0.7372863765308241, "grad_norm": 0.16862386465072632, "learning_rate": 1e-05, "loss": 0.5276, "step": 2664 }, { "epoch": 0.7375631356811735, "grad_norm": 0.18394215404987335, "learning_rate": 1e-05, "loss": 0.5564, "step": 2665 }, { "epoch": 0.7378398948315229, "grad_norm": 0.16896651685237885, "learning_rate": 1e-05, "loss": 0.568, "step": 2666 }, { "epoch": 0.7381166539818723, "grad_norm": 0.1665654182434082, "learning_rate": 1e-05, "loss": 0.5522, "step": 2667 }, { "epoch": 0.7383934131322217, "grad_norm": 0.17959414422512054, "learning_rate": 1e-05, "loss": 0.5272, "step": 2668 }, { "epoch": 0.7386701722825711, "grad_norm": 0.16905930638313293, "learning_rate": 1e-05, "loss": 0.5375, "step": 2669 }, { "epoch": 0.7389469314329205, "grad_norm": 0.1815154254436493, "learning_rate": 1e-05, "loss": 0.5712, "step": 2670 }, { "epoch": 0.7392236905832699, "grad_norm": 0.17413470149040222, "learning_rate": 1e-05, "loss": 0.5803, "step": 2671 }, { "epoch": 0.7395004497336193, "grad_norm": 0.16915658116340637, "learning_rate": 1e-05, "loss": 0.5467, "step": 2672 }, { "epoch": 0.7397772088839687, "grad_norm": 0.17170564830303192, "learning_rate": 1e-05, "loss": 0.5552, "step": 2673 }, { "epoch": 0.7400539680343181, "grad_norm": 0.1828358769416809, "learning_rate": 1e-05, "loss": 0.5627, "step": 2674 }, { "epoch": 0.7403307271846675, "grad_norm": 0.16798794269561768, "learning_rate": 1e-05, "loss": 0.5302, "step": 2675 }, { "epoch": 0.7406074863350169, "grad_norm": 0.17029978334903717, "learning_rate": 1e-05, "loss": 0.5513, "step": 2676 }, { "epoch": 0.7408842454853664, "grad_norm": 0.17156273126602173, "learning_rate": 1e-05, "loss": 0.5562, "step": 2677 }, { "epoch": 0.7411610046357158, "grad_norm": 0.17719514667987823, "learning_rate": 1e-05, "loss": 0.557, "step": 2678 }, { "epoch": 0.7414377637860652, "grad_norm": 0.17393088340759277, "learning_rate": 1e-05, "loss": 0.5545, "step": 2679 }, { "epoch": 0.7417145229364146, "grad_norm": 0.17627808451652527, "learning_rate": 1e-05, "loss": 0.5477, "step": 2680 }, { "epoch": 0.741991282086764, "grad_norm": 0.1790289431810379, "learning_rate": 1e-05, "loss": 0.5859, "step": 2681 }, { "epoch": 0.7422680412371134, "grad_norm": 0.16505004465579987, "learning_rate": 1e-05, "loss": 0.5305, "step": 2682 }, { "epoch": 0.7425448003874628, "grad_norm": 0.1760120838880539, "learning_rate": 1e-05, "loss": 0.5434, "step": 2683 }, { "epoch": 0.7428215595378123, "grad_norm": 0.1714673489332199, "learning_rate": 1e-05, "loss": 0.5562, "step": 2684 }, { "epoch": 0.7430983186881617, "grad_norm": 0.1774844080209732, "learning_rate": 1e-05, "loss": 0.5464, "step": 2685 }, { "epoch": 0.7433750778385111, "grad_norm": 0.16832581162452698, "learning_rate": 1e-05, "loss": 0.555, "step": 2686 }, { "epoch": 0.7436518369888604, "grad_norm": 0.1711425930261612, "learning_rate": 1e-05, "loss": 0.5301, "step": 2687 }, { "epoch": 0.7439285961392098, "grad_norm": 0.1810564398765564, "learning_rate": 1e-05, "loss": 0.5128, "step": 2688 }, { "epoch": 0.7442053552895592, "grad_norm": 0.1696447730064392, "learning_rate": 1e-05, "loss": 0.5238, "step": 2689 }, { "epoch": 0.7444821144399086, "grad_norm": 0.17201454937458038, "learning_rate": 1e-05, "loss": 0.5526, "step": 2690 }, { "epoch": 0.744758873590258, "grad_norm": 0.17242717742919922, "learning_rate": 1e-05, "loss": 0.5511, "step": 2691 }, { "epoch": 0.7450356327406075, "grad_norm": 0.1735144406557083, "learning_rate": 1e-05, "loss": 0.5452, "step": 2692 }, { "epoch": 0.7453123918909569, "grad_norm": 0.17847655713558197, "learning_rate": 1e-05, "loss": 0.5275, "step": 2693 }, { "epoch": 0.7455891510413063, "grad_norm": 0.17141957581043243, "learning_rate": 1e-05, "loss": 0.551, "step": 2694 }, { "epoch": 0.7458659101916557, "grad_norm": 0.18610958755016327, "learning_rate": 1e-05, "loss": 0.564, "step": 2695 }, { "epoch": 0.7461426693420051, "grad_norm": 0.17347507178783417, "learning_rate": 1e-05, "loss": 0.5475, "step": 2696 }, { "epoch": 0.7464194284923545, "grad_norm": 0.17323338985443115, "learning_rate": 1e-05, "loss": 0.5271, "step": 2697 }, { "epoch": 0.746696187642704, "grad_norm": 0.17393973469734192, "learning_rate": 1e-05, "loss": 0.5878, "step": 2698 }, { "epoch": 0.7469729467930534, "grad_norm": 0.16963458061218262, "learning_rate": 1e-05, "loss": 0.5418, "step": 2699 }, { "epoch": 0.7472497059434028, "grad_norm": 0.17142459750175476, "learning_rate": 1e-05, "loss": 0.5378, "step": 2700 }, { "epoch": 0.7475264650937522, "grad_norm": 0.17119786143302917, "learning_rate": 1e-05, "loss": 0.5257, "step": 2701 }, { "epoch": 0.7478032242441016, "grad_norm": 0.1807785928249359, "learning_rate": 1e-05, "loss": 0.556, "step": 2702 }, { "epoch": 0.748079983394451, "grad_norm": 0.17590689659118652, "learning_rate": 1e-05, "loss": 0.5613, "step": 2703 }, { "epoch": 0.7483567425448003, "grad_norm": 0.1713716685771942, "learning_rate": 1e-05, "loss": 0.5989, "step": 2704 }, { "epoch": 0.7486335016951498, "grad_norm": 0.17030726373195648, "learning_rate": 1e-05, "loss": 0.5455, "step": 2705 }, { "epoch": 0.7489102608454992, "grad_norm": 0.1698548048734665, "learning_rate": 1e-05, "loss": 0.5409, "step": 2706 }, { "epoch": 0.7491870199958486, "grad_norm": 0.1699403077363968, "learning_rate": 1e-05, "loss": 0.5695, "step": 2707 }, { "epoch": 0.749463779146198, "grad_norm": 0.17132742702960968, "learning_rate": 1e-05, "loss": 0.5313, "step": 2708 }, { "epoch": 0.7497405382965474, "grad_norm": 0.17526790499687195, "learning_rate": 1e-05, "loss": 0.5399, "step": 2709 }, { "epoch": 0.7500172974468968, "grad_norm": 0.16918230056762695, "learning_rate": 1e-05, "loss": 0.5342, "step": 2710 }, { "epoch": 0.7502940565972462, "grad_norm": 0.16970928013324738, "learning_rate": 1e-05, "loss": 0.5359, "step": 2711 }, { "epoch": 0.7505708157475957, "grad_norm": 0.16089311242103577, "learning_rate": 1e-05, "loss": 0.5333, "step": 2712 }, { "epoch": 0.7508475748979451, "grad_norm": 0.17641645669937134, "learning_rate": 1e-05, "loss": 0.5935, "step": 2713 }, { "epoch": 0.7511243340482945, "grad_norm": 0.16942933201789856, "learning_rate": 1e-05, "loss": 0.5439, "step": 2714 }, { "epoch": 0.7514010931986439, "grad_norm": 0.17273636162281036, "learning_rate": 1e-05, "loss": 0.5572, "step": 2715 }, { "epoch": 0.7516778523489933, "grad_norm": 0.17178142070770264, "learning_rate": 1e-05, "loss": 0.5603, "step": 2716 }, { "epoch": 0.7519546114993427, "grad_norm": 0.17855191230773926, "learning_rate": 1e-05, "loss": 0.5736, "step": 2717 }, { "epoch": 0.7522313706496921, "grad_norm": 0.17686016857624054, "learning_rate": 1e-05, "loss": 0.5222, "step": 2718 }, { "epoch": 0.7525081298000416, "grad_norm": 0.1679607331752777, "learning_rate": 1e-05, "loss": 0.5644, "step": 2719 }, { "epoch": 0.752784888950391, "grad_norm": 0.1682182401418686, "learning_rate": 1e-05, "loss": 0.531, "step": 2720 }, { "epoch": 0.7530616481007403, "grad_norm": 0.1769266277551651, "learning_rate": 1e-05, "loss": 0.5632, "step": 2721 }, { "epoch": 0.7533384072510897, "grad_norm": 0.16948862373828888, "learning_rate": 1e-05, "loss": 0.5959, "step": 2722 }, { "epoch": 0.7536151664014391, "grad_norm": 0.17854264378547668, "learning_rate": 1e-05, "loss": 0.5806, "step": 2723 }, { "epoch": 0.7538919255517885, "grad_norm": 0.17599605023860931, "learning_rate": 1e-05, "loss": 0.547, "step": 2724 }, { "epoch": 0.7541686847021379, "grad_norm": 0.17098169028759003, "learning_rate": 1e-05, "loss": 0.5495, "step": 2725 }, { "epoch": 0.7544454438524874, "grad_norm": 0.17777109146118164, "learning_rate": 1e-05, "loss": 0.5704, "step": 2726 }, { "epoch": 0.7547222030028368, "grad_norm": 0.16687996685504913, "learning_rate": 1e-05, "loss": 0.5297, "step": 2727 }, { "epoch": 0.7549989621531862, "grad_norm": 0.18387249112129211, "learning_rate": 1e-05, "loss": 0.5516, "step": 2728 }, { "epoch": 0.7552757213035356, "grad_norm": 0.1728331297636032, "learning_rate": 1e-05, "loss": 0.5518, "step": 2729 }, { "epoch": 0.755552480453885, "grad_norm": 0.16672955453395844, "learning_rate": 1e-05, "loss": 0.5459, "step": 2730 }, { "epoch": 0.7558292396042344, "grad_norm": 0.16701757907867432, "learning_rate": 1e-05, "loss": 0.5775, "step": 2731 }, { "epoch": 0.7561059987545838, "grad_norm": 0.17098133265972137, "learning_rate": 1e-05, "loss": 0.524, "step": 2732 }, { "epoch": 0.7563827579049333, "grad_norm": 0.17229600250720978, "learning_rate": 1e-05, "loss": 0.5263, "step": 2733 }, { "epoch": 0.7566595170552827, "grad_norm": 0.16863639652729034, "learning_rate": 1e-05, "loss": 0.5552, "step": 2734 }, { "epoch": 0.7569362762056321, "grad_norm": 0.1720982939004898, "learning_rate": 1e-05, "loss": 0.539, "step": 2735 }, { "epoch": 0.7572130353559815, "grad_norm": 0.17851032316684723, "learning_rate": 1e-05, "loss": 0.5776, "step": 2736 }, { "epoch": 0.7574897945063309, "grad_norm": 0.17302930355072021, "learning_rate": 1e-05, "loss": 0.5555, "step": 2737 }, { "epoch": 0.7577665536566802, "grad_norm": 0.18024413287639618, "learning_rate": 1e-05, "loss": 0.5645, "step": 2738 }, { "epoch": 0.7580433128070296, "grad_norm": 0.17195434868335724, "learning_rate": 1e-05, "loss": 0.5256, "step": 2739 }, { "epoch": 0.758320071957379, "grad_norm": 0.17434631288051605, "learning_rate": 1e-05, "loss": 0.5407, "step": 2740 }, { "epoch": 0.7585968311077285, "grad_norm": 0.17050357162952423, "learning_rate": 1e-05, "loss": 0.5159, "step": 2741 }, { "epoch": 0.7588735902580779, "grad_norm": 0.17279213666915894, "learning_rate": 1e-05, "loss": 0.5612, "step": 2742 }, { "epoch": 0.7591503494084273, "grad_norm": 0.16980786621570587, "learning_rate": 1e-05, "loss": 0.5317, "step": 2743 }, { "epoch": 0.7594271085587767, "grad_norm": 0.17364299297332764, "learning_rate": 1e-05, "loss": 0.5284, "step": 2744 }, { "epoch": 0.7597038677091261, "grad_norm": 0.17580218613147736, "learning_rate": 1e-05, "loss": 0.5338, "step": 2745 }, { "epoch": 0.7599806268594755, "grad_norm": 0.17670761048793793, "learning_rate": 1e-05, "loss": 0.5414, "step": 2746 }, { "epoch": 0.760257386009825, "grad_norm": 0.16906630992889404, "learning_rate": 1e-05, "loss": 0.5733, "step": 2747 }, { "epoch": 0.7605341451601744, "grad_norm": 0.16824372112751007, "learning_rate": 1e-05, "loss": 0.5557, "step": 2748 }, { "epoch": 0.7608109043105238, "grad_norm": 0.18426041305065155, "learning_rate": 1e-05, "loss": 0.5403, "step": 2749 }, { "epoch": 0.7610876634608732, "grad_norm": 0.17436188459396362, "learning_rate": 1e-05, "loss": 0.5572, "step": 2750 }, { "epoch": 0.7613644226112226, "grad_norm": 0.1657233089208603, "learning_rate": 1e-05, "loss": 0.5508, "step": 2751 }, { "epoch": 0.761641181761572, "grad_norm": 0.1702972650527954, "learning_rate": 1e-05, "loss": 0.5484, "step": 2752 }, { "epoch": 0.7619179409119214, "grad_norm": 0.1724434494972229, "learning_rate": 1e-05, "loss": 0.5669, "step": 2753 }, { "epoch": 0.7621947000622709, "grad_norm": 0.1648043692111969, "learning_rate": 1e-05, "loss": 0.5682, "step": 2754 }, { "epoch": 0.7624714592126202, "grad_norm": 0.17166003584861755, "learning_rate": 1e-05, "loss": 0.5366, "step": 2755 }, { "epoch": 0.7627482183629696, "grad_norm": 0.16740109026432037, "learning_rate": 1e-05, "loss": 0.5459, "step": 2756 }, { "epoch": 0.763024977513319, "grad_norm": 0.16289392113685608, "learning_rate": 1e-05, "loss": 0.5582, "step": 2757 }, { "epoch": 0.7633017366636684, "grad_norm": 0.16895388066768646, "learning_rate": 1e-05, "loss": 0.5383, "step": 2758 }, { "epoch": 0.7635784958140178, "grad_norm": 0.1720779985189438, "learning_rate": 1e-05, "loss": 0.548, "step": 2759 }, { "epoch": 0.7638552549643672, "grad_norm": 0.172093003988266, "learning_rate": 1e-05, "loss": 0.5324, "step": 2760 }, { "epoch": 0.7641320141147167, "grad_norm": 0.16825608909130096, "learning_rate": 1e-05, "loss": 0.5455, "step": 2761 }, { "epoch": 0.7644087732650661, "grad_norm": 0.16587676107883453, "learning_rate": 1e-05, "loss": 0.5266, "step": 2762 }, { "epoch": 0.7646855324154155, "grad_norm": 0.17138873040676117, "learning_rate": 1e-05, "loss": 0.5295, "step": 2763 }, { "epoch": 0.7649622915657649, "grad_norm": 0.174627423286438, "learning_rate": 1e-05, "loss": 0.5569, "step": 2764 }, { "epoch": 0.7652390507161143, "grad_norm": 0.16907952725887299, "learning_rate": 1e-05, "loss": 0.5733, "step": 2765 }, { "epoch": 0.7655158098664637, "grad_norm": 0.1626833975315094, "learning_rate": 1e-05, "loss": 0.5392, "step": 2766 }, { "epoch": 0.7657925690168131, "grad_norm": 0.1754598319530487, "learning_rate": 1e-05, "loss": 0.5284, "step": 2767 }, { "epoch": 0.7660693281671626, "grad_norm": 0.16816335916519165, "learning_rate": 1e-05, "loss": 0.566, "step": 2768 }, { "epoch": 0.766346087317512, "grad_norm": 0.17145439982414246, "learning_rate": 1e-05, "loss": 0.5641, "step": 2769 }, { "epoch": 0.7666228464678614, "grad_norm": 0.1748160570859909, "learning_rate": 1e-05, "loss": 0.5727, "step": 2770 }, { "epoch": 0.7668996056182108, "grad_norm": 0.17580170929431915, "learning_rate": 1e-05, "loss": 0.5799, "step": 2771 }, { "epoch": 0.7671763647685602, "grad_norm": 0.16291259229183197, "learning_rate": 1e-05, "loss": 0.5538, "step": 2772 }, { "epoch": 0.7674531239189095, "grad_norm": 0.1686815470457077, "learning_rate": 1e-05, "loss": 0.5378, "step": 2773 }, { "epoch": 0.7677298830692589, "grad_norm": 0.1755620539188385, "learning_rate": 1e-05, "loss": 0.582, "step": 2774 }, { "epoch": 0.7680066422196083, "grad_norm": 0.1733938306570053, "learning_rate": 1e-05, "loss": 0.5324, "step": 2775 }, { "epoch": 0.7682834013699578, "grad_norm": 0.17203591763973236, "learning_rate": 1e-05, "loss": 0.5525, "step": 2776 }, { "epoch": 0.7685601605203072, "grad_norm": 0.1747211515903473, "learning_rate": 1e-05, "loss": 0.5535, "step": 2777 }, { "epoch": 0.7688369196706566, "grad_norm": 0.1805342584848404, "learning_rate": 1e-05, "loss": 0.5582, "step": 2778 }, { "epoch": 0.769113678821006, "grad_norm": 0.16472016274929047, "learning_rate": 1e-05, "loss": 0.5422, "step": 2779 }, { "epoch": 0.7693904379713554, "grad_norm": 0.16906970739364624, "learning_rate": 1e-05, "loss": 0.5569, "step": 2780 }, { "epoch": 0.7696671971217048, "grad_norm": 0.1718807816505432, "learning_rate": 1e-05, "loss": 0.5606, "step": 2781 }, { "epoch": 0.7699439562720543, "grad_norm": 0.17141105234622955, "learning_rate": 1e-05, "loss": 0.5424, "step": 2782 }, { "epoch": 0.7702207154224037, "grad_norm": 0.16499729454517365, "learning_rate": 1e-05, "loss": 0.5101, "step": 2783 }, { "epoch": 0.7704974745727531, "grad_norm": 0.17319455742835999, "learning_rate": 1e-05, "loss": 0.5599, "step": 2784 }, { "epoch": 0.7707742337231025, "grad_norm": 0.17044484615325928, "learning_rate": 1e-05, "loss": 0.5678, "step": 2785 }, { "epoch": 0.7710509928734519, "grad_norm": 0.17712117731571198, "learning_rate": 1e-05, "loss": 0.5768, "step": 2786 }, { "epoch": 0.7713277520238013, "grad_norm": 0.1649801880121231, "learning_rate": 1e-05, "loss": 0.561, "step": 2787 }, { "epoch": 0.7716045111741507, "grad_norm": 0.1652776300907135, "learning_rate": 1e-05, "loss": 0.5323, "step": 2788 }, { "epoch": 0.7718812703245002, "grad_norm": 0.17516657710075378, "learning_rate": 1e-05, "loss": 0.5709, "step": 2789 }, { "epoch": 0.7721580294748495, "grad_norm": 0.17671585083007812, "learning_rate": 1e-05, "loss": 0.5486, "step": 2790 }, { "epoch": 0.7724347886251989, "grad_norm": 0.17270319163799286, "learning_rate": 1e-05, "loss": 0.5403, "step": 2791 }, { "epoch": 0.7727115477755483, "grad_norm": 0.17534708976745605, "learning_rate": 1e-05, "loss": 0.6057, "step": 2792 }, { "epoch": 0.7729883069258977, "grad_norm": 0.173179492354393, "learning_rate": 1e-05, "loss": 0.572, "step": 2793 }, { "epoch": 0.7732650660762471, "grad_norm": 0.17073504626750946, "learning_rate": 1e-05, "loss": 0.524, "step": 2794 }, { "epoch": 0.7735418252265965, "grad_norm": 0.17043660581111908, "learning_rate": 1e-05, "loss": 0.5585, "step": 2795 }, { "epoch": 0.773818584376946, "grad_norm": 0.16771358251571655, "learning_rate": 1e-05, "loss": 0.5585, "step": 2796 }, { "epoch": 0.7740953435272954, "grad_norm": 0.18395671248435974, "learning_rate": 1e-05, "loss": 0.5483, "step": 2797 }, { "epoch": 0.7743721026776448, "grad_norm": 0.16784217953681946, "learning_rate": 1e-05, "loss": 0.5513, "step": 2798 }, { "epoch": 0.7746488618279942, "grad_norm": 0.1728440672159195, "learning_rate": 1e-05, "loss": 0.5823, "step": 2799 }, { "epoch": 0.7749256209783436, "grad_norm": 0.16441144049167633, "learning_rate": 1e-05, "loss": 0.5432, "step": 2800 }, { "epoch": 0.775202380128693, "grad_norm": 0.1644848734140396, "learning_rate": 1e-05, "loss": 0.56, "step": 2801 }, { "epoch": 0.7754791392790424, "grad_norm": 0.17435333132743835, "learning_rate": 1e-05, "loss": 0.5434, "step": 2802 }, { "epoch": 0.7757558984293919, "grad_norm": 0.16655652225017548, "learning_rate": 1e-05, "loss": 0.5517, "step": 2803 }, { "epoch": 0.7760326575797413, "grad_norm": 0.17386074364185333, "learning_rate": 1e-05, "loss": 0.5588, "step": 2804 }, { "epoch": 0.7763094167300907, "grad_norm": 0.16575880348682404, "learning_rate": 1e-05, "loss": 0.5473, "step": 2805 }, { "epoch": 0.7765861758804401, "grad_norm": 0.1751997321844101, "learning_rate": 1e-05, "loss": 0.5418, "step": 2806 }, { "epoch": 0.7768629350307894, "grad_norm": 0.16675934195518494, "learning_rate": 1e-05, "loss": 0.5403, "step": 2807 }, { "epoch": 0.7771396941811388, "grad_norm": 0.17311470210552216, "learning_rate": 1e-05, "loss": 0.5598, "step": 2808 }, { "epoch": 0.7774164533314882, "grad_norm": 0.17854025959968567, "learning_rate": 1e-05, "loss": 0.5695, "step": 2809 }, { "epoch": 0.7776932124818376, "grad_norm": 0.16506324708461761, "learning_rate": 1e-05, "loss": 0.5294, "step": 2810 }, { "epoch": 0.7779699716321871, "grad_norm": 0.1648063063621521, "learning_rate": 1e-05, "loss": 0.5456, "step": 2811 }, { "epoch": 0.7782467307825365, "grad_norm": 0.17100729048252106, "learning_rate": 1e-05, "loss": 0.56, "step": 2812 }, { "epoch": 0.7785234899328859, "grad_norm": 0.16934770345687866, "learning_rate": 1e-05, "loss": 0.5321, "step": 2813 }, { "epoch": 0.7788002490832353, "grad_norm": 0.15953490138053894, "learning_rate": 1e-05, "loss": 0.5168, "step": 2814 }, { "epoch": 0.7790770082335847, "grad_norm": 0.15846335887908936, "learning_rate": 1e-05, "loss": 0.4917, "step": 2815 }, { "epoch": 0.7793537673839341, "grad_norm": 0.17235781252384186, "learning_rate": 1e-05, "loss": 0.5053, "step": 2816 }, { "epoch": 0.7796305265342836, "grad_norm": 0.18085022270679474, "learning_rate": 1e-05, "loss": 0.5716, "step": 2817 }, { "epoch": 0.779907285684633, "grad_norm": 0.17968067526817322, "learning_rate": 1e-05, "loss": 0.5603, "step": 2818 }, { "epoch": 0.7801840448349824, "grad_norm": 0.17506656050682068, "learning_rate": 1e-05, "loss": 0.5436, "step": 2819 }, { "epoch": 0.7804608039853318, "grad_norm": 0.17166128754615784, "learning_rate": 1e-05, "loss": 0.5202, "step": 2820 }, { "epoch": 0.7807375631356812, "grad_norm": 0.16196683049201965, "learning_rate": 1e-05, "loss": 0.5345, "step": 2821 }, { "epoch": 0.7810143222860306, "grad_norm": 0.17318987846374512, "learning_rate": 1e-05, "loss": 0.5494, "step": 2822 }, { "epoch": 0.78129108143638, "grad_norm": 0.17860709130764008, "learning_rate": 1e-05, "loss": 0.5786, "step": 2823 }, { "epoch": 0.7815678405867293, "grad_norm": 0.17536146938800812, "learning_rate": 1e-05, "loss": 0.5433, "step": 2824 }, { "epoch": 0.7818445997370788, "grad_norm": 0.16398267447948456, "learning_rate": 1e-05, "loss": 0.5512, "step": 2825 }, { "epoch": 0.7821213588874282, "grad_norm": 0.1682225614786148, "learning_rate": 1e-05, "loss": 0.5699, "step": 2826 }, { "epoch": 0.7823981180377776, "grad_norm": 0.17401237785816193, "learning_rate": 1e-05, "loss": 0.5887, "step": 2827 }, { "epoch": 0.782674877188127, "grad_norm": 0.1823023557662964, "learning_rate": 1e-05, "loss": 0.5999, "step": 2828 }, { "epoch": 0.7829516363384764, "grad_norm": 0.1736474633216858, "learning_rate": 1e-05, "loss": 0.5498, "step": 2829 }, { "epoch": 0.7832283954888258, "grad_norm": 0.16544364392757416, "learning_rate": 1e-05, "loss": 0.5404, "step": 2830 }, { "epoch": 0.7835051546391752, "grad_norm": 0.17849044501781464, "learning_rate": 1e-05, "loss": 0.5261, "step": 2831 }, { "epoch": 0.7837819137895247, "grad_norm": 0.16789141297340393, "learning_rate": 1e-05, "loss": 0.5674, "step": 2832 }, { "epoch": 0.7840586729398741, "grad_norm": 0.16246512532234192, "learning_rate": 1e-05, "loss": 0.5226, "step": 2833 }, { "epoch": 0.7843354320902235, "grad_norm": 0.18523161113262177, "learning_rate": 1e-05, "loss": 0.5776, "step": 2834 }, { "epoch": 0.7846121912405729, "grad_norm": 0.18242181837558746, "learning_rate": 1e-05, "loss": 0.5741, "step": 2835 }, { "epoch": 0.7848889503909223, "grad_norm": 0.17563147842884064, "learning_rate": 1e-05, "loss": 0.6011, "step": 2836 }, { "epoch": 0.7851657095412717, "grad_norm": 0.17888487875461578, "learning_rate": 1e-05, "loss": 0.5587, "step": 2837 }, { "epoch": 0.7854424686916212, "grad_norm": 0.1683928519487381, "learning_rate": 1e-05, "loss": 0.536, "step": 2838 }, { "epoch": 0.7857192278419706, "grad_norm": 0.17542272806167603, "learning_rate": 1e-05, "loss": 0.5407, "step": 2839 }, { "epoch": 0.78599598699232, "grad_norm": 0.1801423281431198, "learning_rate": 1e-05, "loss": 0.5654, "step": 2840 }, { "epoch": 0.7862727461426693, "grad_norm": 0.172552689909935, "learning_rate": 1e-05, "loss": 0.5269, "step": 2841 }, { "epoch": 0.7865495052930187, "grad_norm": 0.17098455131053925, "learning_rate": 1e-05, "loss": 0.5345, "step": 2842 }, { "epoch": 0.7868262644433681, "grad_norm": 0.16732394695281982, "learning_rate": 1e-05, "loss": 0.5503, "step": 2843 }, { "epoch": 0.7871030235937175, "grad_norm": 0.16149786114692688, "learning_rate": 1e-05, "loss": 0.5307, "step": 2844 }, { "epoch": 0.787379782744067, "grad_norm": 0.18669740855693817, "learning_rate": 1e-05, "loss": 0.5685, "step": 2845 }, { "epoch": 0.7876565418944164, "grad_norm": 0.17408519983291626, "learning_rate": 1e-05, "loss": 0.5532, "step": 2846 }, { "epoch": 0.7879333010447658, "grad_norm": 0.1663636416196823, "learning_rate": 1e-05, "loss": 0.5435, "step": 2847 }, { "epoch": 0.7882100601951152, "grad_norm": 0.1611821949481964, "learning_rate": 1e-05, "loss": 0.5293, "step": 2848 }, { "epoch": 0.7884868193454646, "grad_norm": 0.16741704940795898, "learning_rate": 1e-05, "loss": 0.5471, "step": 2849 }, { "epoch": 0.788763578495814, "grad_norm": 0.17322881519794464, "learning_rate": 1e-05, "loss": 0.5581, "step": 2850 }, { "epoch": 0.7890403376461634, "grad_norm": 0.17477861046791077, "learning_rate": 1e-05, "loss": 0.5464, "step": 2851 }, { "epoch": 0.7893170967965129, "grad_norm": 0.17210963368415833, "learning_rate": 1e-05, "loss": 0.5434, "step": 2852 }, { "epoch": 0.7895938559468623, "grad_norm": 0.16884803771972656, "learning_rate": 1e-05, "loss": 0.5241, "step": 2853 }, { "epoch": 0.7898706150972117, "grad_norm": 0.1851205974817276, "learning_rate": 1e-05, "loss": 0.5623, "step": 2854 }, { "epoch": 0.7901473742475611, "grad_norm": 0.17097698152065277, "learning_rate": 1e-05, "loss": 0.5293, "step": 2855 }, { "epoch": 0.7904241333979105, "grad_norm": 0.16788232326507568, "learning_rate": 1e-05, "loss": 0.5558, "step": 2856 }, { "epoch": 0.7907008925482599, "grad_norm": 0.17893920838832855, "learning_rate": 1e-05, "loss": 0.5588, "step": 2857 }, { "epoch": 0.7909776516986092, "grad_norm": 0.17874076962471008, "learning_rate": 1e-05, "loss": 0.5647, "step": 2858 }, { "epoch": 0.7912544108489586, "grad_norm": 0.16686759889125824, "learning_rate": 1e-05, "loss": 0.561, "step": 2859 }, { "epoch": 0.7915311699993081, "grad_norm": 0.17746245861053467, "learning_rate": 1e-05, "loss": 0.5574, "step": 2860 }, { "epoch": 0.7918079291496575, "grad_norm": 0.17387445271015167, "learning_rate": 1e-05, "loss": 0.5181, "step": 2861 }, { "epoch": 0.7920846883000069, "grad_norm": 0.1836928278207779, "learning_rate": 1e-05, "loss": 0.5654, "step": 2862 }, { "epoch": 0.7923614474503563, "grad_norm": 0.17826348543167114, "learning_rate": 1e-05, "loss": 0.5307, "step": 2863 }, { "epoch": 0.7926382066007057, "grad_norm": 0.172820582985878, "learning_rate": 1e-05, "loss": 0.5397, "step": 2864 }, { "epoch": 0.7929149657510551, "grad_norm": 0.1732482612133026, "learning_rate": 1e-05, "loss": 0.5544, "step": 2865 }, { "epoch": 0.7931917249014045, "grad_norm": 0.17529140412807465, "learning_rate": 1e-05, "loss": 0.5618, "step": 2866 }, { "epoch": 0.793468484051754, "grad_norm": 0.17287541925907135, "learning_rate": 1e-05, "loss": 0.5467, "step": 2867 }, { "epoch": 0.7937452432021034, "grad_norm": 0.17341695725917816, "learning_rate": 1e-05, "loss": 0.5498, "step": 2868 }, { "epoch": 0.7940220023524528, "grad_norm": 0.1807842254638672, "learning_rate": 1e-05, "loss": 0.5514, "step": 2869 }, { "epoch": 0.7942987615028022, "grad_norm": 0.16867439448833466, "learning_rate": 1e-05, "loss": 0.5496, "step": 2870 }, { "epoch": 0.7945755206531516, "grad_norm": 0.16816279292106628, "learning_rate": 1e-05, "loss": 0.5466, "step": 2871 }, { "epoch": 0.794852279803501, "grad_norm": 0.16663336753845215, "learning_rate": 1e-05, "loss": 0.5649, "step": 2872 }, { "epoch": 0.7951290389538505, "grad_norm": 0.17053602635860443, "learning_rate": 1e-05, "loss": 0.534, "step": 2873 }, { "epoch": 0.7954057981041999, "grad_norm": 0.17417897284030914, "learning_rate": 1e-05, "loss": 0.5388, "step": 2874 }, { "epoch": 0.7956825572545493, "grad_norm": 0.1674627959728241, "learning_rate": 1e-05, "loss": 0.5477, "step": 2875 }, { "epoch": 0.7959593164048986, "grad_norm": 0.1720205545425415, "learning_rate": 1e-05, "loss": 0.5473, "step": 2876 }, { "epoch": 0.796236075555248, "grad_norm": 0.16579599678516388, "learning_rate": 1e-05, "loss": 0.5209, "step": 2877 }, { "epoch": 0.7965128347055974, "grad_norm": 0.17253488302230835, "learning_rate": 1e-05, "loss": 0.5415, "step": 2878 }, { "epoch": 0.7967895938559468, "grad_norm": 0.17193064093589783, "learning_rate": 1e-05, "loss": 0.5574, "step": 2879 }, { "epoch": 0.7970663530062962, "grad_norm": 0.1726672649383545, "learning_rate": 1e-05, "loss": 0.554, "step": 2880 }, { "epoch": 0.7973431121566457, "grad_norm": 0.17061938345432281, "learning_rate": 1e-05, "loss": 0.5406, "step": 2881 }, { "epoch": 0.7976198713069951, "grad_norm": 0.16409972310066223, "learning_rate": 1e-05, "loss": 0.5314, "step": 2882 }, { "epoch": 0.7978966304573445, "grad_norm": 0.16840030252933502, "learning_rate": 1e-05, "loss": 0.5147, "step": 2883 }, { "epoch": 0.7981733896076939, "grad_norm": 0.18991847336292267, "learning_rate": 1e-05, "loss": 0.5534, "step": 2884 }, { "epoch": 0.7984501487580433, "grad_norm": 0.16732461750507355, "learning_rate": 1e-05, "loss": 0.542, "step": 2885 }, { "epoch": 0.7987269079083927, "grad_norm": 0.17466376721858978, "learning_rate": 1e-05, "loss": 0.5561, "step": 2886 }, { "epoch": 0.7990036670587422, "grad_norm": 0.17354856431484222, "learning_rate": 1e-05, "loss": 0.5244, "step": 2887 }, { "epoch": 0.7992804262090916, "grad_norm": 0.16909614205360413, "learning_rate": 1e-05, "loss": 0.5543, "step": 2888 }, { "epoch": 0.799557185359441, "grad_norm": 0.19279305636882782, "learning_rate": 1e-05, "loss": 0.5434, "step": 2889 }, { "epoch": 0.7998339445097904, "grad_norm": 0.16921888291835785, "learning_rate": 1e-05, "loss": 0.5485, "step": 2890 }, { "epoch": 0.8001107036601398, "grad_norm": 0.16323362290859222, "learning_rate": 1e-05, "loss": 0.54, "step": 2891 }, { "epoch": 0.8003874628104892, "grad_norm": 0.17777471244335175, "learning_rate": 1e-05, "loss": 0.5354, "step": 2892 }, { "epoch": 0.8006642219608385, "grad_norm": 0.17807160317897797, "learning_rate": 1e-05, "loss": 0.5793, "step": 2893 }, { "epoch": 0.8009409811111879, "grad_norm": 0.1650971919298172, "learning_rate": 1e-05, "loss": 0.5489, "step": 2894 }, { "epoch": 0.8012177402615374, "grad_norm": 0.17796245217323303, "learning_rate": 1e-05, "loss": 0.5423, "step": 2895 }, { "epoch": 0.8014944994118868, "grad_norm": 0.17180423438549042, "learning_rate": 1e-05, "loss": 0.5579, "step": 2896 }, { "epoch": 0.8017712585622362, "grad_norm": 0.1781865507364273, "learning_rate": 1e-05, "loss": 0.5256, "step": 2897 }, { "epoch": 0.8020480177125856, "grad_norm": 0.17099997401237488, "learning_rate": 1e-05, "loss": 0.5516, "step": 2898 }, { "epoch": 0.802324776862935, "grad_norm": 0.17504078149795532, "learning_rate": 1e-05, "loss": 0.5706, "step": 2899 }, { "epoch": 0.8026015360132844, "grad_norm": 0.16882970929145813, "learning_rate": 1e-05, "loss": 0.5468, "step": 2900 }, { "epoch": 0.8028782951636338, "grad_norm": 0.17433103919029236, "learning_rate": 1e-05, "loss": 0.5674, "step": 2901 }, { "epoch": 0.8031550543139833, "grad_norm": 0.17494931817054749, "learning_rate": 1e-05, "loss": 0.573, "step": 2902 }, { "epoch": 0.8034318134643327, "grad_norm": 0.17155954241752625, "learning_rate": 1e-05, "loss": 0.5414, "step": 2903 }, { "epoch": 0.8037085726146821, "grad_norm": 0.16779260337352753, "learning_rate": 1e-05, "loss": 0.5349, "step": 2904 }, { "epoch": 0.8039853317650315, "grad_norm": 0.16946376860141754, "learning_rate": 1e-05, "loss": 0.5404, "step": 2905 }, { "epoch": 0.8042620909153809, "grad_norm": 0.16853155195713043, "learning_rate": 1e-05, "loss": 0.5752, "step": 2906 }, { "epoch": 0.8045388500657303, "grad_norm": 0.17475196719169617, "learning_rate": 1e-05, "loss": 0.5579, "step": 2907 }, { "epoch": 0.8048156092160798, "grad_norm": 0.17400500178337097, "learning_rate": 1e-05, "loss": 0.5345, "step": 2908 }, { "epoch": 0.8050923683664292, "grad_norm": 0.17862708866596222, "learning_rate": 1e-05, "loss": 0.5505, "step": 2909 }, { "epoch": 0.8053691275167785, "grad_norm": 0.182284876704216, "learning_rate": 1e-05, "loss": 0.5632, "step": 2910 }, { "epoch": 0.8056458866671279, "grad_norm": 0.1640438586473465, "learning_rate": 1e-05, "loss": 0.5237, "step": 2911 }, { "epoch": 0.8059226458174773, "grad_norm": 0.17225521802902222, "learning_rate": 1e-05, "loss": 0.5455, "step": 2912 }, { "epoch": 0.8061994049678267, "grad_norm": 0.1763138324022293, "learning_rate": 1e-05, "loss": 0.5196, "step": 2913 }, { "epoch": 0.8064761641181761, "grad_norm": 0.16859211027622223, "learning_rate": 1e-05, "loss": 0.5264, "step": 2914 }, { "epoch": 0.8067529232685255, "grad_norm": 0.17812186479568481, "learning_rate": 1e-05, "loss": 0.5823, "step": 2915 }, { "epoch": 0.807029682418875, "grad_norm": 0.1782657951116562, "learning_rate": 1e-05, "loss": 0.5373, "step": 2916 }, { "epoch": 0.8073064415692244, "grad_norm": 0.16175705194473267, "learning_rate": 1e-05, "loss": 0.5357, "step": 2917 }, { "epoch": 0.8075832007195738, "grad_norm": 0.17180763185024261, "learning_rate": 1e-05, "loss": 0.5472, "step": 2918 }, { "epoch": 0.8078599598699232, "grad_norm": 0.1723909229040146, "learning_rate": 1e-05, "loss": 0.53, "step": 2919 }, { "epoch": 0.8081367190202726, "grad_norm": 0.16997729241847992, "learning_rate": 1e-05, "loss": 0.5255, "step": 2920 }, { "epoch": 0.808413478170622, "grad_norm": 0.16852670907974243, "learning_rate": 1e-05, "loss": 0.538, "step": 2921 }, { "epoch": 0.8086902373209715, "grad_norm": 0.16955693066120148, "learning_rate": 1e-05, "loss": 0.5476, "step": 2922 }, { "epoch": 0.8089669964713209, "grad_norm": 0.17206166684627533, "learning_rate": 1e-05, "loss": 0.5498, "step": 2923 }, { "epoch": 0.8092437556216703, "grad_norm": 0.16324271261692047, "learning_rate": 1e-05, "loss": 0.52, "step": 2924 }, { "epoch": 0.8095205147720197, "grad_norm": 0.17265886068344116, "learning_rate": 1e-05, "loss": 0.5363, "step": 2925 }, { "epoch": 0.8097972739223691, "grad_norm": 0.17217829823493958, "learning_rate": 1e-05, "loss": 0.5485, "step": 2926 }, { "epoch": 0.8100740330727184, "grad_norm": 0.176965594291687, "learning_rate": 1e-05, "loss": 0.5916, "step": 2927 }, { "epoch": 0.8103507922230678, "grad_norm": 0.16991929709911346, "learning_rate": 1e-05, "loss": 0.5421, "step": 2928 }, { "epoch": 0.8106275513734172, "grad_norm": 0.16964048147201538, "learning_rate": 1e-05, "loss": 0.5587, "step": 2929 }, { "epoch": 0.8109043105237667, "grad_norm": 0.1619676947593689, "learning_rate": 1e-05, "loss": 0.558, "step": 2930 }, { "epoch": 0.8111810696741161, "grad_norm": 0.1576359122991562, "learning_rate": 1e-05, "loss": 0.5457, "step": 2931 }, { "epoch": 0.8114578288244655, "grad_norm": 0.1708419770002365, "learning_rate": 1e-05, "loss": 0.5783, "step": 2932 }, { "epoch": 0.8117345879748149, "grad_norm": 0.1720888763666153, "learning_rate": 1e-05, "loss": 0.5166, "step": 2933 }, { "epoch": 0.8120113471251643, "grad_norm": 0.1642705202102661, "learning_rate": 1e-05, "loss": 0.5535, "step": 2934 }, { "epoch": 0.8122881062755137, "grad_norm": 0.16248668730258942, "learning_rate": 1e-05, "loss": 0.5522, "step": 2935 }, { "epoch": 0.8125648654258631, "grad_norm": 0.16619190573692322, "learning_rate": 1e-05, "loss": 0.5338, "step": 2936 }, { "epoch": 0.8128416245762126, "grad_norm": 0.17124606668949127, "learning_rate": 1e-05, "loss": 0.5518, "step": 2937 }, { "epoch": 0.813118383726562, "grad_norm": 0.16693778336048126, "learning_rate": 1e-05, "loss": 0.5465, "step": 2938 }, { "epoch": 0.8133951428769114, "grad_norm": 0.16950076818466187, "learning_rate": 1e-05, "loss": 0.5397, "step": 2939 }, { "epoch": 0.8136719020272608, "grad_norm": 0.16655461490154266, "learning_rate": 1e-05, "loss": 0.54, "step": 2940 }, { "epoch": 0.8139486611776102, "grad_norm": 0.16716182231903076, "learning_rate": 1e-05, "loss": 0.5385, "step": 2941 }, { "epoch": 0.8142254203279596, "grad_norm": 0.17215977609157562, "learning_rate": 1e-05, "loss": 0.5532, "step": 2942 }, { "epoch": 0.814502179478309, "grad_norm": 0.1670747697353363, "learning_rate": 1e-05, "loss": 0.5295, "step": 2943 }, { "epoch": 0.8147789386286584, "grad_norm": 0.1688673347234726, "learning_rate": 1e-05, "loss": 0.5659, "step": 2944 }, { "epoch": 0.8150556977790078, "grad_norm": 0.1821361929178238, "learning_rate": 1e-05, "loss": 0.5584, "step": 2945 }, { "epoch": 0.8153324569293572, "grad_norm": 0.17583809792995453, "learning_rate": 1e-05, "loss": 0.5335, "step": 2946 }, { "epoch": 0.8156092160797066, "grad_norm": 0.17248739302158356, "learning_rate": 1e-05, "loss": 0.5478, "step": 2947 }, { "epoch": 0.815885975230056, "grad_norm": 0.17805232107639313, "learning_rate": 1e-05, "loss": 0.5476, "step": 2948 }, { "epoch": 0.8161627343804054, "grad_norm": 0.17441006004810333, "learning_rate": 1e-05, "loss": 0.5856, "step": 2949 }, { "epoch": 0.8164394935307548, "grad_norm": 0.17935536801815033, "learning_rate": 1e-05, "loss": 0.5391, "step": 2950 }, { "epoch": 0.8167162526811043, "grad_norm": 0.17200230062007904, "learning_rate": 1e-05, "loss": 0.5836, "step": 2951 }, { "epoch": 0.8169930118314537, "grad_norm": 0.1747102290391922, "learning_rate": 1e-05, "loss": 0.5572, "step": 2952 }, { "epoch": 0.8172697709818031, "grad_norm": 0.17655831575393677, "learning_rate": 1e-05, "loss": 0.5259, "step": 2953 }, { "epoch": 0.8175465301321525, "grad_norm": 0.168531596660614, "learning_rate": 1e-05, "loss": 0.5389, "step": 2954 }, { "epoch": 0.8178232892825019, "grad_norm": 0.18427175283432007, "learning_rate": 1e-05, "loss": 0.5671, "step": 2955 }, { "epoch": 0.8181000484328513, "grad_norm": 0.17027483880519867, "learning_rate": 1e-05, "loss": 0.5798, "step": 2956 }, { "epoch": 0.8183768075832007, "grad_norm": 0.17027164995670319, "learning_rate": 1e-05, "loss": 0.5353, "step": 2957 }, { "epoch": 0.8186535667335502, "grad_norm": 0.16667883098125458, "learning_rate": 1e-05, "loss": 0.532, "step": 2958 }, { "epoch": 0.8189303258838996, "grad_norm": 0.17354825139045715, "learning_rate": 1e-05, "loss": 0.5315, "step": 2959 }, { "epoch": 0.819207085034249, "grad_norm": 0.1672859489917755, "learning_rate": 1e-05, "loss": 0.5622, "step": 2960 }, { "epoch": 0.8194838441845983, "grad_norm": 0.17036589980125427, "learning_rate": 1e-05, "loss": 0.5468, "step": 2961 }, { "epoch": 0.8197606033349477, "grad_norm": 0.17532959580421448, "learning_rate": 1e-05, "loss": 0.5545, "step": 2962 }, { "epoch": 0.8200373624852971, "grad_norm": 0.16994069516658783, "learning_rate": 1e-05, "loss": 0.5774, "step": 2963 }, { "epoch": 0.8203141216356465, "grad_norm": 0.17329220473766327, "learning_rate": 1e-05, "loss": 0.5395, "step": 2964 }, { "epoch": 0.820590880785996, "grad_norm": 0.18293745815753937, "learning_rate": 1e-05, "loss": 0.5501, "step": 2965 }, { "epoch": 0.8208676399363454, "grad_norm": 0.1667466014623642, "learning_rate": 1e-05, "loss": 0.5554, "step": 2966 }, { "epoch": 0.8211443990866948, "grad_norm": 0.16906803846359253, "learning_rate": 1e-05, "loss": 0.5313, "step": 2967 }, { "epoch": 0.8214211582370442, "grad_norm": 0.17087194323539734, "learning_rate": 1e-05, "loss": 0.5342, "step": 2968 }, { "epoch": 0.8216979173873936, "grad_norm": 0.174810528755188, "learning_rate": 1e-05, "loss": 0.5245, "step": 2969 }, { "epoch": 0.821974676537743, "grad_norm": 0.17350119352340698, "learning_rate": 1e-05, "loss": 0.5345, "step": 2970 }, { "epoch": 0.8222514356880924, "grad_norm": 0.1709146648645401, "learning_rate": 1e-05, "loss": 0.5476, "step": 2971 }, { "epoch": 0.8225281948384419, "grad_norm": 0.16918858885765076, "learning_rate": 1e-05, "loss": 0.5064, "step": 2972 }, { "epoch": 0.8228049539887913, "grad_norm": 0.1733410805463791, "learning_rate": 1e-05, "loss": 0.5628, "step": 2973 }, { "epoch": 0.8230817131391407, "grad_norm": 0.16719189286231995, "learning_rate": 1e-05, "loss": 0.5245, "step": 2974 }, { "epoch": 0.8233584722894901, "grad_norm": 0.16426025331020355, "learning_rate": 1e-05, "loss": 0.5404, "step": 2975 }, { "epoch": 0.8236352314398395, "grad_norm": 0.15933836996555328, "learning_rate": 1e-05, "loss": 0.5192, "step": 2976 }, { "epoch": 0.8239119905901889, "grad_norm": 0.17306837439537048, "learning_rate": 1e-05, "loss": 0.5662, "step": 2977 }, { "epoch": 0.8241887497405384, "grad_norm": 0.1704317033290863, "learning_rate": 1e-05, "loss": 0.5497, "step": 2978 }, { "epoch": 0.8244655088908877, "grad_norm": 0.18564698100090027, "learning_rate": 1e-05, "loss": 0.5407, "step": 2979 }, { "epoch": 0.8247422680412371, "grad_norm": 0.16937625408172607, "learning_rate": 1e-05, "loss": 0.5718, "step": 2980 }, { "epoch": 0.8250190271915865, "grad_norm": 0.16593968868255615, "learning_rate": 1e-05, "loss": 0.5732, "step": 2981 }, { "epoch": 0.8252957863419359, "grad_norm": 0.16154183447360992, "learning_rate": 1e-05, "loss": 0.5342, "step": 2982 }, { "epoch": 0.8255725454922853, "grad_norm": 0.17240622639656067, "learning_rate": 1e-05, "loss": 0.5316, "step": 2983 }, { "epoch": 0.8258493046426347, "grad_norm": 0.1795681118965149, "learning_rate": 1e-05, "loss": 0.5475, "step": 2984 }, { "epoch": 0.8261260637929841, "grad_norm": 0.17450197041034698, "learning_rate": 1e-05, "loss": 0.5341, "step": 2985 }, { "epoch": 0.8264028229433336, "grad_norm": 0.17587175965309143, "learning_rate": 1e-05, "loss": 0.5456, "step": 2986 }, { "epoch": 0.826679582093683, "grad_norm": 0.1733914464712143, "learning_rate": 1e-05, "loss": 0.5637, "step": 2987 }, { "epoch": 0.8269563412440324, "grad_norm": 0.1725868284702301, "learning_rate": 1e-05, "loss": 0.5348, "step": 2988 }, { "epoch": 0.8272331003943818, "grad_norm": 0.1673918217420578, "learning_rate": 1e-05, "loss": 0.5299, "step": 2989 }, { "epoch": 0.8275098595447312, "grad_norm": 0.17039747536182404, "learning_rate": 1e-05, "loss": 0.5728, "step": 2990 }, { "epoch": 0.8277866186950806, "grad_norm": 0.16689322888851166, "learning_rate": 1e-05, "loss": 0.5347, "step": 2991 }, { "epoch": 0.82806337784543, "grad_norm": 0.17655062675476074, "learning_rate": 1e-05, "loss": 0.5758, "step": 2992 }, { "epoch": 0.8283401369957795, "grad_norm": 0.17180916666984558, "learning_rate": 1e-05, "loss": 0.5628, "step": 2993 }, { "epoch": 0.8286168961461289, "grad_norm": 0.16136707365512848, "learning_rate": 1e-05, "loss": 0.5162, "step": 2994 }, { "epoch": 0.8288936552964783, "grad_norm": 0.16771753132343292, "learning_rate": 1e-05, "loss": 0.5182, "step": 2995 }, { "epoch": 0.8291704144468276, "grad_norm": 0.17374348640441895, "learning_rate": 1e-05, "loss": 0.5601, "step": 2996 }, { "epoch": 0.829447173597177, "grad_norm": 0.1793673038482666, "learning_rate": 1e-05, "loss": 0.5493, "step": 2997 }, { "epoch": 0.8297239327475264, "grad_norm": 0.17336037755012512, "learning_rate": 1e-05, "loss": 0.542, "step": 2998 }, { "epoch": 0.8300006918978758, "grad_norm": 0.16323933005332947, "learning_rate": 1e-05, "loss": 0.5163, "step": 2999 }, { "epoch": 0.8302774510482253, "grad_norm": 0.16800573468208313, "learning_rate": 1e-05, "loss": 0.5211, "step": 3000 }, { "epoch": 0.8305542101985747, "grad_norm": 0.16986382007598877, "learning_rate": 1e-05, "loss": 0.5555, "step": 3001 }, { "epoch": 0.8308309693489241, "grad_norm": 0.1707543432712555, "learning_rate": 1e-05, "loss": 0.5738, "step": 3002 }, { "epoch": 0.8311077284992735, "grad_norm": 0.16683264076709747, "learning_rate": 1e-05, "loss": 0.5644, "step": 3003 }, { "epoch": 0.8313844876496229, "grad_norm": 0.1756633073091507, "learning_rate": 1e-05, "loss": 0.5689, "step": 3004 }, { "epoch": 0.8316612467999723, "grad_norm": 0.15977367758750916, "learning_rate": 1e-05, "loss": 0.5415, "step": 3005 }, { "epoch": 0.8319380059503217, "grad_norm": 0.17142276465892792, "learning_rate": 1e-05, "loss": 0.5389, "step": 3006 }, { "epoch": 0.8322147651006712, "grad_norm": 0.17853157222270966, "learning_rate": 1e-05, "loss": 0.581, "step": 3007 }, { "epoch": 0.8324915242510206, "grad_norm": 0.1761643886566162, "learning_rate": 1e-05, "loss": 0.5475, "step": 3008 }, { "epoch": 0.83276828340137, "grad_norm": 0.16247418522834778, "learning_rate": 1e-05, "loss": 0.5461, "step": 3009 }, { "epoch": 0.8330450425517194, "grad_norm": 0.17372946441173553, "learning_rate": 1e-05, "loss": 0.5537, "step": 3010 }, { "epoch": 0.8333218017020688, "grad_norm": 0.18713460862636566, "learning_rate": 1e-05, "loss": 0.5622, "step": 3011 }, { "epoch": 0.8335985608524182, "grad_norm": 0.16530995070934296, "learning_rate": 1e-05, "loss": 0.5942, "step": 3012 }, { "epoch": 0.8338753200027675, "grad_norm": 0.1691153645515442, "learning_rate": 1e-05, "loss": 0.5202, "step": 3013 }, { "epoch": 0.834152079153117, "grad_norm": 0.17008765041828156, "learning_rate": 1e-05, "loss": 0.5395, "step": 3014 }, { "epoch": 0.8344288383034664, "grad_norm": 0.16734033823013306, "learning_rate": 1e-05, "loss": 0.5206, "step": 3015 }, { "epoch": 0.8347055974538158, "grad_norm": 0.16864658892154694, "learning_rate": 1e-05, "loss": 0.534, "step": 3016 }, { "epoch": 0.8349823566041652, "grad_norm": 0.1711084246635437, "learning_rate": 1e-05, "loss": 0.5231, "step": 3017 }, { "epoch": 0.8352591157545146, "grad_norm": 0.17574897408485413, "learning_rate": 1e-05, "loss": 0.5478, "step": 3018 }, { "epoch": 0.835535874904864, "grad_norm": 0.1696849912405014, "learning_rate": 1e-05, "loss": 0.5347, "step": 3019 }, { "epoch": 0.8358126340552134, "grad_norm": 0.17245711386203766, "learning_rate": 1e-05, "loss": 0.5622, "step": 3020 }, { "epoch": 0.8360893932055629, "grad_norm": 0.16655421257019043, "learning_rate": 1e-05, "loss": 0.5317, "step": 3021 }, { "epoch": 0.8363661523559123, "grad_norm": 0.17682930827140808, "learning_rate": 1e-05, "loss": 0.5261, "step": 3022 }, { "epoch": 0.8366429115062617, "grad_norm": 0.17335867881774902, "learning_rate": 1e-05, "loss": 0.5466, "step": 3023 }, { "epoch": 0.8369196706566111, "grad_norm": 0.16427671909332275, "learning_rate": 1e-05, "loss": 0.5416, "step": 3024 }, { "epoch": 0.8371964298069605, "grad_norm": 0.17802628874778748, "learning_rate": 1e-05, "loss": 0.5429, "step": 3025 }, { "epoch": 0.8374731889573099, "grad_norm": 0.17160214483737946, "learning_rate": 1e-05, "loss": 0.5426, "step": 3026 }, { "epoch": 0.8377499481076593, "grad_norm": 0.1754649579524994, "learning_rate": 1e-05, "loss": 0.5695, "step": 3027 }, { "epoch": 0.8380267072580088, "grad_norm": 0.17827630043029785, "learning_rate": 1e-05, "loss": 0.5743, "step": 3028 }, { "epoch": 0.8383034664083582, "grad_norm": 0.16901616752147675, "learning_rate": 1e-05, "loss": 0.5276, "step": 3029 }, { "epoch": 0.8385802255587075, "grad_norm": 0.17221927642822266, "learning_rate": 1e-05, "loss": 0.5593, "step": 3030 }, { "epoch": 0.8388569847090569, "grad_norm": 0.17241103947162628, "learning_rate": 1e-05, "loss": 0.5653, "step": 3031 }, { "epoch": 0.8391337438594063, "grad_norm": 0.17767353355884552, "learning_rate": 1e-05, "loss": 0.5492, "step": 3032 }, { "epoch": 0.8394105030097557, "grad_norm": 0.17279064655303955, "learning_rate": 1e-05, "loss": 0.5718, "step": 3033 }, { "epoch": 0.8396872621601051, "grad_norm": 0.18263421952724457, "learning_rate": 1e-05, "loss": 0.5434, "step": 3034 }, { "epoch": 0.8399640213104546, "grad_norm": 0.17813631892204285, "learning_rate": 1e-05, "loss": 0.5381, "step": 3035 }, { "epoch": 0.840240780460804, "grad_norm": 0.17204280197620392, "learning_rate": 1e-05, "loss": 0.5248, "step": 3036 }, { "epoch": 0.8405175396111534, "grad_norm": 0.18195730447769165, "learning_rate": 1e-05, "loss": 0.5651, "step": 3037 }, { "epoch": 0.8407942987615028, "grad_norm": 0.1680951714515686, "learning_rate": 1e-05, "loss": 0.5365, "step": 3038 }, { "epoch": 0.8410710579118522, "grad_norm": 0.1725459098815918, "learning_rate": 1e-05, "loss": 0.5421, "step": 3039 }, { "epoch": 0.8413478170622016, "grad_norm": 0.176632821559906, "learning_rate": 1e-05, "loss": 0.5631, "step": 3040 }, { "epoch": 0.841624576212551, "grad_norm": 0.18290533125400543, "learning_rate": 1e-05, "loss": 0.5566, "step": 3041 }, { "epoch": 0.8419013353629005, "grad_norm": 0.1808788925409317, "learning_rate": 1e-05, "loss": 0.5209, "step": 3042 }, { "epoch": 0.8421780945132499, "grad_norm": 0.17783433198928833, "learning_rate": 1e-05, "loss": 0.531, "step": 3043 }, { "epoch": 0.8424548536635993, "grad_norm": 0.1769789755344391, "learning_rate": 1e-05, "loss": 0.5735, "step": 3044 }, { "epoch": 0.8427316128139487, "grad_norm": 0.1800096184015274, "learning_rate": 1e-05, "loss": 0.5602, "step": 3045 }, { "epoch": 0.8430083719642981, "grad_norm": 0.1758996993303299, "learning_rate": 1e-05, "loss": 0.5543, "step": 3046 }, { "epoch": 0.8432851311146474, "grad_norm": 0.1728782057762146, "learning_rate": 1e-05, "loss": 0.5936, "step": 3047 }, { "epoch": 0.8435618902649968, "grad_norm": 0.17076872289180756, "learning_rate": 1e-05, "loss": 0.5348, "step": 3048 }, { "epoch": 0.8438386494153463, "grad_norm": 0.1708354949951172, "learning_rate": 1e-05, "loss": 0.546, "step": 3049 }, { "epoch": 0.8441154085656957, "grad_norm": 0.17266380786895752, "learning_rate": 1e-05, "loss": 0.5471, "step": 3050 }, { "epoch": 0.8443921677160451, "grad_norm": 0.16970618069171906, "learning_rate": 1e-05, "loss": 0.5306, "step": 3051 }, { "epoch": 0.8446689268663945, "grad_norm": 0.1688990592956543, "learning_rate": 1e-05, "loss": 0.5293, "step": 3052 }, { "epoch": 0.8449456860167439, "grad_norm": 0.17158186435699463, "learning_rate": 1e-05, "loss": 0.5545, "step": 3053 }, { "epoch": 0.8452224451670933, "grad_norm": 0.1705106496810913, "learning_rate": 1e-05, "loss": 0.5202, "step": 3054 }, { "epoch": 0.8454992043174427, "grad_norm": 0.1763305813074112, "learning_rate": 1e-05, "loss": 0.5355, "step": 3055 }, { "epoch": 0.8457759634677922, "grad_norm": 0.1710885912179947, "learning_rate": 1e-05, "loss": 0.5358, "step": 3056 }, { "epoch": 0.8460527226181416, "grad_norm": 0.1757788062095642, "learning_rate": 1e-05, "loss": 0.5977, "step": 3057 }, { "epoch": 0.846329481768491, "grad_norm": 0.1785188615322113, "learning_rate": 1e-05, "loss": 0.5748, "step": 3058 }, { "epoch": 0.8466062409188404, "grad_norm": 0.17076657712459564, "learning_rate": 1e-05, "loss": 0.5618, "step": 3059 }, { "epoch": 0.8468830000691898, "grad_norm": 0.17793579399585724, "learning_rate": 1e-05, "loss": 0.5577, "step": 3060 }, { "epoch": 0.8471597592195392, "grad_norm": 0.17747214436531067, "learning_rate": 1e-05, "loss": 0.5834, "step": 3061 }, { "epoch": 0.8474365183698886, "grad_norm": 0.16350124776363373, "learning_rate": 1e-05, "loss": 0.5337, "step": 3062 }, { "epoch": 0.8477132775202381, "grad_norm": 0.1732901781797409, "learning_rate": 1e-05, "loss": 0.5432, "step": 3063 }, { "epoch": 0.8479900366705874, "grad_norm": 0.1713840514421463, "learning_rate": 1e-05, "loss": 0.5744, "step": 3064 }, { "epoch": 0.8482667958209368, "grad_norm": 0.17691153287887573, "learning_rate": 1e-05, "loss": 0.5623, "step": 3065 }, { "epoch": 0.8485435549712862, "grad_norm": 0.16591478884220123, "learning_rate": 1e-05, "loss": 0.5051, "step": 3066 }, { "epoch": 0.8488203141216356, "grad_norm": 0.17621451616287231, "learning_rate": 1e-05, "loss": 0.5343, "step": 3067 }, { "epoch": 0.849097073271985, "grad_norm": 0.1793702095746994, "learning_rate": 1e-05, "loss": 0.5509, "step": 3068 }, { "epoch": 0.8493738324223344, "grad_norm": 0.17300420999526978, "learning_rate": 1e-05, "loss": 0.5608, "step": 3069 }, { "epoch": 0.8496505915726839, "grad_norm": 0.16330914199352264, "learning_rate": 1e-05, "loss": 0.5387, "step": 3070 }, { "epoch": 0.8499273507230333, "grad_norm": 0.17058104276657104, "learning_rate": 1e-05, "loss": 0.5737, "step": 3071 }, { "epoch": 0.8502041098733827, "grad_norm": 0.1609831154346466, "learning_rate": 1e-05, "loss": 0.5283, "step": 3072 }, { "epoch": 0.8504808690237321, "grad_norm": 0.16474640369415283, "learning_rate": 1e-05, "loss": 0.5285, "step": 3073 }, { "epoch": 0.8507576281740815, "grad_norm": 0.1747579127550125, "learning_rate": 1e-05, "loss": 0.5389, "step": 3074 }, { "epoch": 0.8510343873244309, "grad_norm": 0.1815209537744522, "learning_rate": 1e-05, "loss": 0.5102, "step": 3075 }, { "epoch": 0.8513111464747803, "grad_norm": 0.16866236925125122, "learning_rate": 1e-05, "loss": 0.5346, "step": 3076 }, { "epoch": 0.8515879056251298, "grad_norm": 0.1771089732646942, "learning_rate": 1e-05, "loss": 0.5701, "step": 3077 }, { "epoch": 0.8518646647754792, "grad_norm": 0.1739141196012497, "learning_rate": 1e-05, "loss": 0.5321, "step": 3078 }, { "epoch": 0.8521414239258286, "grad_norm": 0.1709204912185669, "learning_rate": 1e-05, "loss": 0.527, "step": 3079 }, { "epoch": 0.852418183076178, "grad_norm": 0.1804884374141693, "learning_rate": 1e-05, "loss": 0.568, "step": 3080 }, { "epoch": 0.8526949422265274, "grad_norm": 0.16676285862922668, "learning_rate": 1e-05, "loss": 0.5317, "step": 3081 }, { "epoch": 0.8529717013768767, "grad_norm": 0.17620262503623962, "learning_rate": 1e-05, "loss": 0.5508, "step": 3082 }, { "epoch": 0.8532484605272261, "grad_norm": 0.17438367009162903, "learning_rate": 1e-05, "loss": 0.5289, "step": 3083 }, { "epoch": 0.8535252196775756, "grad_norm": 0.17755062878131866, "learning_rate": 1e-05, "loss": 0.5495, "step": 3084 }, { "epoch": 0.853801978827925, "grad_norm": 0.16977651417255402, "learning_rate": 1e-05, "loss": 0.5451, "step": 3085 }, { "epoch": 0.8540787379782744, "grad_norm": 0.16935773193836212, "learning_rate": 1e-05, "loss": 0.5387, "step": 3086 }, { "epoch": 0.8543554971286238, "grad_norm": 0.17083144187927246, "learning_rate": 1e-05, "loss": 0.5521, "step": 3087 }, { "epoch": 0.8546322562789732, "grad_norm": 0.17242205142974854, "learning_rate": 1e-05, "loss": 0.5777, "step": 3088 }, { "epoch": 0.8549090154293226, "grad_norm": 0.16637566685676575, "learning_rate": 1e-05, "loss": 0.5263, "step": 3089 }, { "epoch": 0.855185774579672, "grad_norm": 0.1725653111934662, "learning_rate": 1e-05, "loss": 0.5443, "step": 3090 }, { "epoch": 0.8554625337300215, "grad_norm": 0.15968820452690125, "learning_rate": 1e-05, "loss": 0.5351, "step": 3091 }, { "epoch": 0.8557392928803709, "grad_norm": 0.16072118282318115, "learning_rate": 1e-05, "loss": 0.5202, "step": 3092 }, { "epoch": 0.8560160520307203, "grad_norm": 0.1734088510274887, "learning_rate": 1e-05, "loss": 0.5552, "step": 3093 }, { "epoch": 0.8562928111810697, "grad_norm": 0.17230623960494995, "learning_rate": 1e-05, "loss": 0.531, "step": 3094 }, { "epoch": 0.8565695703314191, "grad_norm": 0.17225314676761627, "learning_rate": 1e-05, "loss": 0.5086, "step": 3095 }, { "epoch": 0.8568463294817685, "grad_norm": 0.17211340367794037, "learning_rate": 1e-05, "loss": 0.5503, "step": 3096 }, { "epoch": 0.857123088632118, "grad_norm": 0.17051783204078674, "learning_rate": 1e-05, "loss": 0.5429, "step": 3097 }, { "epoch": 0.8573998477824674, "grad_norm": 0.1662530153989792, "learning_rate": 1e-05, "loss": 0.5441, "step": 3098 }, { "epoch": 0.8576766069328167, "grad_norm": 0.1703985631465912, "learning_rate": 1e-05, "loss": 0.5567, "step": 3099 }, { "epoch": 0.8579533660831661, "grad_norm": 0.16268779337406158, "learning_rate": 1e-05, "loss": 0.5511, "step": 3100 }, { "epoch": 0.8582301252335155, "grad_norm": 0.17199379205703735, "learning_rate": 1e-05, "loss": 0.5886, "step": 3101 }, { "epoch": 0.8585068843838649, "grad_norm": 0.17662371695041656, "learning_rate": 1e-05, "loss": 0.5326, "step": 3102 }, { "epoch": 0.8587836435342143, "grad_norm": 0.17540766298770905, "learning_rate": 1e-05, "loss": 0.5326, "step": 3103 }, { "epoch": 0.8590604026845637, "grad_norm": 0.16868863999843597, "learning_rate": 1e-05, "loss": 0.5612, "step": 3104 }, { "epoch": 0.8593371618349132, "grad_norm": 0.18714043498039246, "learning_rate": 1e-05, "loss": 0.571, "step": 3105 }, { "epoch": 0.8596139209852626, "grad_norm": 0.17731882631778717, "learning_rate": 1e-05, "loss": 0.5464, "step": 3106 }, { "epoch": 0.859890680135612, "grad_norm": 0.17002016305923462, "learning_rate": 1e-05, "loss": 0.5377, "step": 3107 }, { "epoch": 0.8601674392859614, "grad_norm": 0.17288091778755188, "learning_rate": 1e-05, "loss": 0.549, "step": 3108 }, { "epoch": 0.8604441984363108, "grad_norm": 0.1697467863559723, "learning_rate": 1e-05, "loss": 0.549, "step": 3109 }, { "epoch": 0.8607209575866602, "grad_norm": 0.16299717128276825, "learning_rate": 1e-05, "loss": 0.5347, "step": 3110 }, { "epoch": 0.8609977167370096, "grad_norm": 0.16570106148719788, "learning_rate": 1e-05, "loss": 0.5392, "step": 3111 }, { "epoch": 0.8612744758873591, "grad_norm": 0.17444342374801636, "learning_rate": 1e-05, "loss": 0.54, "step": 3112 }, { "epoch": 0.8615512350377085, "grad_norm": 0.1751585304737091, "learning_rate": 1e-05, "loss": 0.5341, "step": 3113 }, { "epoch": 0.8618279941880579, "grad_norm": 0.17262916266918182, "learning_rate": 1e-05, "loss": 0.556, "step": 3114 }, { "epoch": 0.8621047533384073, "grad_norm": 0.17912612855434418, "learning_rate": 1e-05, "loss": 0.5581, "step": 3115 }, { "epoch": 0.8623815124887566, "grad_norm": 0.16335858404636383, "learning_rate": 1e-05, "loss": 0.5576, "step": 3116 }, { "epoch": 0.862658271639106, "grad_norm": 0.1793373078107834, "learning_rate": 1e-05, "loss": 0.5758, "step": 3117 }, { "epoch": 0.8629350307894554, "grad_norm": 0.1670244336128235, "learning_rate": 1e-05, "loss": 0.5613, "step": 3118 }, { "epoch": 0.8632117899398049, "grad_norm": 0.16898614168167114, "learning_rate": 1e-05, "loss": 0.5752, "step": 3119 }, { "epoch": 0.8634885490901543, "grad_norm": 0.1646735519170761, "learning_rate": 1e-05, "loss": 0.5183, "step": 3120 }, { "epoch": 0.8637653082405037, "grad_norm": 0.16811850666999817, "learning_rate": 1e-05, "loss": 0.5521, "step": 3121 }, { "epoch": 0.8640420673908531, "grad_norm": 0.16658636927604675, "learning_rate": 1e-05, "loss": 0.5541, "step": 3122 }, { "epoch": 0.8643188265412025, "grad_norm": 0.17932505905628204, "learning_rate": 1e-05, "loss": 0.5656, "step": 3123 }, { "epoch": 0.8645955856915519, "grad_norm": 0.17094548046588898, "learning_rate": 1e-05, "loss": 0.561, "step": 3124 }, { "epoch": 0.8648723448419013, "grad_norm": 0.17029404640197754, "learning_rate": 1e-05, "loss": 0.5752, "step": 3125 }, { "epoch": 0.8651491039922508, "grad_norm": 0.1737131029367447, "learning_rate": 1e-05, "loss": 0.5471, "step": 3126 }, { "epoch": 0.8654258631426002, "grad_norm": 0.17367152869701385, "learning_rate": 1e-05, "loss": 0.5477, "step": 3127 }, { "epoch": 0.8657026222929496, "grad_norm": 0.1768818199634552, "learning_rate": 1e-05, "loss": 0.5888, "step": 3128 }, { "epoch": 0.865979381443299, "grad_norm": 0.1709568351507187, "learning_rate": 1e-05, "loss": 0.5373, "step": 3129 }, { "epoch": 0.8662561405936484, "grad_norm": 0.17473232746124268, "learning_rate": 1e-05, "loss": 0.5679, "step": 3130 }, { "epoch": 0.8665328997439978, "grad_norm": 0.17406263947486877, "learning_rate": 1e-05, "loss": 0.5837, "step": 3131 }, { "epoch": 0.8668096588943472, "grad_norm": 0.16815535724163055, "learning_rate": 1e-05, "loss": 0.5541, "step": 3132 }, { "epoch": 0.8670864180446965, "grad_norm": 0.17584796249866486, "learning_rate": 1e-05, "loss": 0.5256, "step": 3133 }, { "epoch": 0.867363177195046, "grad_norm": 0.16935645043849945, "learning_rate": 1e-05, "loss": 0.5507, "step": 3134 }, { "epoch": 0.8676399363453954, "grad_norm": 0.17190711200237274, "learning_rate": 1e-05, "loss": 0.5411, "step": 3135 }, { "epoch": 0.8679166954957448, "grad_norm": 0.16989538073539734, "learning_rate": 1e-05, "loss": 0.5262, "step": 3136 }, { "epoch": 0.8681934546460942, "grad_norm": 0.16617731750011444, "learning_rate": 1e-05, "loss": 0.5373, "step": 3137 }, { "epoch": 0.8684702137964436, "grad_norm": 0.16779039800167084, "learning_rate": 1e-05, "loss": 0.5363, "step": 3138 }, { "epoch": 0.868746972946793, "grad_norm": 0.16819508373737335, "learning_rate": 1e-05, "loss": 0.5149, "step": 3139 }, { "epoch": 0.8690237320971425, "grad_norm": 0.16157937049865723, "learning_rate": 1e-05, "loss": 0.5347, "step": 3140 }, { "epoch": 0.8693004912474919, "grad_norm": 0.17440944910049438, "learning_rate": 1e-05, "loss": 0.5441, "step": 3141 }, { "epoch": 0.8695772503978413, "grad_norm": 0.17608119547367096, "learning_rate": 1e-05, "loss": 0.5464, "step": 3142 }, { "epoch": 0.8698540095481907, "grad_norm": 0.1772918552160263, "learning_rate": 1e-05, "loss": 0.5344, "step": 3143 }, { "epoch": 0.8701307686985401, "grad_norm": 0.16812512278556824, "learning_rate": 1e-05, "loss": 0.5214, "step": 3144 }, { "epoch": 0.8704075278488895, "grad_norm": 0.1668679118156433, "learning_rate": 1e-05, "loss": 0.5424, "step": 3145 }, { "epoch": 0.8706842869992389, "grad_norm": 0.17224152386188507, "learning_rate": 1e-05, "loss": 0.5558, "step": 3146 }, { "epoch": 0.8709610461495884, "grad_norm": 0.16765128076076508, "learning_rate": 1e-05, "loss": 0.5385, "step": 3147 }, { "epoch": 0.8712378052999378, "grad_norm": 0.16839823126792908, "learning_rate": 1e-05, "loss": 0.5325, "step": 3148 }, { "epoch": 0.8715145644502872, "grad_norm": 0.1656477451324463, "learning_rate": 1e-05, "loss": 0.5334, "step": 3149 }, { "epoch": 0.8717913236006365, "grad_norm": 0.1788339912891388, "learning_rate": 1e-05, "loss": 0.5715, "step": 3150 }, { "epoch": 0.8720680827509859, "grad_norm": 0.17296390235424042, "learning_rate": 1e-05, "loss": 0.563, "step": 3151 }, { "epoch": 0.8723448419013353, "grad_norm": 0.1760016530752182, "learning_rate": 1e-05, "loss": 0.5473, "step": 3152 }, { "epoch": 0.8726216010516847, "grad_norm": 0.1653965711593628, "learning_rate": 1e-05, "loss": 0.5294, "step": 3153 }, { "epoch": 0.8728983602020342, "grad_norm": 0.17043089866638184, "learning_rate": 1e-05, "loss": 0.5713, "step": 3154 }, { "epoch": 0.8731751193523836, "grad_norm": 0.1659078150987625, "learning_rate": 1e-05, "loss": 0.5258, "step": 3155 }, { "epoch": 0.873451878502733, "grad_norm": 0.17286022007465363, "learning_rate": 1e-05, "loss": 0.5414, "step": 3156 }, { "epoch": 0.8737286376530824, "grad_norm": 0.16997037827968597, "learning_rate": 1e-05, "loss": 0.5684, "step": 3157 }, { "epoch": 0.8740053968034318, "grad_norm": 0.17028820514678955, "learning_rate": 1e-05, "loss": 0.5136, "step": 3158 }, { "epoch": 0.8742821559537812, "grad_norm": 0.1719018816947937, "learning_rate": 1e-05, "loss": 0.5525, "step": 3159 }, { "epoch": 0.8745589151041306, "grad_norm": 0.1694048047065735, "learning_rate": 1e-05, "loss": 0.5394, "step": 3160 }, { "epoch": 0.8748356742544801, "grad_norm": 0.1669454425573349, "learning_rate": 1e-05, "loss": 0.5443, "step": 3161 }, { "epoch": 0.8751124334048295, "grad_norm": 0.16137750446796417, "learning_rate": 1e-05, "loss": 0.5279, "step": 3162 }, { "epoch": 0.8753891925551789, "grad_norm": 0.17215126752853394, "learning_rate": 1e-05, "loss": 0.5619, "step": 3163 }, { "epoch": 0.8756659517055283, "grad_norm": 0.16459199786186218, "learning_rate": 1e-05, "loss": 0.5515, "step": 3164 }, { "epoch": 0.8759427108558777, "grad_norm": 0.16439692676067352, "learning_rate": 1e-05, "loss": 0.5459, "step": 3165 }, { "epoch": 0.8762194700062271, "grad_norm": 0.1722557544708252, "learning_rate": 1e-05, "loss": 0.5722, "step": 3166 }, { "epoch": 0.8764962291565764, "grad_norm": 0.18108612298965454, "learning_rate": 1e-05, "loss": 0.5792, "step": 3167 }, { "epoch": 0.8767729883069258, "grad_norm": 0.16565243899822235, "learning_rate": 1e-05, "loss": 0.5326, "step": 3168 }, { "epoch": 0.8770497474572753, "grad_norm": 0.16802950203418732, "learning_rate": 1e-05, "loss": 0.5209, "step": 3169 }, { "epoch": 0.8773265066076247, "grad_norm": 0.1695140153169632, "learning_rate": 1e-05, "loss": 0.5224, "step": 3170 }, { "epoch": 0.8776032657579741, "grad_norm": 0.17655864357948303, "learning_rate": 1e-05, "loss": 0.536, "step": 3171 }, { "epoch": 0.8778800249083235, "grad_norm": 0.17550162971019745, "learning_rate": 1e-05, "loss": 0.5452, "step": 3172 }, { "epoch": 0.8781567840586729, "grad_norm": 0.1690964549779892, "learning_rate": 1e-05, "loss": 0.5515, "step": 3173 }, { "epoch": 0.8784335432090223, "grad_norm": 0.17174042761325836, "learning_rate": 1e-05, "loss": 0.543, "step": 3174 }, { "epoch": 0.8787103023593718, "grad_norm": 0.1666693389415741, "learning_rate": 1e-05, "loss": 0.5412, "step": 3175 }, { "epoch": 0.8789870615097212, "grad_norm": 0.16875699162483215, "learning_rate": 1e-05, "loss": 0.5534, "step": 3176 }, { "epoch": 0.8792638206600706, "grad_norm": 0.1709526777267456, "learning_rate": 1e-05, "loss": 0.5543, "step": 3177 }, { "epoch": 0.87954057981042, "grad_norm": 0.16901463270187378, "learning_rate": 1e-05, "loss": 0.5503, "step": 3178 }, { "epoch": 0.8798173389607694, "grad_norm": 0.1731615960597992, "learning_rate": 1e-05, "loss": 0.5709, "step": 3179 }, { "epoch": 0.8800940981111188, "grad_norm": 0.1650460660457611, "learning_rate": 1e-05, "loss": 0.5407, "step": 3180 }, { "epoch": 0.8803708572614682, "grad_norm": 0.16764608025550842, "learning_rate": 1e-05, "loss": 0.5552, "step": 3181 }, { "epoch": 0.8806476164118177, "grad_norm": 0.17073556780815125, "learning_rate": 1e-05, "loss": 0.5726, "step": 3182 }, { "epoch": 0.8809243755621671, "grad_norm": 0.1702551543712616, "learning_rate": 1e-05, "loss": 0.551, "step": 3183 }, { "epoch": 0.8812011347125165, "grad_norm": 0.17580755054950714, "learning_rate": 1e-05, "loss": 0.5757, "step": 3184 }, { "epoch": 0.8814778938628658, "grad_norm": 0.16723723709583282, "learning_rate": 1e-05, "loss": 0.512, "step": 3185 }, { "epoch": 0.8817546530132152, "grad_norm": 0.16274447739124298, "learning_rate": 1e-05, "loss": 0.5555, "step": 3186 }, { "epoch": 0.8820314121635646, "grad_norm": 0.17353567481040955, "learning_rate": 1e-05, "loss": 0.5705, "step": 3187 }, { "epoch": 0.882308171313914, "grad_norm": 0.16892613470554352, "learning_rate": 1e-05, "loss": 0.5353, "step": 3188 }, { "epoch": 0.8825849304642635, "grad_norm": 0.17636896669864655, "learning_rate": 1e-05, "loss": 0.5568, "step": 3189 }, { "epoch": 0.8828616896146129, "grad_norm": 0.17478793859481812, "learning_rate": 1e-05, "loss": 0.5437, "step": 3190 }, { "epoch": 0.8831384487649623, "grad_norm": 0.1743570864200592, "learning_rate": 1e-05, "loss": 0.5218, "step": 3191 }, { "epoch": 0.8834152079153117, "grad_norm": 0.17524099349975586, "learning_rate": 1e-05, "loss": 0.5395, "step": 3192 }, { "epoch": 0.8836919670656611, "grad_norm": 0.17714200913906097, "learning_rate": 1e-05, "loss": 0.5565, "step": 3193 }, { "epoch": 0.8839687262160105, "grad_norm": 0.1722848117351532, "learning_rate": 1e-05, "loss": 0.5664, "step": 3194 }, { "epoch": 0.8842454853663599, "grad_norm": 0.1688724011182785, "learning_rate": 1e-05, "loss": 0.5339, "step": 3195 }, { "epoch": 0.8845222445167094, "grad_norm": 0.17009508609771729, "learning_rate": 1e-05, "loss": 0.5384, "step": 3196 }, { "epoch": 0.8847990036670588, "grad_norm": 0.17937757074832916, "learning_rate": 1e-05, "loss": 0.5281, "step": 3197 }, { "epoch": 0.8850757628174082, "grad_norm": 0.16984179615974426, "learning_rate": 1e-05, "loss": 0.5348, "step": 3198 }, { "epoch": 0.8853525219677576, "grad_norm": 0.18000532686710358, "learning_rate": 1e-05, "loss": 0.5219, "step": 3199 }, { "epoch": 0.885629281118107, "grad_norm": 0.16604845225811005, "learning_rate": 1e-05, "loss": 0.5415, "step": 3200 }, { "epoch": 0.8859060402684564, "grad_norm": 0.16438895463943481, "learning_rate": 1e-05, "loss": 0.5088, "step": 3201 }, { "epoch": 0.8861827994188057, "grad_norm": 0.1878899186849594, "learning_rate": 1e-05, "loss": 0.5573, "step": 3202 }, { "epoch": 0.8864595585691551, "grad_norm": 0.1747640073299408, "learning_rate": 1e-05, "loss": 0.5689, "step": 3203 }, { "epoch": 0.8867363177195046, "grad_norm": 0.1734142303466797, "learning_rate": 1e-05, "loss": 0.5581, "step": 3204 }, { "epoch": 0.887013076869854, "grad_norm": 0.1693023145198822, "learning_rate": 1e-05, "loss": 0.5342, "step": 3205 }, { "epoch": 0.8872898360202034, "grad_norm": 0.1769869178533554, "learning_rate": 1e-05, "loss": 0.543, "step": 3206 }, { "epoch": 0.8875665951705528, "grad_norm": 0.1849977821111679, "learning_rate": 1e-05, "loss": 0.577, "step": 3207 }, { "epoch": 0.8878433543209022, "grad_norm": 0.16413460671901703, "learning_rate": 1e-05, "loss": 0.5264, "step": 3208 }, { "epoch": 0.8881201134712516, "grad_norm": 0.17128288745880127, "learning_rate": 1e-05, "loss": 0.5394, "step": 3209 }, { "epoch": 0.888396872621601, "grad_norm": 0.17402005195617676, "learning_rate": 1e-05, "loss": 0.559, "step": 3210 }, { "epoch": 0.8886736317719505, "grad_norm": 0.1635279655456543, "learning_rate": 1e-05, "loss": 0.5614, "step": 3211 }, { "epoch": 0.8889503909222999, "grad_norm": 0.1692158579826355, "learning_rate": 1e-05, "loss": 0.5105, "step": 3212 }, { "epoch": 0.8892271500726493, "grad_norm": 0.1690138727426529, "learning_rate": 1e-05, "loss": 0.578, "step": 3213 }, { "epoch": 0.8895039092229987, "grad_norm": 0.16994646191596985, "learning_rate": 1e-05, "loss": 0.5228, "step": 3214 }, { "epoch": 0.8897806683733481, "grad_norm": 0.16753093898296356, "learning_rate": 1e-05, "loss": 0.4952, "step": 3215 }, { "epoch": 0.8900574275236975, "grad_norm": 0.17552730441093445, "learning_rate": 1e-05, "loss": 0.5785, "step": 3216 }, { "epoch": 0.890334186674047, "grad_norm": 0.16872546076774597, "learning_rate": 1e-05, "loss": 0.5578, "step": 3217 }, { "epoch": 0.8906109458243964, "grad_norm": 0.1699487268924713, "learning_rate": 1e-05, "loss": 0.5408, "step": 3218 }, { "epoch": 0.8908877049747457, "grad_norm": 0.17285186052322388, "learning_rate": 1e-05, "loss": 0.557, "step": 3219 }, { "epoch": 0.8911644641250951, "grad_norm": 0.16365817189216614, "learning_rate": 1e-05, "loss": 0.55, "step": 3220 }, { "epoch": 0.8914412232754445, "grad_norm": 0.17398396134376526, "learning_rate": 1e-05, "loss": 0.583, "step": 3221 }, { "epoch": 0.8917179824257939, "grad_norm": 0.1708730012178421, "learning_rate": 1e-05, "loss": 0.5837, "step": 3222 }, { "epoch": 0.8919947415761433, "grad_norm": 0.17376358807086945, "learning_rate": 1e-05, "loss": 0.5438, "step": 3223 }, { "epoch": 0.8922715007264928, "grad_norm": 0.17377488315105438, "learning_rate": 1e-05, "loss": 0.5242, "step": 3224 }, { "epoch": 0.8925482598768422, "grad_norm": 0.16425229609012604, "learning_rate": 1e-05, "loss": 0.5125, "step": 3225 }, { "epoch": 0.8928250190271916, "grad_norm": 0.1734534204006195, "learning_rate": 1e-05, "loss": 0.543, "step": 3226 }, { "epoch": 0.893101778177541, "grad_norm": 0.16880455613136292, "learning_rate": 1e-05, "loss": 0.542, "step": 3227 }, { "epoch": 0.8933785373278904, "grad_norm": 0.17625243961811066, "learning_rate": 1e-05, "loss": 0.5301, "step": 3228 }, { "epoch": 0.8936552964782398, "grad_norm": 0.17138774693012238, "learning_rate": 1e-05, "loss": 0.5621, "step": 3229 }, { "epoch": 0.8939320556285892, "grad_norm": 0.17440854012966156, "learning_rate": 1e-05, "loss": 0.5506, "step": 3230 }, { "epoch": 0.8942088147789387, "grad_norm": 0.16908825933933258, "learning_rate": 1e-05, "loss": 0.5503, "step": 3231 }, { "epoch": 0.8944855739292881, "grad_norm": 0.16802161931991577, "learning_rate": 1e-05, "loss": 0.544, "step": 3232 }, { "epoch": 0.8947623330796375, "grad_norm": 0.18300125002861023, "learning_rate": 1e-05, "loss": 0.5445, "step": 3233 }, { "epoch": 0.8950390922299869, "grad_norm": 0.16421082615852356, "learning_rate": 1e-05, "loss": 0.5347, "step": 3234 }, { "epoch": 0.8953158513803363, "grad_norm": 0.16494525969028473, "learning_rate": 1e-05, "loss": 0.5758, "step": 3235 }, { "epoch": 0.8955926105306856, "grad_norm": 0.17123480141162872, "learning_rate": 1e-05, "loss": 0.5362, "step": 3236 }, { "epoch": 0.895869369681035, "grad_norm": 0.17301233112812042, "learning_rate": 1e-05, "loss": 0.5347, "step": 3237 }, { "epoch": 0.8961461288313844, "grad_norm": 0.1790914535522461, "learning_rate": 1e-05, "loss": 0.5542, "step": 3238 }, { "epoch": 0.8964228879817339, "grad_norm": 0.17572462558746338, "learning_rate": 1e-05, "loss": 0.5578, "step": 3239 }, { "epoch": 0.8966996471320833, "grad_norm": 0.1705130636692047, "learning_rate": 1e-05, "loss": 0.5616, "step": 3240 }, { "epoch": 0.8969764062824327, "grad_norm": 0.1734229475259781, "learning_rate": 1e-05, "loss": 0.5546, "step": 3241 }, { "epoch": 0.8972531654327821, "grad_norm": 0.16085095703601837, "learning_rate": 1e-05, "loss": 0.5232, "step": 3242 }, { "epoch": 0.8975299245831315, "grad_norm": 0.16920241713523865, "learning_rate": 1e-05, "loss": 0.5396, "step": 3243 }, { "epoch": 0.8978066837334809, "grad_norm": 0.17248617112636566, "learning_rate": 1e-05, "loss": 0.5475, "step": 3244 }, { "epoch": 0.8980834428838304, "grad_norm": 0.17891517281532288, "learning_rate": 1e-05, "loss": 0.5518, "step": 3245 }, { "epoch": 0.8983602020341798, "grad_norm": 0.1638878881931305, "learning_rate": 1e-05, "loss": 0.529, "step": 3246 }, { "epoch": 0.8986369611845292, "grad_norm": 0.18064774572849274, "learning_rate": 1e-05, "loss": 0.5507, "step": 3247 }, { "epoch": 0.8989137203348786, "grad_norm": 0.17030176520347595, "learning_rate": 1e-05, "loss": 0.5601, "step": 3248 }, { "epoch": 0.899190479485228, "grad_norm": 0.17001782357692719, "learning_rate": 1e-05, "loss": 0.5207, "step": 3249 }, { "epoch": 0.8994672386355774, "grad_norm": 0.1695767641067505, "learning_rate": 1e-05, "loss": 0.5552, "step": 3250 }, { "epoch": 0.8997439977859268, "grad_norm": 0.16272829473018646, "learning_rate": 1e-05, "loss": 0.5488, "step": 3251 }, { "epoch": 0.9000207569362763, "grad_norm": 0.17903189361095428, "learning_rate": 1e-05, "loss": 0.5482, "step": 3252 }, { "epoch": 0.9002975160866256, "grad_norm": 0.1732577085494995, "learning_rate": 1e-05, "loss": 0.5495, "step": 3253 }, { "epoch": 0.900574275236975, "grad_norm": 0.1810714304447174, "learning_rate": 1e-05, "loss": 0.5489, "step": 3254 }, { "epoch": 0.9008510343873244, "grad_norm": 0.16787829995155334, "learning_rate": 1e-05, "loss": 0.5434, "step": 3255 }, { "epoch": 0.9011277935376738, "grad_norm": 0.16996215283870697, "learning_rate": 1e-05, "loss": 0.5594, "step": 3256 }, { "epoch": 0.9014045526880232, "grad_norm": 0.17763014137744904, "learning_rate": 1e-05, "loss": 0.5411, "step": 3257 }, { "epoch": 0.9016813118383726, "grad_norm": 0.17082807421684265, "learning_rate": 1e-05, "loss": 0.5315, "step": 3258 }, { "epoch": 0.901958070988722, "grad_norm": 0.1789068728685379, "learning_rate": 1e-05, "loss": 0.5675, "step": 3259 }, { "epoch": 0.9022348301390715, "grad_norm": 0.17505969107151031, "learning_rate": 1e-05, "loss": 0.5342, "step": 3260 }, { "epoch": 0.9025115892894209, "grad_norm": 0.16842156648635864, "learning_rate": 1e-05, "loss": 0.5569, "step": 3261 }, { "epoch": 0.9027883484397703, "grad_norm": 0.16722342371940613, "learning_rate": 1e-05, "loss": 0.5277, "step": 3262 }, { "epoch": 0.9030651075901197, "grad_norm": 0.18293936550617218, "learning_rate": 1e-05, "loss": 0.5314, "step": 3263 }, { "epoch": 0.9033418667404691, "grad_norm": 0.17100706696510315, "learning_rate": 1e-05, "loss": 0.5605, "step": 3264 }, { "epoch": 0.9036186258908185, "grad_norm": 0.16477295756340027, "learning_rate": 1e-05, "loss": 0.5676, "step": 3265 }, { "epoch": 0.903895385041168, "grad_norm": 0.169800266623497, "learning_rate": 1e-05, "loss": 0.5511, "step": 3266 }, { "epoch": 0.9041721441915174, "grad_norm": 0.16595181822776794, "learning_rate": 1e-05, "loss": 0.5724, "step": 3267 }, { "epoch": 0.9044489033418668, "grad_norm": 0.17274032533168793, "learning_rate": 1e-05, "loss": 0.5726, "step": 3268 }, { "epoch": 0.9047256624922162, "grad_norm": 0.17401283979415894, "learning_rate": 1e-05, "loss": 0.5131, "step": 3269 }, { "epoch": 0.9050024216425655, "grad_norm": 0.16898827254772186, "learning_rate": 1e-05, "loss": 0.5568, "step": 3270 }, { "epoch": 0.9052791807929149, "grad_norm": 0.17228169739246368, "learning_rate": 1e-05, "loss": 0.5452, "step": 3271 }, { "epoch": 0.9055559399432643, "grad_norm": 0.17465677857398987, "learning_rate": 1e-05, "loss": 0.5518, "step": 3272 }, { "epoch": 0.9058326990936137, "grad_norm": 0.17066438496112823, "learning_rate": 1e-05, "loss": 0.5682, "step": 3273 }, { "epoch": 0.9061094582439632, "grad_norm": 0.16548432409763336, "learning_rate": 1e-05, "loss": 0.5456, "step": 3274 }, { "epoch": 0.9063862173943126, "grad_norm": 0.1755075752735138, "learning_rate": 1e-05, "loss": 0.5572, "step": 3275 }, { "epoch": 0.906662976544662, "grad_norm": 0.16964775323867798, "learning_rate": 1e-05, "loss": 0.5476, "step": 3276 }, { "epoch": 0.9069397356950114, "grad_norm": 0.16621941328048706, "learning_rate": 1e-05, "loss": 0.5162, "step": 3277 }, { "epoch": 0.9072164948453608, "grad_norm": 0.16387487947940826, "learning_rate": 1e-05, "loss": 0.5075, "step": 3278 }, { "epoch": 0.9074932539957102, "grad_norm": 0.17010872066020966, "learning_rate": 1e-05, "loss": 0.5694, "step": 3279 }, { "epoch": 0.9077700131460597, "grad_norm": 0.1645568609237671, "learning_rate": 1e-05, "loss": 0.5319, "step": 3280 }, { "epoch": 0.9080467722964091, "grad_norm": 0.17289936542510986, "learning_rate": 1e-05, "loss": 0.5761, "step": 3281 }, { "epoch": 0.9083235314467585, "grad_norm": 0.16970166563987732, "learning_rate": 1e-05, "loss": 0.5377, "step": 3282 }, { "epoch": 0.9086002905971079, "grad_norm": 0.18245717883110046, "learning_rate": 1e-05, "loss": 0.5411, "step": 3283 }, { "epoch": 0.9088770497474573, "grad_norm": 0.17651692032814026, "learning_rate": 1e-05, "loss": 0.5447, "step": 3284 }, { "epoch": 0.9091538088978067, "grad_norm": 0.17016908526420593, "learning_rate": 1e-05, "loss": 0.536, "step": 3285 }, { "epoch": 0.9094305680481561, "grad_norm": 0.17373572289943695, "learning_rate": 1e-05, "loss": 0.5415, "step": 3286 }, { "epoch": 0.9097073271985056, "grad_norm": 0.16111133992671967, "learning_rate": 1e-05, "loss": 0.5428, "step": 3287 }, { "epoch": 0.9099840863488549, "grad_norm": 0.17304489016532898, "learning_rate": 1e-05, "loss": 0.5544, "step": 3288 }, { "epoch": 0.9102608454992043, "grad_norm": 0.18267355859279633, "learning_rate": 1e-05, "loss": 0.5524, "step": 3289 }, { "epoch": 0.9105376046495537, "grad_norm": 0.17086364328861237, "learning_rate": 1e-05, "loss": 0.5323, "step": 3290 }, { "epoch": 0.9108143637999031, "grad_norm": 0.17870348691940308, "learning_rate": 1e-05, "loss": 0.5748, "step": 3291 }, { "epoch": 0.9110911229502525, "grad_norm": 0.1697886884212494, "learning_rate": 1e-05, "loss": 0.528, "step": 3292 }, { "epoch": 0.9113678821006019, "grad_norm": 0.17515929043293, "learning_rate": 1e-05, "loss": 0.5608, "step": 3293 }, { "epoch": 0.9116446412509513, "grad_norm": 0.16661608219146729, "learning_rate": 1e-05, "loss": 0.5268, "step": 3294 }, { "epoch": 0.9119214004013008, "grad_norm": 0.17603741586208344, "learning_rate": 1e-05, "loss": 0.5326, "step": 3295 }, { "epoch": 0.9121981595516502, "grad_norm": 0.17650707066059113, "learning_rate": 1e-05, "loss": 0.5422, "step": 3296 }, { "epoch": 0.9124749187019996, "grad_norm": 0.16877759993076324, "learning_rate": 1e-05, "loss": 0.5212, "step": 3297 }, { "epoch": 0.912751677852349, "grad_norm": 0.1705571860074997, "learning_rate": 1e-05, "loss": 0.543, "step": 3298 }, { "epoch": 0.9130284370026984, "grad_norm": 0.17733250558376312, "learning_rate": 1e-05, "loss": 0.5585, "step": 3299 }, { "epoch": 0.9133051961530478, "grad_norm": 0.17645327746868134, "learning_rate": 1e-05, "loss": 0.5371, "step": 3300 }, { "epoch": 0.9135819553033973, "grad_norm": 0.1751307100057602, "learning_rate": 1e-05, "loss": 0.5673, "step": 3301 }, { "epoch": 0.9138587144537467, "grad_norm": 0.1642138510942459, "learning_rate": 1e-05, "loss": 0.5496, "step": 3302 }, { "epoch": 0.9141354736040961, "grad_norm": 0.17269881069660187, "learning_rate": 1e-05, "loss": 0.5585, "step": 3303 }, { "epoch": 0.9144122327544455, "grad_norm": 0.16980582475662231, "learning_rate": 1e-05, "loss": 0.5233, "step": 3304 }, { "epoch": 0.9146889919047948, "grad_norm": 0.18108507990837097, "learning_rate": 1e-05, "loss": 0.5328, "step": 3305 }, { "epoch": 0.9149657510551442, "grad_norm": 0.174221470952034, "learning_rate": 1e-05, "loss": 0.5247, "step": 3306 }, { "epoch": 0.9152425102054936, "grad_norm": 0.1624479591846466, "learning_rate": 1e-05, "loss": 0.5336, "step": 3307 }, { "epoch": 0.915519269355843, "grad_norm": 0.1739421784877777, "learning_rate": 1e-05, "loss": 0.5443, "step": 3308 }, { "epoch": 0.9157960285061925, "grad_norm": 0.1707921326160431, "learning_rate": 1e-05, "loss": 0.5681, "step": 3309 }, { "epoch": 0.9160727876565419, "grad_norm": 0.159016415476799, "learning_rate": 1e-05, "loss": 0.5107, "step": 3310 }, { "epoch": 0.9163495468068913, "grad_norm": 0.1751103699207306, "learning_rate": 1e-05, "loss": 0.5218, "step": 3311 }, { "epoch": 0.9166263059572407, "grad_norm": 0.17082840204238892, "learning_rate": 1e-05, "loss": 0.547, "step": 3312 }, { "epoch": 0.9169030651075901, "grad_norm": 0.16144660115242004, "learning_rate": 1e-05, "loss": 0.5254, "step": 3313 }, { "epoch": 0.9171798242579395, "grad_norm": 0.16765646636486053, "learning_rate": 1e-05, "loss": 0.559, "step": 3314 }, { "epoch": 0.917456583408289, "grad_norm": 0.17132949829101562, "learning_rate": 1e-05, "loss": 0.5361, "step": 3315 }, { "epoch": 0.9177333425586384, "grad_norm": 0.16658511757850647, "learning_rate": 1e-05, "loss": 0.5328, "step": 3316 }, { "epoch": 0.9180101017089878, "grad_norm": 0.16673153638839722, "learning_rate": 1e-05, "loss": 0.5226, "step": 3317 }, { "epoch": 0.9182868608593372, "grad_norm": 0.16822148859500885, "learning_rate": 1e-05, "loss": 0.5199, "step": 3318 }, { "epoch": 0.9185636200096866, "grad_norm": 0.17445489764213562, "learning_rate": 1e-05, "loss": 0.5927, "step": 3319 }, { "epoch": 0.918840379160036, "grad_norm": 0.16785074770450592, "learning_rate": 1e-05, "loss": 0.5145, "step": 3320 }, { "epoch": 0.9191171383103854, "grad_norm": 0.17404787242412567, "learning_rate": 1e-05, "loss": 0.5631, "step": 3321 }, { "epoch": 0.9193938974607347, "grad_norm": 0.17116907238960266, "learning_rate": 1e-05, "loss": 0.5667, "step": 3322 }, { "epoch": 0.9196706566110842, "grad_norm": 0.16840232908725739, "learning_rate": 1e-05, "loss": 0.5522, "step": 3323 }, { "epoch": 0.9199474157614336, "grad_norm": 0.1741989701986313, "learning_rate": 1e-05, "loss": 0.5422, "step": 3324 }, { "epoch": 0.920224174911783, "grad_norm": 0.1619805097579956, "learning_rate": 1e-05, "loss": 0.5254, "step": 3325 }, { "epoch": 0.9205009340621324, "grad_norm": 0.17054864764213562, "learning_rate": 1e-05, "loss": 0.5358, "step": 3326 }, { "epoch": 0.9207776932124818, "grad_norm": 0.16728834807872772, "learning_rate": 1e-05, "loss": 0.5398, "step": 3327 }, { "epoch": 0.9210544523628312, "grad_norm": 0.17019516229629517, "learning_rate": 1e-05, "loss": 0.5215, "step": 3328 }, { "epoch": 0.9213312115131806, "grad_norm": 0.17181819677352905, "learning_rate": 1e-05, "loss": 0.5505, "step": 3329 }, { "epoch": 0.9216079706635301, "grad_norm": 0.17099368572235107, "learning_rate": 1e-05, "loss": 0.5438, "step": 3330 }, { "epoch": 0.9218847298138795, "grad_norm": 0.17172732949256897, "learning_rate": 1e-05, "loss": 0.5543, "step": 3331 }, { "epoch": 0.9221614889642289, "grad_norm": 0.16258013248443604, "learning_rate": 1e-05, "loss": 0.5182, "step": 3332 }, { "epoch": 0.9224382481145783, "grad_norm": 0.18315185606479645, "learning_rate": 1e-05, "loss": 0.6302, "step": 3333 }, { "epoch": 0.9227150072649277, "grad_norm": 0.1638312041759491, "learning_rate": 1e-05, "loss": 0.5472, "step": 3334 }, { "epoch": 0.9229917664152771, "grad_norm": 0.1745821088552475, "learning_rate": 1e-05, "loss": 0.5618, "step": 3335 }, { "epoch": 0.9232685255656266, "grad_norm": 0.163582444190979, "learning_rate": 1e-05, "loss": 0.543, "step": 3336 }, { "epoch": 0.923545284715976, "grad_norm": 0.1601967066526413, "learning_rate": 1e-05, "loss": 0.5591, "step": 3337 }, { "epoch": 0.9238220438663254, "grad_norm": 0.1703888177871704, "learning_rate": 1e-05, "loss": 0.5771, "step": 3338 }, { "epoch": 0.9240988030166747, "grad_norm": 0.1705920249223709, "learning_rate": 1e-05, "loss": 0.5583, "step": 3339 }, { "epoch": 0.9243755621670241, "grad_norm": 0.17040912806987762, "learning_rate": 1e-05, "loss": 0.5429, "step": 3340 }, { "epoch": 0.9246523213173735, "grad_norm": 0.16974948346614838, "learning_rate": 1e-05, "loss": 0.5227, "step": 3341 }, { "epoch": 0.9249290804677229, "grad_norm": 0.17164410650730133, "learning_rate": 1e-05, "loss": 0.5631, "step": 3342 }, { "epoch": 0.9252058396180723, "grad_norm": 0.16637888550758362, "learning_rate": 1e-05, "loss": 0.54, "step": 3343 }, { "epoch": 0.9254825987684218, "grad_norm": 0.1704077273607254, "learning_rate": 1e-05, "loss": 0.5346, "step": 3344 }, { "epoch": 0.9257593579187712, "grad_norm": 0.1750539094209671, "learning_rate": 1e-05, "loss": 0.5282, "step": 3345 }, { "epoch": 0.9260361170691206, "grad_norm": 0.1681097000837326, "learning_rate": 1e-05, "loss": 0.5567, "step": 3346 }, { "epoch": 0.92631287621947, "grad_norm": 0.16844162344932556, "learning_rate": 1e-05, "loss": 0.5456, "step": 3347 }, { "epoch": 0.9265896353698194, "grad_norm": 0.1686336249113083, "learning_rate": 1e-05, "loss": 0.5099, "step": 3348 }, { "epoch": 0.9268663945201688, "grad_norm": 0.1678282618522644, "learning_rate": 1e-05, "loss": 0.506, "step": 3349 }, { "epoch": 0.9271431536705182, "grad_norm": 0.1686890572309494, "learning_rate": 1e-05, "loss": 0.529, "step": 3350 }, { "epoch": 0.9274199128208677, "grad_norm": 0.168727308511734, "learning_rate": 1e-05, "loss": 0.5372, "step": 3351 }, { "epoch": 0.9276966719712171, "grad_norm": 0.162970170378685, "learning_rate": 1e-05, "loss": 0.5273, "step": 3352 }, { "epoch": 0.9279734311215665, "grad_norm": 0.16566133499145508, "learning_rate": 1e-05, "loss": 0.5573, "step": 3353 }, { "epoch": 0.9282501902719159, "grad_norm": 0.16606247425079346, "learning_rate": 1e-05, "loss": 0.5175, "step": 3354 }, { "epoch": 0.9285269494222653, "grad_norm": 0.16668124496936798, "learning_rate": 1e-05, "loss": 0.5397, "step": 3355 }, { "epoch": 0.9288037085726146, "grad_norm": 0.17009976506233215, "learning_rate": 1e-05, "loss": 0.5548, "step": 3356 }, { "epoch": 0.929080467722964, "grad_norm": 0.18042895197868347, "learning_rate": 1e-05, "loss": 0.5472, "step": 3357 }, { "epoch": 0.9293572268733135, "grad_norm": 0.1781848669052124, "learning_rate": 1e-05, "loss": 0.5785, "step": 3358 }, { "epoch": 0.9296339860236629, "grad_norm": 0.16951556503772736, "learning_rate": 1e-05, "loss": 0.5713, "step": 3359 }, { "epoch": 0.9299107451740123, "grad_norm": 0.1670803427696228, "learning_rate": 1e-05, "loss": 0.5268, "step": 3360 }, { "epoch": 0.9301875043243617, "grad_norm": 0.17718900740146637, "learning_rate": 1e-05, "loss": 0.5373, "step": 3361 }, { "epoch": 0.9304642634747111, "grad_norm": 0.17161710560321808, "learning_rate": 1e-05, "loss": 0.5642, "step": 3362 }, { "epoch": 0.9307410226250605, "grad_norm": 0.1797228902578354, "learning_rate": 1e-05, "loss": 0.5786, "step": 3363 }, { "epoch": 0.93101778177541, "grad_norm": 0.1732364147901535, "learning_rate": 1e-05, "loss": 0.5701, "step": 3364 }, { "epoch": 0.9312945409257594, "grad_norm": 0.1713424026966095, "learning_rate": 1e-05, "loss": 0.5473, "step": 3365 }, { "epoch": 0.9315713000761088, "grad_norm": 0.1725597083568573, "learning_rate": 1e-05, "loss": 0.541, "step": 3366 }, { "epoch": 0.9318480592264582, "grad_norm": 0.17119452357292175, "learning_rate": 1e-05, "loss": 0.5515, "step": 3367 }, { "epoch": 0.9321248183768076, "grad_norm": 0.1670142114162445, "learning_rate": 1e-05, "loss": 0.551, "step": 3368 }, { "epoch": 0.932401577527157, "grad_norm": 0.1674841195344925, "learning_rate": 1e-05, "loss": 0.5338, "step": 3369 }, { "epoch": 0.9326783366775064, "grad_norm": 0.17400752007961273, "learning_rate": 1e-05, "loss": 0.5582, "step": 3370 }, { "epoch": 0.9329550958278559, "grad_norm": 0.17123587429523468, "learning_rate": 1e-05, "loss": 0.5326, "step": 3371 }, { "epoch": 0.9332318549782053, "grad_norm": 0.16128404438495636, "learning_rate": 1e-05, "loss": 0.5237, "step": 3372 }, { "epoch": 0.9335086141285546, "grad_norm": 0.16674937307834625, "learning_rate": 1e-05, "loss": 0.5438, "step": 3373 }, { "epoch": 0.933785373278904, "grad_norm": 0.16588592529296875, "learning_rate": 1e-05, "loss": 0.5305, "step": 3374 }, { "epoch": 0.9340621324292534, "grad_norm": 0.1645144522190094, "learning_rate": 1e-05, "loss": 0.5032, "step": 3375 }, { "epoch": 0.9343388915796028, "grad_norm": 0.166624054312706, "learning_rate": 1e-05, "loss": 0.5319, "step": 3376 }, { "epoch": 0.9346156507299522, "grad_norm": 0.18043109774589539, "learning_rate": 1e-05, "loss": 0.5649, "step": 3377 }, { "epoch": 0.9348924098803016, "grad_norm": 0.17677709460258484, "learning_rate": 1e-05, "loss": 0.546, "step": 3378 }, { "epoch": 0.9351691690306511, "grad_norm": 0.1694483906030655, "learning_rate": 1e-05, "loss": 0.5423, "step": 3379 }, { "epoch": 0.9354459281810005, "grad_norm": 0.17203600704669952, "learning_rate": 1e-05, "loss": 0.5715, "step": 3380 }, { "epoch": 0.9357226873313499, "grad_norm": 0.16477076709270477, "learning_rate": 1e-05, "loss": 0.5573, "step": 3381 }, { "epoch": 0.9359994464816993, "grad_norm": 0.16646306216716766, "learning_rate": 1e-05, "loss": 0.5422, "step": 3382 }, { "epoch": 0.9362762056320487, "grad_norm": 0.17473988234996796, "learning_rate": 1e-05, "loss": 0.5505, "step": 3383 }, { "epoch": 0.9365529647823981, "grad_norm": 0.17782072722911835, "learning_rate": 1e-05, "loss": 0.5602, "step": 3384 }, { "epoch": 0.9368297239327475, "grad_norm": 0.16958922147750854, "learning_rate": 1e-05, "loss": 0.5277, "step": 3385 }, { "epoch": 0.937106483083097, "grad_norm": 0.16871270537376404, "learning_rate": 1e-05, "loss": 0.5327, "step": 3386 }, { "epoch": 0.9373832422334464, "grad_norm": 0.1749904751777649, "learning_rate": 1e-05, "loss": 0.5745, "step": 3387 }, { "epoch": 0.9376600013837958, "grad_norm": 0.17120586335659027, "learning_rate": 1e-05, "loss": 0.5491, "step": 3388 }, { "epoch": 0.9379367605341452, "grad_norm": 0.16141566634178162, "learning_rate": 1e-05, "loss": 0.5439, "step": 3389 }, { "epoch": 0.9382135196844946, "grad_norm": 0.17016489803791046, "learning_rate": 1e-05, "loss": 0.5477, "step": 3390 }, { "epoch": 0.9384902788348439, "grad_norm": 0.16721980273723602, "learning_rate": 1e-05, "loss": 0.5474, "step": 3391 }, { "epoch": 0.9387670379851933, "grad_norm": 0.16229058802127838, "learning_rate": 1e-05, "loss": 0.5245, "step": 3392 }, { "epoch": 0.9390437971355428, "grad_norm": 0.16939112544059753, "learning_rate": 1e-05, "loss": 0.5252, "step": 3393 }, { "epoch": 0.9393205562858922, "grad_norm": 0.16119900345802307, "learning_rate": 1e-05, "loss": 0.5328, "step": 3394 }, { "epoch": 0.9395973154362416, "grad_norm": 0.17447206377983093, "learning_rate": 1e-05, "loss": 0.5263, "step": 3395 }, { "epoch": 0.939874074586591, "grad_norm": 0.17132039368152618, "learning_rate": 1e-05, "loss": 0.5433, "step": 3396 }, { "epoch": 0.9401508337369404, "grad_norm": 0.16800035536289215, "learning_rate": 1e-05, "loss": 0.523, "step": 3397 }, { "epoch": 0.9404275928872898, "grad_norm": 0.17002421617507935, "learning_rate": 1e-05, "loss": 0.556, "step": 3398 }, { "epoch": 0.9407043520376392, "grad_norm": 0.17547306418418884, "learning_rate": 1e-05, "loss": 0.5932, "step": 3399 }, { "epoch": 0.9409811111879887, "grad_norm": 0.16967163980007172, "learning_rate": 1e-05, "loss": 0.5524, "step": 3400 }, { "epoch": 0.9412578703383381, "grad_norm": 0.16707497835159302, "learning_rate": 1e-05, "loss": 0.5224, "step": 3401 }, { "epoch": 0.9415346294886875, "grad_norm": 0.1706283986568451, "learning_rate": 1e-05, "loss": 0.5131, "step": 3402 }, { "epoch": 0.9418113886390369, "grad_norm": 0.16800454258918762, "learning_rate": 1e-05, "loss": 0.5366, "step": 3403 }, { "epoch": 0.9420881477893863, "grad_norm": 0.15897150337696075, "learning_rate": 1e-05, "loss": 0.5318, "step": 3404 }, { "epoch": 0.9423649069397357, "grad_norm": 0.1716947704553604, "learning_rate": 1e-05, "loss": 0.5382, "step": 3405 }, { "epoch": 0.9426416660900852, "grad_norm": 0.1742442548274994, "learning_rate": 1e-05, "loss": 0.5548, "step": 3406 }, { "epoch": 0.9429184252404346, "grad_norm": 0.17461800575256348, "learning_rate": 1e-05, "loss": 0.5333, "step": 3407 }, { "epoch": 0.9431951843907839, "grad_norm": 0.16585108637809753, "learning_rate": 1e-05, "loss": 0.5157, "step": 3408 }, { "epoch": 0.9434719435411333, "grad_norm": 0.17142076790332794, "learning_rate": 1e-05, "loss": 0.5453, "step": 3409 }, { "epoch": 0.9437487026914827, "grad_norm": 0.16597479581832886, "learning_rate": 1e-05, "loss": 0.5433, "step": 3410 }, { "epoch": 0.9440254618418321, "grad_norm": 0.16695468127727509, "learning_rate": 1e-05, "loss": 0.5311, "step": 3411 }, { "epoch": 0.9443022209921815, "grad_norm": 0.17548467218875885, "learning_rate": 1e-05, "loss": 0.5543, "step": 3412 }, { "epoch": 0.9445789801425309, "grad_norm": 0.1707478016614914, "learning_rate": 1e-05, "loss": 0.5561, "step": 3413 }, { "epoch": 0.9448557392928804, "grad_norm": 0.17509537935256958, "learning_rate": 1e-05, "loss": 0.5481, "step": 3414 }, { "epoch": 0.9451324984432298, "grad_norm": 0.16433705389499664, "learning_rate": 1e-05, "loss": 0.5452, "step": 3415 }, { "epoch": 0.9454092575935792, "grad_norm": 0.16854450106620789, "learning_rate": 1e-05, "loss": 0.5697, "step": 3416 }, { "epoch": 0.9456860167439286, "grad_norm": 0.16866306960582733, "learning_rate": 1e-05, "loss": 0.5323, "step": 3417 }, { "epoch": 0.945962775894278, "grad_norm": 0.16945576667785645, "learning_rate": 1e-05, "loss": 0.521, "step": 3418 }, { "epoch": 0.9462395350446274, "grad_norm": 0.16609059274196625, "learning_rate": 1e-05, "loss": 0.5367, "step": 3419 }, { "epoch": 0.9465162941949768, "grad_norm": 0.17351281642913818, "learning_rate": 1e-05, "loss": 0.5521, "step": 3420 }, { "epoch": 0.9467930533453263, "grad_norm": 0.1705666184425354, "learning_rate": 1e-05, "loss": 0.5435, "step": 3421 }, { "epoch": 0.9470698124956757, "grad_norm": 0.1728280782699585, "learning_rate": 1e-05, "loss": 0.5643, "step": 3422 }, { "epoch": 0.9473465716460251, "grad_norm": 0.1675185263156891, "learning_rate": 1e-05, "loss": 0.5328, "step": 3423 }, { "epoch": 0.9476233307963745, "grad_norm": 0.1664801985025406, "learning_rate": 1e-05, "loss": 0.5348, "step": 3424 }, { "epoch": 0.9479000899467238, "grad_norm": 0.1701073795557022, "learning_rate": 1e-05, "loss": 0.5573, "step": 3425 }, { "epoch": 0.9481768490970732, "grad_norm": 0.17418810725212097, "learning_rate": 1e-05, "loss": 0.5534, "step": 3426 }, { "epoch": 0.9484536082474226, "grad_norm": 0.16856218874454498, "learning_rate": 1e-05, "loss": 0.5235, "step": 3427 }, { "epoch": 0.9487303673977721, "grad_norm": 0.17289721965789795, "learning_rate": 1e-05, "loss": 0.587, "step": 3428 }, { "epoch": 0.9490071265481215, "grad_norm": 0.16689081490039825, "learning_rate": 1e-05, "loss": 0.5107, "step": 3429 }, { "epoch": 0.9492838856984709, "grad_norm": 0.17592859268188477, "learning_rate": 1e-05, "loss": 0.5399, "step": 3430 }, { "epoch": 0.9495606448488203, "grad_norm": 0.16813308000564575, "learning_rate": 1e-05, "loss": 0.5208, "step": 3431 }, { "epoch": 0.9498374039991697, "grad_norm": 0.16734789311885834, "learning_rate": 1e-05, "loss": 0.5332, "step": 3432 }, { "epoch": 0.9501141631495191, "grad_norm": 0.17125126719474792, "learning_rate": 1e-05, "loss": 0.5404, "step": 3433 }, { "epoch": 0.9503909222998685, "grad_norm": 0.17883720993995667, "learning_rate": 1e-05, "loss": 0.5684, "step": 3434 }, { "epoch": 0.950667681450218, "grad_norm": 0.16713517904281616, "learning_rate": 1e-05, "loss": 0.5336, "step": 3435 }, { "epoch": 0.9509444406005674, "grad_norm": 0.1779557764530182, "learning_rate": 1e-05, "loss": 0.5346, "step": 3436 }, { "epoch": 0.9512211997509168, "grad_norm": 0.17000818252563477, "learning_rate": 1e-05, "loss": 0.5483, "step": 3437 }, { "epoch": 0.9514979589012662, "grad_norm": 0.1674477905035019, "learning_rate": 1e-05, "loss": 0.5573, "step": 3438 }, { "epoch": 0.9517747180516156, "grad_norm": 0.16897541284561157, "learning_rate": 1e-05, "loss": 0.5343, "step": 3439 }, { "epoch": 0.952051477201965, "grad_norm": 0.16590097546577454, "learning_rate": 1e-05, "loss": 0.5395, "step": 3440 }, { "epoch": 0.9523282363523145, "grad_norm": 0.16822479665279388, "learning_rate": 1e-05, "loss": 0.5225, "step": 3441 }, { "epoch": 0.9526049955026638, "grad_norm": 0.16731834411621094, "learning_rate": 1e-05, "loss": 0.5234, "step": 3442 }, { "epoch": 0.9528817546530132, "grad_norm": 0.1762356013059616, "learning_rate": 1e-05, "loss": 0.5858, "step": 3443 }, { "epoch": 0.9531585138033626, "grad_norm": 0.16718678176403046, "learning_rate": 1e-05, "loss": 0.5549, "step": 3444 }, { "epoch": 0.953435272953712, "grad_norm": 0.16447919607162476, "learning_rate": 1e-05, "loss": 0.5393, "step": 3445 }, { "epoch": 0.9537120321040614, "grad_norm": 0.1611536145210266, "learning_rate": 1e-05, "loss": 0.5182, "step": 3446 }, { "epoch": 0.9539887912544108, "grad_norm": 0.1695062667131424, "learning_rate": 1e-05, "loss": 0.528, "step": 3447 }, { "epoch": 0.9542655504047602, "grad_norm": 0.16342775523662567, "learning_rate": 1e-05, "loss": 0.5155, "step": 3448 }, { "epoch": 0.9545423095551097, "grad_norm": 0.16300009191036224, "learning_rate": 1e-05, "loss": 0.5175, "step": 3449 }, { "epoch": 0.9548190687054591, "grad_norm": 0.16552573442459106, "learning_rate": 1e-05, "loss": 0.5483, "step": 3450 }, { "epoch": 0.9550958278558085, "grad_norm": 0.16864214837551117, "learning_rate": 1e-05, "loss": 0.5803, "step": 3451 }, { "epoch": 0.9553725870061579, "grad_norm": 0.17189839482307434, "learning_rate": 1e-05, "loss": 0.5566, "step": 3452 }, { "epoch": 0.9556493461565073, "grad_norm": 0.1722382754087448, "learning_rate": 1e-05, "loss": 0.5696, "step": 3453 }, { "epoch": 0.9559261053068567, "grad_norm": 0.1674593687057495, "learning_rate": 1e-05, "loss": 0.5644, "step": 3454 }, { "epoch": 0.9562028644572061, "grad_norm": 0.16978122293949127, "learning_rate": 1e-05, "loss": 0.5722, "step": 3455 }, { "epoch": 0.9564796236075556, "grad_norm": 0.1686001867055893, "learning_rate": 1e-05, "loss": 0.5518, "step": 3456 }, { "epoch": 0.956756382757905, "grad_norm": 0.16482311487197876, "learning_rate": 1e-05, "loss": 0.538, "step": 3457 }, { "epoch": 0.9570331419082544, "grad_norm": 0.16769716143608093, "learning_rate": 1e-05, "loss": 0.5243, "step": 3458 }, { "epoch": 0.9573099010586037, "grad_norm": 0.16902495920658112, "learning_rate": 1e-05, "loss": 0.5323, "step": 3459 }, { "epoch": 0.9575866602089531, "grad_norm": 0.17172539234161377, "learning_rate": 1e-05, "loss": 0.5767, "step": 3460 }, { "epoch": 0.9578634193593025, "grad_norm": 0.17062580585479736, "learning_rate": 1e-05, "loss": 0.5394, "step": 3461 }, { "epoch": 0.9581401785096519, "grad_norm": 0.17377741634845734, "learning_rate": 1e-05, "loss": 0.5217, "step": 3462 }, { "epoch": 0.9584169376600014, "grad_norm": 0.16136440634727478, "learning_rate": 1e-05, "loss": 0.5183, "step": 3463 }, { "epoch": 0.9586936968103508, "grad_norm": 0.1741923689842224, "learning_rate": 1e-05, "loss": 0.5657, "step": 3464 }, { "epoch": 0.9589704559607002, "grad_norm": 0.16620716452598572, "learning_rate": 1e-05, "loss": 0.5262, "step": 3465 }, { "epoch": 0.9592472151110496, "grad_norm": 0.1632644087076187, "learning_rate": 1e-05, "loss": 0.5399, "step": 3466 }, { "epoch": 0.959523974261399, "grad_norm": 0.1661975234746933, "learning_rate": 1e-05, "loss": 0.5318, "step": 3467 }, { "epoch": 0.9598007334117484, "grad_norm": 0.16997785866260529, "learning_rate": 1e-05, "loss": 0.531, "step": 3468 }, { "epoch": 0.9600774925620978, "grad_norm": 0.16599199175834656, "learning_rate": 1e-05, "loss": 0.5259, "step": 3469 }, { "epoch": 0.9603542517124473, "grad_norm": 0.1754201054573059, "learning_rate": 1e-05, "loss": 0.5478, "step": 3470 }, { "epoch": 0.9606310108627967, "grad_norm": 0.17012611031532288, "learning_rate": 1e-05, "loss": 0.5758, "step": 3471 }, { "epoch": 0.9609077700131461, "grad_norm": 0.16784164309501648, "learning_rate": 1e-05, "loss": 0.5535, "step": 3472 }, { "epoch": 0.9611845291634955, "grad_norm": 0.16479025781154633, "learning_rate": 1e-05, "loss": 0.5417, "step": 3473 }, { "epoch": 0.9614612883138449, "grad_norm": 0.17500363290309906, "learning_rate": 1e-05, "loss": 0.5575, "step": 3474 }, { "epoch": 0.9617380474641943, "grad_norm": 0.17314574122428894, "learning_rate": 1e-05, "loss": 0.5306, "step": 3475 }, { "epoch": 0.9620148066145436, "grad_norm": 0.17077718675136566, "learning_rate": 1e-05, "loss": 0.5375, "step": 3476 }, { "epoch": 0.962291565764893, "grad_norm": 0.16783659160137177, "learning_rate": 1e-05, "loss": 0.5583, "step": 3477 }, { "epoch": 0.9625683249152425, "grad_norm": 0.17546430230140686, "learning_rate": 1e-05, "loss": 0.5683, "step": 3478 }, { "epoch": 0.9628450840655919, "grad_norm": 0.179429292678833, "learning_rate": 1e-05, "loss": 0.5608, "step": 3479 }, { "epoch": 0.9631218432159413, "grad_norm": 0.1719292849302292, "learning_rate": 1e-05, "loss": 0.535, "step": 3480 }, { "epoch": 0.9633986023662907, "grad_norm": 0.17224003374576569, "learning_rate": 1e-05, "loss": 0.5398, "step": 3481 }, { "epoch": 0.9636753615166401, "grad_norm": 0.17509762942790985, "learning_rate": 1e-05, "loss": 0.5387, "step": 3482 }, { "epoch": 0.9639521206669895, "grad_norm": 0.17472128570079803, "learning_rate": 1e-05, "loss": 0.5494, "step": 3483 }, { "epoch": 0.964228879817339, "grad_norm": 0.16995227336883545, "learning_rate": 1e-05, "loss": 0.54, "step": 3484 }, { "epoch": 0.9645056389676884, "grad_norm": 0.17050407826900482, "learning_rate": 1e-05, "loss": 0.5398, "step": 3485 }, { "epoch": 0.9647823981180378, "grad_norm": 0.1747102439403534, "learning_rate": 1e-05, "loss": 0.5383, "step": 3486 }, { "epoch": 0.9650591572683872, "grad_norm": 0.17330418527126312, "learning_rate": 1e-05, "loss": 0.519, "step": 3487 }, { "epoch": 0.9653359164187366, "grad_norm": 0.16863791644573212, "learning_rate": 1e-05, "loss": 0.5035, "step": 3488 }, { "epoch": 0.965612675569086, "grad_norm": 0.17119047045707703, "learning_rate": 1e-05, "loss": 0.5383, "step": 3489 }, { "epoch": 0.9658894347194354, "grad_norm": 0.1725587695837021, "learning_rate": 1e-05, "loss": 0.5625, "step": 3490 }, { "epoch": 0.9661661938697849, "grad_norm": 0.17543521523475647, "learning_rate": 1e-05, "loss": 0.5119, "step": 3491 }, { "epoch": 0.9664429530201343, "grad_norm": 0.16443248093128204, "learning_rate": 1e-05, "loss": 0.5344, "step": 3492 }, { "epoch": 0.9667197121704837, "grad_norm": 0.16692331433296204, "learning_rate": 1e-05, "loss": 0.5277, "step": 3493 }, { "epoch": 0.966996471320833, "grad_norm": 0.170894593000412, "learning_rate": 1e-05, "loss": 0.5484, "step": 3494 }, { "epoch": 0.9672732304711824, "grad_norm": 0.17785751819610596, "learning_rate": 1e-05, "loss": 0.5566, "step": 3495 }, { "epoch": 0.9675499896215318, "grad_norm": 0.17345981299877167, "learning_rate": 1e-05, "loss": 0.565, "step": 3496 }, { "epoch": 0.9678267487718812, "grad_norm": 0.16564685106277466, "learning_rate": 1e-05, "loss": 0.5151, "step": 3497 }, { "epoch": 0.9681035079222307, "grad_norm": 0.1710185706615448, "learning_rate": 1e-05, "loss": 0.5314, "step": 3498 }, { "epoch": 0.9683802670725801, "grad_norm": 0.17828059196472168, "learning_rate": 1e-05, "loss": 0.5643, "step": 3499 }, { "epoch": 0.9686570262229295, "grad_norm": 0.16813771426677704, "learning_rate": 1e-05, "loss": 0.5229, "step": 3500 }, { "epoch": 0.9689337853732789, "grad_norm": 0.17441152036190033, "learning_rate": 1e-05, "loss": 0.54, "step": 3501 }, { "epoch": 0.9692105445236283, "grad_norm": 0.16613122820854187, "learning_rate": 1e-05, "loss": 0.5426, "step": 3502 }, { "epoch": 0.9694873036739777, "grad_norm": 0.1714065968990326, "learning_rate": 1e-05, "loss": 0.5519, "step": 3503 }, { "epoch": 0.9697640628243271, "grad_norm": 0.1685042828321457, "learning_rate": 1e-05, "loss": 0.5367, "step": 3504 }, { "epoch": 0.9700408219746766, "grad_norm": 0.17650435864925385, "learning_rate": 1e-05, "loss": 0.5321, "step": 3505 }, { "epoch": 0.970317581125026, "grad_norm": 0.17209060490131378, "learning_rate": 1e-05, "loss": 0.5532, "step": 3506 }, { "epoch": 0.9705943402753754, "grad_norm": 0.18909010291099548, "learning_rate": 1e-05, "loss": 0.5563, "step": 3507 }, { "epoch": 0.9708710994257248, "grad_norm": 0.16695404052734375, "learning_rate": 1e-05, "loss": 0.5391, "step": 3508 }, { "epoch": 0.9711478585760742, "grad_norm": 0.1750575453042984, "learning_rate": 1e-05, "loss": 0.5409, "step": 3509 }, { "epoch": 0.9714246177264236, "grad_norm": 0.16230811178684235, "learning_rate": 1e-05, "loss": 0.5298, "step": 3510 }, { "epoch": 0.9717013768767729, "grad_norm": 0.17080330848693848, "learning_rate": 1e-05, "loss": 0.5409, "step": 3511 }, { "epoch": 0.9719781360271224, "grad_norm": 0.16735337674617767, "learning_rate": 1e-05, "loss": 0.5138, "step": 3512 }, { "epoch": 0.9722548951774718, "grad_norm": 0.1668839007616043, "learning_rate": 1e-05, "loss": 0.5157, "step": 3513 }, { "epoch": 0.9725316543278212, "grad_norm": 0.17122098803520203, "learning_rate": 1e-05, "loss": 0.5373, "step": 3514 }, { "epoch": 0.9728084134781706, "grad_norm": 0.17451071739196777, "learning_rate": 1e-05, "loss": 0.567, "step": 3515 }, { "epoch": 0.97308517262852, "grad_norm": 0.17419670522212982, "learning_rate": 1e-05, "loss": 0.5705, "step": 3516 }, { "epoch": 0.9733619317788694, "grad_norm": 0.1655901074409485, "learning_rate": 1e-05, "loss": 0.5202, "step": 3517 }, { "epoch": 0.9736386909292188, "grad_norm": 0.17177245020866394, "learning_rate": 1e-05, "loss": 0.5643, "step": 3518 }, { "epoch": 0.9739154500795683, "grad_norm": 0.16841335594654083, "learning_rate": 1e-05, "loss": 0.5256, "step": 3519 }, { "epoch": 0.9741922092299177, "grad_norm": 0.1618887186050415, "learning_rate": 1e-05, "loss": 0.528, "step": 3520 }, { "epoch": 0.9744689683802671, "grad_norm": 0.15670058131217957, "learning_rate": 1e-05, "loss": 0.5261, "step": 3521 }, { "epoch": 0.9747457275306165, "grad_norm": 0.1714606136083603, "learning_rate": 1e-05, "loss": 0.5605, "step": 3522 }, { "epoch": 0.9750224866809659, "grad_norm": 0.16776889562606812, "learning_rate": 1e-05, "loss": 0.5547, "step": 3523 }, { "epoch": 0.9752992458313153, "grad_norm": 0.17533230781555176, "learning_rate": 1e-05, "loss": 0.5613, "step": 3524 }, { "epoch": 0.9755760049816647, "grad_norm": 0.1704351305961609, "learning_rate": 1e-05, "loss": 0.5342, "step": 3525 }, { "epoch": 0.9758527641320142, "grad_norm": 0.1734245866537094, "learning_rate": 1e-05, "loss": 0.5747, "step": 3526 }, { "epoch": 0.9761295232823636, "grad_norm": 0.17419317364692688, "learning_rate": 1e-05, "loss": 0.5684, "step": 3527 }, { "epoch": 0.9764062824327129, "grad_norm": 0.15802384912967682, "learning_rate": 1e-05, "loss": 0.5172, "step": 3528 }, { "epoch": 0.9766830415830623, "grad_norm": 0.1718502640724182, "learning_rate": 1e-05, "loss": 0.5686, "step": 3529 }, { "epoch": 0.9769598007334117, "grad_norm": 0.1696770042181015, "learning_rate": 1e-05, "loss": 0.57, "step": 3530 }, { "epoch": 0.9772365598837611, "grad_norm": 0.16244417428970337, "learning_rate": 1e-05, "loss": 0.5469, "step": 3531 }, { "epoch": 0.9775133190341105, "grad_norm": 0.1666814535856247, "learning_rate": 1e-05, "loss": 0.5409, "step": 3532 }, { "epoch": 0.97779007818446, "grad_norm": 0.17093060910701752, "learning_rate": 1e-05, "loss": 0.5414, "step": 3533 }, { "epoch": 0.9780668373348094, "grad_norm": 0.16909991204738617, "learning_rate": 1e-05, "loss": 0.5284, "step": 3534 }, { "epoch": 0.9783435964851588, "grad_norm": 0.17360661923885345, "learning_rate": 1e-05, "loss": 0.5252, "step": 3535 }, { "epoch": 0.9786203556355082, "grad_norm": 0.16070978343486786, "learning_rate": 1e-05, "loss": 0.5385, "step": 3536 }, { "epoch": 0.9788971147858576, "grad_norm": 0.1707097887992859, "learning_rate": 1e-05, "loss": 0.5544, "step": 3537 }, { "epoch": 0.979173873936207, "grad_norm": 0.17576654255390167, "learning_rate": 1e-05, "loss": 0.5414, "step": 3538 }, { "epoch": 0.9794506330865564, "grad_norm": 0.16507233679294586, "learning_rate": 1e-05, "loss": 0.5181, "step": 3539 }, { "epoch": 0.9797273922369059, "grad_norm": 0.1743052452802658, "learning_rate": 1e-05, "loss": 0.5692, "step": 3540 }, { "epoch": 0.9800041513872553, "grad_norm": 0.16693474352359772, "learning_rate": 1e-05, "loss": 0.5261, "step": 3541 }, { "epoch": 0.9802809105376047, "grad_norm": 0.1669008582830429, "learning_rate": 1e-05, "loss": 0.5376, "step": 3542 }, { "epoch": 0.9805576696879541, "grad_norm": 0.16859027743339539, "learning_rate": 1e-05, "loss": 0.5618, "step": 3543 }, { "epoch": 0.9808344288383035, "grad_norm": 0.17042018473148346, "learning_rate": 1e-05, "loss": 0.5435, "step": 3544 }, { "epoch": 0.9811111879886528, "grad_norm": 0.16671307384967804, "learning_rate": 1e-05, "loss": 0.5505, "step": 3545 }, { "epoch": 0.9813879471390022, "grad_norm": 0.16654905676841736, "learning_rate": 1e-05, "loss": 0.5295, "step": 3546 }, { "epoch": 0.9816647062893517, "grad_norm": 0.16884858906269073, "learning_rate": 1e-05, "loss": 0.5157, "step": 3547 }, { "epoch": 0.9819414654397011, "grad_norm": 0.16638557612895966, "learning_rate": 1e-05, "loss": 0.5496, "step": 3548 }, { "epoch": 0.9822182245900505, "grad_norm": 0.17008720338344574, "learning_rate": 1e-05, "loss": 0.5576, "step": 3549 }, { "epoch": 0.9824949837403999, "grad_norm": 0.1714371293783188, "learning_rate": 1e-05, "loss": 0.5263, "step": 3550 }, { "epoch": 0.9827717428907493, "grad_norm": 0.16811761260032654, "learning_rate": 1e-05, "loss": 0.557, "step": 3551 }, { "epoch": 0.9830485020410987, "grad_norm": 0.179793581366539, "learning_rate": 1e-05, "loss": 0.5589, "step": 3552 }, { "epoch": 0.9833252611914481, "grad_norm": 0.1674426943063736, "learning_rate": 1e-05, "loss": 0.5257, "step": 3553 }, { "epoch": 0.9836020203417976, "grad_norm": 0.16674821078777313, "learning_rate": 1e-05, "loss": 0.5448, "step": 3554 }, { "epoch": 0.983878779492147, "grad_norm": 0.17681767046451569, "learning_rate": 1e-05, "loss": 0.5306, "step": 3555 }, { "epoch": 0.9841555386424964, "grad_norm": 0.1726534515619278, "learning_rate": 1e-05, "loss": 0.5586, "step": 3556 }, { "epoch": 0.9844322977928458, "grad_norm": 0.1702272593975067, "learning_rate": 1e-05, "loss": 0.5397, "step": 3557 }, { "epoch": 0.9847090569431952, "grad_norm": 0.17116419970989227, "learning_rate": 1e-05, "loss": 0.5317, "step": 3558 }, { "epoch": 0.9849858160935446, "grad_norm": 0.16776727139949799, "learning_rate": 1e-05, "loss": 0.5728, "step": 3559 }, { "epoch": 0.985262575243894, "grad_norm": 0.17268861830234528, "learning_rate": 1e-05, "loss": 0.5264, "step": 3560 }, { "epoch": 0.9855393343942435, "grad_norm": 0.1713593751192093, "learning_rate": 1e-05, "loss": 0.5498, "step": 3561 }, { "epoch": 0.9858160935445928, "grad_norm": 0.1724894642829895, "learning_rate": 1e-05, "loss": 0.561, "step": 3562 }, { "epoch": 0.9860928526949422, "grad_norm": 0.17067082226276398, "learning_rate": 1e-05, "loss": 0.5522, "step": 3563 }, { "epoch": 0.9863696118452916, "grad_norm": 0.1723136007785797, "learning_rate": 1e-05, "loss": 0.5264, "step": 3564 }, { "epoch": 0.986646370995641, "grad_norm": 0.17053766548633575, "learning_rate": 1e-05, "loss": 0.5435, "step": 3565 }, { "epoch": 0.9869231301459904, "grad_norm": 0.17646051943302155, "learning_rate": 1e-05, "loss": 0.5446, "step": 3566 }, { "epoch": 0.9871998892963398, "grad_norm": 0.18429245054721832, "learning_rate": 1e-05, "loss": 0.5165, "step": 3567 }, { "epoch": 0.9874766484466893, "grad_norm": 0.16673152148723602, "learning_rate": 1e-05, "loss": 0.5461, "step": 3568 }, { "epoch": 0.9877534075970387, "grad_norm": 0.1662590354681015, "learning_rate": 1e-05, "loss": 0.5494, "step": 3569 }, { "epoch": 0.9880301667473881, "grad_norm": 0.17327527701854706, "learning_rate": 1e-05, "loss": 0.5376, "step": 3570 }, { "epoch": 0.9883069258977375, "grad_norm": 0.16488540172576904, "learning_rate": 1e-05, "loss": 0.5278, "step": 3571 }, { "epoch": 0.9885836850480869, "grad_norm": 0.1706630140542984, "learning_rate": 1e-05, "loss": 0.5289, "step": 3572 }, { "epoch": 0.9888604441984363, "grad_norm": 0.1672530621290207, "learning_rate": 1e-05, "loss": 0.5359, "step": 3573 }, { "epoch": 0.9891372033487857, "grad_norm": 0.16673777997493744, "learning_rate": 1e-05, "loss": 0.5288, "step": 3574 }, { "epoch": 0.9894139624991352, "grad_norm": 0.17167928814888, "learning_rate": 1e-05, "loss": 0.5255, "step": 3575 }, { "epoch": 0.9896907216494846, "grad_norm": 0.17811356484889984, "learning_rate": 1e-05, "loss": 0.5532, "step": 3576 }, { "epoch": 0.989967480799834, "grad_norm": 0.166414275765419, "learning_rate": 1e-05, "loss": 0.5733, "step": 3577 }, { "epoch": 0.9902442399501834, "grad_norm": 0.17240504920482635, "learning_rate": 1e-05, "loss": 0.5766, "step": 3578 }, { "epoch": 0.9905209991005327, "grad_norm": 0.16933532059192657, "learning_rate": 1e-05, "loss": 0.5288, "step": 3579 }, { "epoch": 0.9907977582508821, "grad_norm": 0.1880798190832138, "learning_rate": 1e-05, "loss": 0.561, "step": 3580 }, { "epoch": 0.9910745174012315, "grad_norm": 0.1703583002090454, "learning_rate": 1e-05, "loss": 0.5507, "step": 3581 }, { "epoch": 0.991351276551581, "grad_norm": 0.16839370131492615, "learning_rate": 1e-05, "loss": 0.5555, "step": 3582 }, { "epoch": 0.9916280357019304, "grad_norm": 0.18275347352027893, "learning_rate": 1e-05, "loss": 0.5413, "step": 3583 }, { "epoch": 0.9919047948522798, "grad_norm": 0.15866027772426605, "learning_rate": 1e-05, "loss": 0.5149, "step": 3584 }, { "epoch": 0.9921815540026292, "grad_norm": 0.16782283782958984, "learning_rate": 1e-05, "loss": 0.5507, "step": 3585 }, { "epoch": 0.9924583131529786, "grad_norm": 0.1619407683610916, "learning_rate": 1e-05, "loss": 0.5496, "step": 3586 }, { "epoch": 0.992735072303328, "grad_norm": 0.1654723882675171, "learning_rate": 1e-05, "loss": 0.5339, "step": 3587 }, { "epoch": 0.9930118314536774, "grad_norm": 0.17055782675743103, "learning_rate": 1e-05, "loss": 0.526, "step": 3588 }, { "epoch": 0.9932885906040269, "grad_norm": 0.17727522552013397, "learning_rate": 1e-05, "loss": 0.5397, "step": 3589 }, { "epoch": 0.9935653497543763, "grad_norm": 0.17070142924785614, "learning_rate": 1e-05, "loss": 0.5377, "step": 3590 }, { "epoch": 0.9938421089047257, "grad_norm": 0.1761288195848465, "learning_rate": 1e-05, "loss": 0.5457, "step": 3591 }, { "epoch": 0.9941188680550751, "grad_norm": 0.1699192076921463, "learning_rate": 1e-05, "loss": 0.5588, "step": 3592 }, { "epoch": 0.9943956272054245, "grad_norm": 0.16552209854125977, "learning_rate": 1e-05, "loss": 0.5329, "step": 3593 }, { "epoch": 0.9946723863557739, "grad_norm": 0.1694563627243042, "learning_rate": 1e-05, "loss": 0.5366, "step": 3594 }, { "epoch": 0.9949491455061233, "grad_norm": 0.1584758162498474, "learning_rate": 1e-05, "loss": 0.5245, "step": 3595 }, { "epoch": 0.9952259046564728, "grad_norm": 0.16825169324874878, "learning_rate": 1e-05, "loss": 0.5539, "step": 3596 }, { "epoch": 0.9955026638068221, "grad_norm": 0.17538471519947052, "learning_rate": 1e-05, "loss": 0.5371, "step": 3597 }, { "epoch": 0.9957794229571715, "grad_norm": 0.16259516775608063, "learning_rate": 1e-05, "loss": 0.5203, "step": 3598 }, { "epoch": 0.9960561821075209, "grad_norm": 0.17140352725982666, "learning_rate": 1e-05, "loss": 0.5577, "step": 3599 }, { "epoch": 0.9963329412578703, "grad_norm": 0.17109085619449615, "learning_rate": 1e-05, "loss": 0.5603, "step": 3600 }, { "epoch": 0.9966097004082197, "grad_norm": 0.1616726666688919, "learning_rate": 1e-05, "loss": 0.5366, "step": 3601 }, { "epoch": 0.9968864595585691, "grad_norm": 0.16573292016983032, "learning_rate": 1e-05, "loss": 0.5416, "step": 3602 }, { "epoch": 0.9971632187089186, "grad_norm": 0.17184901237487793, "learning_rate": 1e-05, "loss": 0.5299, "step": 3603 }, { "epoch": 0.997439977859268, "grad_norm": 0.16825814545154572, "learning_rate": 1e-05, "loss": 0.5344, "step": 3604 }, { "epoch": 0.9977167370096174, "grad_norm": 0.17052219808101654, "learning_rate": 1e-05, "loss": 0.5397, "step": 3605 }, { "epoch": 0.9979934961599668, "grad_norm": 0.17102794349193573, "learning_rate": 1e-05, "loss": 0.5505, "step": 3606 }, { "epoch": 0.9982702553103162, "grad_norm": 0.17833705246448517, "learning_rate": 1e-05, "loss": 0.5903, "step": 3607 }, { "epoch": 0.9985470144606656, "grad_norm": 0.1576048880815506, "learning_rate": 1e-05, "loss": 0.5251, "step": 3608 }, { "epoch": 0.998823773611015, "grad_norm": 0.1681240350008011, "learning_rate": 1e-05, "loss": 0.5331, "step": 3609 }, { "epoch": 0.9991005327613645, "grad_norm": 0.17074739933013916, "learning_rate": 1e-05, "loss": 0.5441, "step": 3610 }, { "epoch": 0.9993772919117139, "grad_norm": 0.17178772389888763, "learning_rate": 1e-05, "loss": 0.5264, "step": 3611 }, { "epoch": 0.9996540510620633, "grad_norm": 0.164947047829628, "learning_rate": 1e-05, "loss": 0.5543, "step": 3612 }, { "epoch": 0.9999308102124127, "grad_norm": 0.15906451642513275, "learning_rate": 1e-05, "loss": 0.5124, "step": 3613 }, { "epoch": 1.0, "grad_norm": 0.15906451642513275, "learning_rate": 1e-05, "loss": 0.5352, "step": 3614 }, { "epoch": 1.0, "eval_loss": 0.5397945046424866, "eval_runtime": 3252.7082, "eval_samples_per_second": 75.056, "eval_steps_per_second": 2.346, "step": 3614 }, { "epoch": 1.0002767591503494, "grad_norm": 0.5111564993858337, "learning_rate": 1e-05, "loss": 0.5298, "step": 3615 }, { "epoch": 1.0005535183006988, "grad_norm": 0.16439443826675415, "learning_rate": 1e-05, "loss": 0.5583, "step": 3616 }, { "epoch": 1.0008302774510482, "grad_norm": 0.17278634011745453, "learning_rate": 1e-05, "loss": 0.5753, "step": 3617 }, { "epoch": 1.0011070366013977, "grad_norm": 0.1725550889968872, "learning_rate": 1e-05, "loss": 0.5445, "step": 3618 }, { "epoch": 1.001383795751747, "grad_norm": 0.17536941170692444, "learning_rate": 1e-05, "loss": 0.5422, "step": 3619 }, { "epoch": 1.0016605549020965, "grad_norm": 0.17179793119430542, "learning_rate": 1e-05, "loss": 0.5686, "step": 3620 }, { "epoch": 1.001937314052446, "grad_norm": 0.174508735537529, "learning_rate": 1e-05, "loss": 0.5511, "step": 3621 }, { "epoch": 1.0022140732027953, "grad_norm": 0.1741226464509964, "learning_rate": 1e-05, "loss": 0.5116, "step": 3622 }, { "epoch": 1.0024908323531447, "grad_norm": 0.1696409285068512, "learning_rate": 1e-05, "loss": 0.5298, "step": 3623 }, { "epoch": 1.0027675915034941, "grad_norm": 0.16684554517269135, "learning_rate": 1e-05, "loss": 0.5385, "step": 3624 }, { "epoch": 1.0030443506538436, "grad_norm": 0.17434176802635193, "learning_rate": 1e-05, "loss": 0.554, "step": 3625 }, { "epoch": 1.003321109804193, "grad_norm": 0.16937439143657684, "learning_rate": 1e-05, "loss": 0.5351, "step": 3626 }, { "epoch": 1.0035978689545424, "grad_norm": 0.16844968497753143, "learning_rate": 1e-05, "loss": 0.5473, "step": 3627 }, { "epoch": 1.0038746281048918, "grad_norm": 0.16664525866508484, "learning_rate": 1e-05, "loss": 0.5249, "step": 3628 }, { "epoch": 1.0041513872552412, "grad_norm": 0.16090108454227448, "learning_rate": 1e-05, "loss": 0.5367, "step": 3629 }, { "epoch": 1.0044281464055906, "grad_norm": 0.17184454202651978, "learning_rate": 1e-05, "loss": 0.5382, "step": 3630 }, { "epoch": 1.00470490555594, "grad_norm": 0.16549216210842133, "learning_rate": 1e-05, "loss": 0.5102, "step": 3631 }, { "epoch": 1.0049816647062892, "grad_norm": 0.1648939847946167, "learning_rate": 1e-05, "loss": 0.4985, "step": 3632 }, { "epoch": 1.0052584238566387, "grad_norm": 0.16731025278568268, "learning_rate": 1e-05, "loss": 0.5723, "step": 3633 }, { "epoch": 1.005535183006988, "grad_norm": 0.17768718302249908, "learning_rate": 1e-05, "loss": 0.5367, "step": 3634 }, { "epoch": 1.0058119421573375, "grad_norm": 0.16767635941505432, "learning_rate": 1e-05, "loss": 0.545, "step": 3635 }, { "epoch": 1.006088701307687, "grad_norm": 0.16868141293525696, "learning_rate": 1e-05, "loss": 0.52, "step": 3636 }, { "epoch": 1.0063654604580363, "grad_norm": 0.1732926368713379, "learning_rate": 1e-05, "loss": 0.569, "step": 3637 }, { "epoch": 1.0066422196083857, "grad_norm": 0.17520032823085785, "learning_rate": 1e-05, "loss": 0.5422, "step": 3638 }, { "epoch": 1.0069189787587352, "grad_norm": 0.16436642408370972, "learning_rate": 1e-05, "loss": 0.5542, "step": 3639 }, { "epoch": 1.0071957379090846, "grad_norm": 0.1638520359992981, "learning_rate": 1e-05, "loss": 0.5368, "step": 3640 }, { "epoch": 1.007472497059434, "grad_norm": 0.1651402860879898, "learning_rate": 1e-05, "loss": 0.5529, "step": 3641 }, { "epoch": 1.0077492562097834, "grad_norm": 0.16817224025726318, "learning_rate": 1e-05, "loss": 0.5248, "step": 3642 }, { "epoch": 1.0080260153601328, "grad_norm": 0.17331349849700928, "learning_rate": 1e-05, "loss": 0.551, "step": 3643 }, { "epoch": 1.0083027745104822, "grad_norm": 0.1740770787000656, "learning_rate": 1e-05, "loss": 0.5025, "step": 3644 }, { "epoch": 1.0085795336608316, "grad_norm": 0.1659717559814453, "learning_rate": 1e-05, "loss": 0.5428, "step": 3645 }, { "epoch": 1.008856292811181, "grad_norm": 0.1704796850681305, "learning_rate": 1e-05, "loss": 0.5351, "step": 3646 }, { "epoch": 1.0091330519615305, "grad_norm": 0.17026790976524353, "learning_rate": 1e-05, "loss": 0.5505, "step": 3647 }, { "epoch": 1.0094098111118799, "grad_norm": 0.16864734888076782, "learning_rate": 1e-05, "loss": 0.5379, "step": 3648 }, { "epoch": 1.0096865702622293, "grad_norm": 0.17210513353347778, "learning_rate": 1e-05, "loss": 0.55, "step": 3649 }, { "epoch": 1.0099633294125787, "grad_norm": 0.16873227059841156, "learning_rate": 1e-05, "loss": 0.5272, "step": 3650 }, { "epoch": 1.0102400885629281, "grad_norm": 0.16879241168498993, "learning_rate": 1e-05, "loss": 0.5455, "step": 3651 }, { "epoch": 1.0105168477132775, "grad_norm": 0.17144784331321716, "learning_rate": 1e-05, "loss": 0.5448, "step": 3652 }, { "epoch": 1.010793606863627, "grad_norm": 0.16881835460662842, "learning_rate": 1e-05, "loss": 0.5506, "step": 3653 }, { "epoch": 1.0110703660139764, "grad_norm": 0.17603245377540588, "learning_rate": 1e-05, "loss": 0.5335, "step": 3654 }, { "epoch": 1.0113471251643258, "grad_norm": 0.16803188621997833, "learning_rate": 1e-05, "loss": 0.5397, "step": 3655 }, { "epoch": 1.0116238843146752, "grad_norm": 0.1647128313779831, "learning_rate": 1e-05, "loss": 0.5662, "step": 3656 }, { "epoch": 1.0119006434650246, "grad_norm": 0.17010729014873505, "learning_rate": 1e-05, "loss": 0.5658, "step": 3657 }, { "epoch": 1.012177402615374, "grad_norm": 0.17033351957798004, "learning_rate": 1e-05, "loss": 0.5285, "step": 3658 }, { "epoch": 1.0124541617657234, "grad_norm": 0.16753801703453064, "learning_rate": 1e-05, "loss": 0.5615, "step": 3659 }, { "epoch": 1.0127309209160729, "grad_norm": 0.17469660937786102, "learning_rate": 1e-05, "loss": 0.5144, "step": 3660 }, { "epoch": 1.0130076800664223, "grad_norm": 0.17392659187316895, "learning_rate": 1e-05, "loss": 0.5252, "step": 3661 }, { "epoch": 1.0132844392167717, "grad_norm": 0.1649034172296524, "learning_rate": 1e-05, "loss": 0.55, "step": 3662 }, { "epoch": 1.013561198367121, "grad_norm": 0.16982442140579224, "learning_rate": 1e-05, "loss": 0.5419, "step": 3663 }, { "epoch": 1.0138379575174705, "grad_norm": 0.16972438991069794, "learning_rate": 1e-05, "loss": 0.5621, "step": 3664 }, { "epoch": 1.01411471666782, "grad_norm": 0.17479851841926575, "learning_rate": 1e-05, "loss": 0.5438, "step": 3665 }, { "epoch": 1.0143914758181691, "grad_norm": 0.16780491173267365, "learning_rate": 1e-05, "loss": 0.5089, "step": 3666 }, { "epoch": 1.0146682349685185, "grad_norm": 0.16349555552005768, "learning_rate": 1e-05, "loss": 0.5137, "step": 3667 }, { "epoch": 1.014944994118868, "grad_norm": 0.16430974006652832, "learning_rate": 1e-05, "loss": 0.5235, "step": 3668 }, { "epoch": 1.0152217532692174, "grad_norm": 0.16545265913009644, "learning_rate": 1e-05, "loss": 0.5175, "step": 3669 }, { "epoch": 1.0154985124195668, "grad_norm": 0.17449548840522766, "learning_rate": 1e-05, "loss": 0.5294, "step": 3670 }, { "epoch": 1.0157752715699162, "grad_norm": 0.17024005949497223, "learning_rate": 1e-05, "loss": 0.5488, "step": 3671 }, { "epoch": 1.0160520307202656, "grad_norm": 0.16056880354881287, "learning_rate": 1e-05, "loss": 0.529, "step": 3672 }, { "epoch": 1.016328789870615, "grad_norm": 0.17867863178253174, "learning_rate": 1e-05, "loss": 0.572, "step": 3673 }, { "epoch": 1.0166055490209644, "grad_norm": 0.17195114493370056, "learning_rate": 1e-05, "loss": 0.5449, "step": 3674 }, { "epoch": 1.0168823081713139, "grad_norm": 0.16949765384197235, "learning_rate": 1e-05, "loss": 0.5359, "step": 3675 }, { "epoch": 1.0171590673216633, "grad_norm": 0.1794911026954651, "learning_rate": 1e-05, "loss": 0.5632, "step": 3676 }, { "epoch": 1.0174358264720127, "grad_norm": 0.1717853546142578, "learning_rate": 1e-05, "loss": 0.504, "step": 3677 }, { "epoch": 1.017712585622362, "grad_norm": 0.17529436945915222, "learning_rate": 1e-05, "loss": 0.5296, "step": 3678 }, { "epoch": 1.0179893447727115, "grad_norm": 0.1781584918498993, "learning_rate": 1e-05, "loss": 0.5322, "step": 3679 }, { "epoch": 1.018266103923061, "grad_norm": 0.17214755713939667, "learning_rate": 1e-05, "loss": 0.5035, "step": 3680 }, { "epoch": 1.0185428630734104, "grad_norm": 0.17302533984184265, "learning_rate": 1e-05, "loss": 0.5237, "step": 3681 }, { "epoch": 1.0188196222237598, "grad_norm": 0.17131632566452026, "learning_rate": 1e-05, "loss": 0.537, "step": 3682 }, { "epoch": 1.0190963813741092, "grad_norm": 0.1702253669500351, "learning_rate": 1e-05, "loss": 0.5145, "step": 3683 }, { "epoch": 1.0193731405244586, "grad_norm": 0.16818369925022125, "learning_rate": 1e-05, "loss": 0.551, "step": 3684 }, { "epoch": 1.019649899674808, "grad_norm": 0.1663605272769928, "learning_rate": 1e-05, "loss": 0.5367, "step": 3685 }, { "epoch": 1.0199266588251574, "grad_norm": 0.17731544375419617, "learning_rate": 1e-05, "loss": 0.5429, "step": 3686 }, { "epoch": 1.0202034179755068, "grad_norm": 0.16770899295806885, "learning_rate": 1e-05, "loss": 0.5143, "step": 3687 }, { "epoch": 1.0204801771258563, "grad_norm": 0.16413912177085876, "learning_rate": 1e-05, "loss": 0.5152, "step": 3688 }, { "epoch": 1.0207569362762057, "grad_norm": 0.16823819279670715, "learning_rate": 1e-05, "loss": 0.5486, "step": 3689 }, { "epoch": 1.021033695426555, "grad_norm": 0.17645135521888733, "learning_rate": 1e-05, "loss": 0.5434, "step": 3690 }, { "epoch": 1.0213104545769045, "grad_norm": 0.1849326491355896, "learning_rate": 1e-05, "loss": 0.5326, "step": 3691 }, { "epoch": 1.021587213727254, "grad_norm": 0.17081326246261597, "learning_rate": 1e-05, "loss": 0.5358, "step": 3692 }, { "epoch": 1.0218639728776033, "grad_norm": 0.16840530931949615, "learning_rate": 1e-05, "loss": 0.5583, "step": 3693 }, { "epoch": 1.0221407320279527, "grad_norm": 0.17947471141815186, "learning_rate": 1e-05, "loss": 0.4906, "step": 3694 }, { "epoch": 1.0224174911783022, "grad_norm": 0.16708627343177795, "learning_rate": 1e-05, "loss": 0.5348, "step": 3695 }, { "epoch": 1.0226942503286516, "grad_norm": 0.17549946904182434, "learning_rate": 1e-05, "loss": 0.556, "step": 3696 }, { "epoch": 1.022971009479001, "grad_norm": 0.17752443253993988, "learning_rate": 1e-05, "loss": 0.5506, "step": 3697 }, { "epoch": 1.0232477686293504, "grad_norm": 0.1726832538843155, "learning_rate": 1e-05, "loss": 0.5583, "step": 3698 }, { "epoch": 1.0235245277796998, "grad_norm": 0.17151376605033875, "learning_rate": 1e-05, "loss": 0.5342, "step": 3699 }, { "epoch": 1.0238012869300492, "grad_norm": 0.1593811810016632, "learning_rate": 1e-05, "loss": 0.501, "step": 3700 }, { "epoch": 1.0240780460803984, "grad_norm": 0.18029974400997162, "learning_rate": 1e-05, "loss": 0.5203, "step": 3701 }, { "epoch": 1.0243548052307478, "grad_norm": 0.172781303524971, "learning_rate": 1e-05, "loss": 0.5259, "step": 3702 }, { "epoch": 1.0246315643810973, "grad_norm": 0.167284294962883, "learning_rate": 1e-05, "loss": 0.525, "step": 3703 }, { "epoch": 1.0249083235314467, "grad_norm": 0.1652655005455017, "learning_rate": 1e-05, "loss": 0.5048, "step": 3704 }, { "epoch": 1.025185082681796, "grad_norm": 0.16284741461277008, "learning_rate": 1e-05, "loss": 0.5391, "step": 3705 }, { "epoch": 1.0254618418321455, "grad_norm": 0.17112019658088684, "learning_rate": 1e-05, "loss": 0.5147, "step": 3706 }, { "epoch": 1.025738600982495, "grad_norm": 0.18300481140613556, "learning_rate": 1e-05, "loss": 0.5289, "step": 3707 }, { "epoch": 1.0260153601328443, "grad_norm": 0.17054148018360138, "learning_rate": 1e-05, "loss": 0.5308, "step": 3708 }, { "epoch": 1.0262921192831937, "grad_norm": 0.1819465309381485, "learning_rate": 1e-05, "loss": 0.546, "step": 3709 }, { "epoch": 1.0265688784335432, "grad_norm": 0.18287694454193115, "learning_rate": 1e-05, "loss": 0.5434, "step": 3710 }, { "epoch": 1.0268456375838926, "grad_norm": 0.166224405169487, "learning_rate": 1e-05, "loss": 0.5242, "step": 3711 }, { "epoch": 1.027122396734242, "grad_norm": 0.17898425459861755, "learning_rate": 1e-05, "loss": 0.5179, "step": 3712 }, { "epoch": 1.0273991558845914, "grad_norm": 0.1707315444946289, "learning_rate": 1e-05, "loss": 0.5283, "step": 3713 }, { "epoch": 1.0276759150349408, "grad_norm": 0.1716984063386917, "learning_rate": 1e-05, "loss": 0.5156, "step": 3714 }, { "epoch": 1.0279526741852902, "grad_norm": 0.17470920085906982, "learning_rate": 1e-05, "loss": 0.5066, "step": 3715 }, { "epoch": 1.0282294333356397, "grad_norm": 0.1737779676914215, "learning_rate": 1e-05, "loss": 0.5358, "step": 3716 }, { "epoch": 1.028506192485989, "grad_norm": 0.18384715914726257, "learning_rate": 1e-05, "loss": 0.5157, "step": 3717 }, { "epoch": 1.0287829516363385, "grad_norm": 0.17775560915470123, "learning_rate": 1e-05, "loss": 0.5276, "step": 3718 }, { "epoch": 1.029059710786688, "grad_norm": 0.16766174137592316, "learning_rate": 1e-05, "loss": 0.5147, "step": 3719 }, { "epoch": 1.0293364699370373, "grad_norm": 0.17187778651714325, "learning_rate": 1e-05, "loss": 0.549, "step": 3720 }, { "epoch": 1.0296132290873867, "grad_norm": 0.16918475925922394, "learning_rate": 1e-05, "loss": 0.5241, "step": 3721 }, { "epoch": 1.0298899882377361, "grad_norm": 0.18188147246837616, "learning_rate": 1e-05, "loss": 0.5483, "step": 3722 }, { "epoch": 1.0301667473880856, "grad_norm": 0.16736213862895966, "learning_rate": 1e-05, "loss": 0.5407, "step": 3723 }, { "epoch": 1.030443506538435, "grad_norm": 0.17081904411315918, "learning_rate": 1e-05, "loss": 0.5063, "step": 3724 }, { "epoch": 1.0307202656887844, "grad_norm": 0.1646646410226822, "learning_rate": 1e-05, "loss": 0.5306, "step": 3725 }, { "epoch": 1.0309970248391338, "grad_norm": 0.17848549783229828, "learning_rate": 1e-05, "loss": 0.5013, "step": 3726 }, { "epoch": 1.0312737839894832, "grad_norm": 0.16905786097049713, "learning_rate": 1e-05, "loss": 0.4985, "step": 3727 }, { "epoch": 1.0315505431398326, "grad_norm": 0.17394641041755676, "learning_rate": 1e-05, "loss": 0.5299, "step": 3728 }, { "epoch": 1.031827302290182, "grad_norm": 0.1764105260372162, "learning_rate": 1e-05, "loss": 0.5143, "step": 3729 }, { "epoch": 1.0321040614405315, "grad_norm": 0.17228826880455017, "learning_rate": 1e-05, "loss": 0.5003, "step": 3730 }, { "epoch": 1.0323808205908809, "grad_norm": 0.1657646745443344, "learning_rate": 1e-05, "loss": 0.5189, "step": 3731 }, { "epoch": 1.0326575797412303, "grad_norm": 0.1609165519475937, "learning_rate": 1e-05, "loss": 0.499, "step": 3732 }, { "epoch": 1.0329343388915797, "grad_norm": 0.1667010635137558, "learning_rate": 1e-05, "loss": 0.512, "step": 3733 }, { "epoch": 1.033211098041929, "grad_norm": 0.17265327274799347, "learning_rate": 1e-05, "loss": 0.5432, "step": 3734 }, { "epoch": 1.0334878571922783, "grad_norm": 0.1717497557401657, "learning_rate": 1e-05, "loss": 0.5304, "step": 3735 }, { "epoch": 1.0337646163426277, "grad_norm": 0.1694965660572052, "learning_rate": 1e-05, "loss": 0.5074, "step": 3736 }, { "epoch": 1.0340413754929771, "grad_norm": 0.16672742366790771, "learning_rate": 1e-05, "loss": 0.5086, "step": 3737 }, { "epoch": 1.0343181346433266, "grad_norm": 0.17293193936347961, "learning_rate": 1e-05, "loss": 0.5276, "step": 3738 }, { "epoch": 1.034594893793676, "grad_norm": 0.17026087641716003, "learning_rate": 1e-05, "loss": 0.5393, "step": 3739 }, { "epoch": 1.0348716529440254, "grad_norm": 0.1740133911371231, "learning_rate": 1e-05, "loss": 0.5477, "step": 3740 }, { "epoch": 1.0351484120943748, "grad_norm": 0.17486630380153656, "learning_rate": 1e-05, "loss": 0.5361, "step": 3741 }, { "epoch": 1.0354251712447242, "grad_norm": 0.17075933516025543, "learning_rate": 1e-05, "loss": 0.5543, "step": 3742 }, { "epoch": 1.0357019303950736, "grad_norm": 0.16469399631023407, "learning_rate": 1e-05, "loss": 0.558, "step": 3743 }, { "epoch": 1.035978689545423, "grad_norm": 0.1692454218864441, "learning_rate": 1e-05, "loss": 0.5066, "step": 3744 }, { "epoch": 1.0362554486957725, "grad_norm": 0.176067054271698, "learning_rate": 1e-05, "loss": 0.5317, "step": 3745 }, { "epoch": 1.0365322078461219, "grad_norm": 0.17846925556659698, "learning_rate": 1e-05, "loss": 0.5374, "step": 3746 }, { "epoch": 1.0368089669964713, "grad_norm": 0.17296992242336273, "learning_rate": 1e-05, "loss": 0.5037, "step": 3747 }, { "epoch": 1.0370857261468207, "grad_norm": 0.16974127292633057, "learning_rate": 1e-05, "loss": 0.5326, "step": 3748 }, { "epoch": 1.0373624852971701, "grad_norm": 0.16531811654567719, "learning_rate": 1e-05, "loss": 0.5106, "step": 3749 }, { "epoch": 1.0376392444475195, "grad_norm": 0.1611355096101761, "learning_rate": 1e-05, "loss": 0.496, "step": 3750 }, { "epoch": 1.037916003597869, "grad_norm": 0.17294961214065552, "learning_rate": 1e-05, "loss": 0.5452, "step": 3751 }, { "epoch": 1.0381927627482184, "grad_norm": 0.1703031361103058, "learning_rate": 1e-05, "loss": 0.5279, "step": 3752 }, { "epoch": 1.0384695218985678, "grad_norm": 0.17370623350143433, "learning_rate": 1e-05, "loss": 0.541, "step": 3753 }, { "epoch": 1.0387462810489172, "grad_norm": 0.1758372038602829, "learning_rate": 1e-05, "loss": 0.5052, "step": 3754 }, { "epoch": 1.0390230401992666, "grad_norm": 0.18007460236549377, "learning_rate": 1e-05, "loss": 0.5263, "step": 3755 }, { "epoch": 1.039299799349616, "grad_norm": 0.17565254867076874, "learning_rate": 1e-05, "loss": 0.5108, "step": 3756 }, { "epoch": 1.0395765584999654, "grad_norm": 0.16599008440971375, "learning_rate": 1e-05, "loss": 0.5452, "step": 3757 }, { "epoch": 1.0398533176503149, "grad_norm": 0.17035403847694397, "learning_rate": 1e-05, "loss": 0.5097, "step": 3758 }, { "epoch": 1.0401300768006643, "grad_norm": 0.17640703916549683, "learning_rate": 1e-05, "loss": 0.5341, "step": 3759 }, { "epoch": 1.0404068359510137, "grad_norm": 0.16365770995616913, "learning_rate": 1e-05, "loss": 0.5244, "step": 3760 }, { "epoch": 1.040683595101363, "grad_norm": 0.17723345756530762, "learning_rate": 1e-05, "loss": 0.5523, "step": 3761 }, { "epoch": 1.0409603542517125, "grad_norm": 0.1639426052570343, "learning_rate": 1e-05, "loss": 0.4838, "step": 3762 }, { "epoch": 1.041237113402062, "grad_norm": 0.17396296560764313, "learning_rate": 1e-05, "loss": 0.5016, "step": 3763 }, { "epoch": 1.0415138725524113, "grad_norm": 0.16535323858261108, "learning_rate": 1e-05, "loss": 0.505, "step": 3764 }, { "epoch": 1.0417906317027608, "grad_norm": 0.17635810375213623, "learning_rate": 1e-05, "loss": 0.5098, "step": 3765 }, { "epoch": 1.0420673908531102, "grad_norm": 0.1723949909210205, "learning_rate": 1e-05, "loss": 0.5237, "step": 3766 }, { "epoch": 1.0423441500034596, "grad_norm": 0.17498955130577087, "learning_rate": 1e-05, "loss": 0.5127, "step": 3767 }, { "epoch": 1.042620909153809, "grad_norm": 0.1653994619846344, "learning_rate": 1e-05, "loss": 0.5203, "step": 3768 }, { "epoch": 1.0428976683041582, "grad_norm": 0.17172271013259888, "learning_rate": 1e-05, "loss": 0.5244, "step": 3769 }, { "epoch": 1.0431744274545076, "grad_norm": 0.1684475690126419, "learning_rate": 1e-05, "loss": 0.5203, "step": 3770 }, { "epoch": 1.043451186604857, "grad_norm": 0.17128080129623413, "learning_rate": 1e-05, "loss": 0.5245, "step": 3771 }, { "epoch": 1.0437279457552064, "grad_norm": 0.17172189056873322, "learning_rate": 1e-05, "loss": 0.533, "step": 3772 }, { "epoch": 1.0440047049055559, "grad_norm": 0.1726509928703308, "learning_rate": 1e-05, "loss": 0.4939, "step": 3773 }, { "epoch": 1.0442814640559053, "grad_norm": 0.16536180675029755, "learning_rate": 1e-05, "loss": 0.5263, "step": 3774 }, { "epoch": 1.0445582232062547, "grad_norm": 0.16790342330932617, "learning_rate": 1e-05, "loss": 0.522, "step": 3775 }, { "epoch": 1.044834982356604, "grad_norm": 0.1745743751525879, "learning_rate": 1e-05, "loss": 0.5175, "step": 3776 }, { "epoch": 1.0451117415069535, "grad_norm": 0.1638154834508896, "learning_rate": 1e-05, "loss": 0.4995, "step": 3777 }, { "epoch": 1.045388500657303, "grad_norm": 0.16249670088291168, "learning_rate": 1e-05, "loss": 0.4991, "step": 3778 }, { "epoch": 1.0456652598076523, "grad_norm": 0.17314192652702332, "learning_rate": 1e-05, "loss": 0.5438, "step": 3779 }, { "epoch": 1.0459420189580018, "grad_norm": 0.16754205524921417, "learning_rate": 1e-05, "loss": 0.5298, "step": 3780 }, { "epoch": 1.0462187781083512, "grad_norm": 0.1706458032131195, "learning_rate": 1e-05, "loss": 0.5313, "step": 3781 }, { "epoch": 1.0464955372587006, "grad_norm": 0.17701727151870728, "learning_rate": 1e-05, "loss": 0.537, "step": 3782 }, { "epoch": 1.04677229640905, "grad_norm": 0.16511736810207367, "learning_rate": 1e-05, "loss": 0.5147, "step": 3783 }, { "epoch": 1.0470490555593994, "grad_norm": 0.1751195639371872, "learning_rate": 1e-05, "loss": 0.5218, "step": 3784 }, { "epoch": 1.0473258147097488, "grad_norm": 0.17187879979610443, "learning_rate": 1e-05, "loss": 0.5115, "step": 3785 }, { "epoch": 1.0476025738600983, "grad_norm": 0.17827503383159637, "learning_rate": 1e-05, "loss": 0.5367, "step": 3786 }, { "epoch": 1.0478793330104477, "grad_norm": 0.17982681095600128, "learning_rate": 1e-05, "loss": 0.5606, "step": 3787 }, { "epoch": 1.048156092160797, "grad_norm": 0.17143718898296356, "learning_rate": 1e-05, "loss": 0.5284, "step": 3788 }, { "epoch": 1.0484328513111465, "grad_norm": 0.17282956838607788, "learning_rate": 1e-05, "loss": 0.5089, "step": 3789 }, { "epoch": 1.048709610461496, "grad_norm": 0.16633853316307068, "learning_rate": 1e-05, "loss": 0.5321, "step": 3790 }, { "epoch": 1.0489863696118453, "grad_norm": 0.17347338795661926, "learning_rate": 1e-05, "loss": 0.5364, "step": 3791 }, { "epoch": 1.0492631287621947, "grad_norm": 0.17690832912921906, "learning_rate": 1e-05, "loss": 0.51, "step": 3792 }, { "epoch": 1.0495398879125442, "grad_norm": 0.16255837678909302, "learning_rate": 1e-05, "loss": 0.5407, "step": 3793 }, { "epoch": 1.0498166470628936, "grad_norm": 1.2757257223129272, "learning_rate": 1e-05, "loss": 0.4858, "step": 3794 }, { "epoch": 1.050093406213243, "grad_norm": 0.16105104982852936, "learning_rate": 1e-05, "loss": 0.4817, "step": 3795 }, { "epoch": 1.0503701653635924, "grad_norm": 0.1659732311964035, "learning_rate": 1e-05, "loss": 0.524, "step": 3796 }, { "epoch": 1.0506469245139418, "grad_norm": 0.17383232712745667, "learning_rate": 1e-05, "loss": 0.5073, "step": 3797 }, { "epoch": 1.0509236836642912, "grad_norm": 0.1711413711309433, "learning_rate": 1e-05, "loss": 0.5037, "step": 3798 }, { "epoch": 1.0512004428146406, "grad_norm": 0.1658843457698822, "learning_rate": 1e-05, "loss": 0.5104, "step": 3799 }, { "epoch": 1.05147720196499, "grad_norm": 0.16376768052577972, "learning_rate": 1e-05, "loss": 0.5189, "step": 3800 }, { "epoch": 1.0517539611153395, "grad_norm": 0.1793701946735382, "learning_rate": 1e-05, "loss": 0.4984, "step": 3801 }, { "epoch": 1.052030720265689, "grad_norm": 0.16791008412837982, "learning_rate": 1e-05, "loss": 0.5063, "step": 3802 }, { "epoch": 1.0523074794160383, "grad_norm": 0.1745382845401764, "learning_rate": 1e-05, "loss": 0.5442, "step": 3803 }, { "epoch": 1.0525842385663875, "grad_norm": 0.180571511387825, "learning_rate": 1e-05, "loss": 0.5028, "step": 3804 }, { "epoch": 1.052860997716737, "grad_norm": 0.17454473674297333, "learning_rate": 1e-05, "loss": 0.5181, "step": 3805 }, { "epoch": 1.0531377568670863, "grad_norm": 0.17271283268928528, "learning_rate": 1e-05, "loss": 0.5032, "step": 3806 }, { "epoch": 1.0534145160174357, "grad_norm": 0.17826732993125916, "learning_rate": 1e-05, "loss": 0.4953, "step": 3807 }, { "epoch": 1.0536912751677852, "grad_norm": 0.170669287443161, "learning_rate": 1e-05, "loss": 0.5519, "step": 3808 }, { "epoch": 1.0539680343181346, "grad_norm": 0.1705491542816162, "learning_rate": 1e-05, "loss": 0.5223, "step": 3809 }, { "epoch": 1.054244793468484, "grad_norm": 0.17483235895633698, "learning_rate": 1e-05, "loss": 0.5508, "step": 3810 }, { "epoch": 1.0545215526188334, "grad_norm": 0.17351382970809937, "learning_rate": 1e-05, "loss": 0.5347, "step": 3811 }, { "epoch": 1.0547983117691828, "grad_norm": 0.1689210683107376, "learning_rate": 1e-05, "loss": 0.5129, "step": 3812 }, { "epoch": 1.0550750709195322, "grad_norm": 0.1804484724998474, "learning_rate": 1e-05, "loss": 0.4973, "step": 3813 }, { "epoch": 1.0553518300698816, "grad_norm": 0.17068852484226227, "learning_rate": 1e-05, "loss": 0.5349, "step": 3814 }, { "epoch": 1.055628589220231, "grad_norm": 0.17645716667175293, "learning_rate": 1e-05, "loss": 0.5273, "step": 3815 }, { "epoch": 1.0559053483705805, "grad_norm": 0.17989499866962433, "learning_rate": 1e-05, "loss": 0.5146, "step": 3816 }, { "epoch": 1.05618210752093, "grad_norm": 0.1750141829252243, "learning_rate": 1e-05, "loss": 0.5295, "step": 3817 }, { "epoch": 1.0564588666712793, "grad_norm": 0.17169634997844696, "learning_rate": 1e-05, "loss": 0.5341, "step": 3818 }, { "epoch": 1.0567356258216287, "grad_norm": 0.17300809919834137, "learning_rate": 1e-05, "loss": 0.5115, "step": 3819 }, { "epoch": 1.0570123849719781, "grad_norm": 0.1675388365983963, "learning_rate": 1e-05, "loss": 0.5151, "step": 3820 }, { "epoch": 1.0572891441223276, "grad_norm": 0.17571160197257996, "learning_rate": 1e-05, "loss": 0.5116, "step": 3821 }, { "epoch": 1.057565903272677, "grad_norm": 0.17203490436077118, "learning_rate": 1e-05, "loss": 0.5021, "step": 3822 }, { "epoch": 1.0578426624230264, "grad_norm": 0.16938450932502747, "learning_rate": 1e-05, "loss": 0.511, "step": 3823 }, { "epoch": 1.0581194215733758, "grad_norm": 0.17335069179534912, "learning_rate": 1e-05, "loss": 0.4953, "step": 3824 }, { "epoch": 1.0583961807237252, "grad_norm": 0.171132892370224, "learning_rate": 1e-05, "loss": 0.503, "step": 3825 }, { "epoch": 1.0586729398740746, "grad_norm": 0.16832388937473297, "learning_rate": 1e-05, "loss": 0.499, "step": 3826 }, { "epoch": 1.058949699024424, "grad_norm": 0.1708265095949173, "learning_rate": 1e-05, "loss": 0.5262, "step": 3827 }, { "epoch": 1.0592264581747735, "grad_norm": 0.1812867522239685, "learning_rate": 1e-05, "loss": 0.5029, "step": 3828 }, { "epoch": 1.0595032173251229, "grad_norm": 0.16918474435806274, "learning_rate": 1e-05, "loss": 0.4979, "step": 3829 }, { "epoch": 1.0597799764754723, "grad_norm": 0.16443578898906708, "learning_rate": 1e-05, "loss": 0.5069, "step": 3830 }, { "epoch": 1.0600567356258217, "grad_norm": 0.17422865331172943, "learning_rate": 1e-05, "loss": 0.5523, "step": 3831 }, { "epoch": 1.0603334947761711, "grad_norm": 0.17174972593784332, "learning_rate": 1e-05, "loss": 0.506, "step": 3832 }, { "epoch": 1.0606102539265205, "grad_norm": 0.17448990046977997, "learning_rate": 1e-05, "loss": 0.5279, "step": 3833 }, { "epoch": 1.06088701307687, "grad_norm": 0.18012084066867828, "learning_rate": 1e-05, "loss": 0.5118, "step": 3834 }, { "epoch": 1.0611637722272194, "grad_norm": 0.1793694645166397, "learning_rate": 1e-05, "loss": 0.5381, "step": 3835 }, { "epoch": 1.0614405313775688, "grad_norm": 0.17197869718074799, "learning_rate": 1e-05, "loss": 0.5202, "step": 3836 }, { "epoch": 1.061717290527918, "grad_norm": 0.1606757938861847, "learning_rate": 1e-05, "loss": 0.4882, "step": 3837 }, { "epoch": 1.0619940496782674, "grad_norm": 0.1666472852230072, "learning_rate": 1e-05, "loss": 0.5126, "step": 3838 }, { "epoch": 1.0622708088286168, "grad_norm": 0.17480847239494324, "learning_rate": 1e-05, "loss": 0.5224, "step": 3839 }, { "epoch": 1.0625475679789662, "grad_norm": 0.16451847553253174, "learning_rate": 1e-05, "loss": 0.5546, "step": 3840 }, { "epoch": 1.0628243271293156, "grad_norm": 0.16199113428592682, "learning_rate": 1e-05, "loss": 0.4979, "step": 3841 }, { "epoch": 1.063101086279665, "grad_norm": 0.1608266830444336, "learning_rate": 1e-05, "loss": 0.4859, "step": 3842 }, { "epoch": 1.0633778454300145, "grad_norm": 0.1607472002506256, "learning_rate": 1e-05, "loss": 0.5003, "step": 3843 }, { "epoch": 1.0636546045803639, "grad_norm": 0.1649647355079651, "learning_rate": 1e-05, "loss": 0.5035, "step": 3844 }, { "epoch": 1.0639313637307133, "grad_norm": 0.1623869091272354, "learning_rate": 1e-05, "loss": 0.4903, "step": 3845 }, { "epoch": 1.0642081228810627, "grad_norm": 0.16854099929332733, "learning_rate": 1e-05, "loss": 0.5093, "step": 3846 }, { "epoch": 1.0644848820314121, "grad_norm": 0.17102287709712982, "learning_rate": 1e-05, "loss": 0.5038, "step": 3847 }, { "epoch": 1.0647616411817615, "grad_norm": 0.17976215481758118, "learning_rate": 1e-05, "loss": 0.5325, "step": 3848 }, { "epoch": 1.065038400332111, "grad_norm": 0.16933703422546387, "learning_rate": 1e-05, "loss": 0.5184, "step": 3849 }, { "epoch": 1.0653151594824604, "grad_norm": 0.1743323802947998, "learning_rate": 1e-05, "loss": 0.5151, "step": 3850 }, { "epoch": 1.0655919186328098, "grad_norm": 0.17314733564853668, "learning_rate": 1e-05, "loss": 0.5507, "step": 3851 }, { "epoch": 1.0658686777831592, "grad_norm": 0.1735357940196991, "learning_rate": 1e-05, "loss": 0.5248, "step": 3852 }, { "epoch": 1.0661454369335086, "grad_norm": 0.16982653737068176, "learning_rate": 1e-05, "loss": 0.5029, "step": 3853 }, { "epoch": 1.066422196083858, "grad_norm": 0.1668095886707306, "learning_rate": 1e-05, "loss": 0.5188, "step": 3854 }, { "epoch": 1.0666989552342074, "grad_norm": 0.17244157195091248, "learning_rate": 1e-05, "loss": 0.5061, "step": 3855 }, { "epoch": 1.0669757143845569, "grad_norm": 0.17418572306632996, "learning_rate": 1e-05, "loss": 0.5204, "step": 3856 }, { "epoch": 1.0672524735349063, "grad_norm": 0.16743910312652588, "learning_rate": 1e-05, "loss": 0.506, "step": 3857 }, { "epoch": 1.0675292326852557, "grad_norm": 0.16913504898548126, "learning_rate": 1e-05, "loss": 0.541, "step": 3858 }, { "epoch": 1.067805991835605, "grad_norm": 0.17637784779071808, "learning_rate": 1e-05, "loss": 0.5463, "step": 3859 }, { "epoch": 1.0680827509859545, "grad_norm": 0.1778058558702469, "learning_rate": 1e-05, "loss": 0.5206, "step": 3860 }, { "epoch": 1.068359510136304, "grad_norm": 0.17958419024944305, "learning_rate": 1e-05, "loss": 0.5009, "step": 3861 }, { "epoch": 1.0686362692866533, "grad_norm": 0.1831725388765335, "learning_rate": 1e-05, "loss": 0.5137, "step": 3862 }, { "epoch": 1.0689130284370028, "grad_norm": 0.17097756266593933, "learning_rate": 1e-05, "loss": 0.4981, "step": 3863 }, { "epoch": 1.0691897875873522, "grad_norm": 0.1659739762544632, "learning_rate": 1e-05, "loss": 0.5152, "step": 3864 }, { "epoch": 1.0694665467377016, "grad_norm": 0.16541282832622528, "learning_rate": 1e-05, "loss": 0.4954, "step": 3865 }, { "epoch": 1.069743305888051, "grad_norm": 0.17133717238903046, "learning_rate": 1e-05, "loss": 0.5342, "step": 3866 }, { "epoch": 1.0700200650384004, "grad_norm": 0.17288516461849213, "learning_rate": 1e-05, "loss": 0.5045, "step": 3867 }, { "epoch": 1.0702968241887498, "grad_norm": 0.17091144621372223, "learning_rate": 1e-05, "loss": 0.5319, "step": 3868 }, { "epoch": 1.0705735833390992, "grad_norm": 0.17433258891105652, "learning_rate": 1e-05, "loss": 0.5357, "step": 3869 }, { "epoch": 1.0708503424894487, "grad_norm": 0.176503524184227, "learning_rate": 1e-05, "loss": 0.5253, "step": 3870 }, { "epoch": 1.071127101639798, "grad_norm": 0.17366451025009155, "learning_rate": 1e-05, "loss": 0.5016, "step": 3871 }, { "epoch": 1.0714038607901473, "grad_norm": 0.17165929079055786, "learning_rate": 1e-05, "loss": 0.5233, "step": 3872 }, { "epoch": 1.0716806199404967, "grad_norm": 0.1724453866481781, "learning_rate": 1e-05, "loss": 0.5119, "step": 3873 }, { "epoch": 1.071957379090846, "grad_norm": 0.16842982172966003, "learning_rate": 1e-05, "loss": 0.5407, "step": 3874 }, { "epoch": 1.0722341382411955, "grad_norm": 0.17151065170764923, "learning_rate": 1e-05, "loss": 0.4973, "step": 3875 }, { "epoch": 1.072510897391545, "grad_norm": 0.17102548480033875, "learning_rate": 1e-05, "loss": 0.4912, "step": 3876 }, { "epoch": 1.0727876565418943, "grad_norm": 0.16499359905719757, "learning_rate": 1e-05, "loss": 0.4961, "step": 3877 }, { "epoch": 1.0730644156922438, "grad_norm": 0.1705777496099472, "learning_rate": 1e-05, "loss": 0.4945, "step": 3878 }, { "epoch": 1.0733411748425932, "grad_norm": 0.17541418969631195, "learning_rate": 1e-05, "loss": 0.5271, "step": 3879 }, { "epoch": 1.0736179339929426, "grad_norm": 0.17590366303920746, "learning_rate": 1e-05, "loss": 0.5106, "step": 3880 }, { "epoch": 1.073894693143292, "grad_norm": 0.16521532833576202, "learning_rate": 1e-05, "loss": 0.5223, "step": 3881 }, { "epoch": 1.0741714522936414, "grad_norm": 0.1760053038597107, "learning_rate": 1e-05, "loss": 0.5216, "step": 3882 }, { "epoch": 1.0744482114439908, "grad_norm": 0.17275340855121613, "learning_rate": 1e-05, "loss": 0.5137, "step": 3883 }, { "epoch": 1.0747249705943402, "grad_norm": 0.16915255784988403, "learning_rate": 1e-05, "loss": 0.546, "step": 3884 }, { "epoch": 1.0750017297446897, "grad_norm": 0.1789306104183197, "learning_rate": 1e-05, "loss": 0.5232, "step": 3885 }, { "epoch": 1.075278488895039, "grad_norm": 0.16968898475170135, "learning_rate": 1e-05, "loss": 0.5167, "step": 3886 }, { "epoch": 1.0755552480453885, "grad_norm": 0.16860705614089966, "learning_rate": 1e-05, "loss": 0.5086, "step": 3887 }, { "epoch": 1.075832007195738, "grad_norm": 0.1705164760351181, "learning_rate": 1e-05, "loss": 0.5125, "step": 3888 }, { "epoch": 1.0761087663460873, "grad_norm": 0.17659853398799896, "learning_rate": 1e-05, "loss": 0.5415, "step": 3889 }, { "epoch": 1.0763855254964367, "grad_norm": 0.16412471234798431, "learning_rate": 1e-05, "loss": 0.5325, "step": 3890 }, { "epoch": 1.0766622846467861, "grad_norm": 0.1647065281867981, "learning_rate": 1e-05, "loss": 0.4897, "step": 3891 }, { "epoch": 1.0769390437971356, "grad_norm": 0.17734752595424652, "learning_rate": 1e-05, "loss": 0.515, "step": 3892 }, { "epoch": 1.077215802947485, "grad_norm": 0.16683010756969452, "learning_rate": 1e-05, "loss": 0.5182, "step": 3893 }, { "epoch": 1.0774925620978344, "grad_norm": 0.1748536378145218, "learning_rate": 1e-05, "loss": 0.5058, "step": 3894 }, { "epoch": 1.0777693212481838, "grad_norm": 0.17044177651405334, "learning_rate": 1e-05, "loss": 0.5193, "step": 3895 }, { "epoch": 1.0780460803985332, "grad_norm": 0.16322652995586395, "learning_rate": 1e-05, "loss": 0.5375, "step": 3896 }, { "epoch": 1.0783228395488826, "grad_norm": 0.1740478277206421, "learning_rate": 1e-05, "loss": 0.5586, "step": 3897 }, { "epoch": 1.078599598699232, "grad_norm": 0.16514870524406433, "learning_rate": 1e-05, "loss": 0.5207, "step": 3898 }, { "epoch": 1.0788763578495815, "grad_norm": 0.16741570830345154, "learning_rate": 1e-05, "loss": 0.5025, "step": 3899 }, { "epoch": 1.0791531169999309, "grad_norm": 0.16411729156970978, "learning_rate": 1e-05, "loss": 0.5054, "step": 3900 }, { "epoch": 1.0794298761502803, "grad_norm": 0.1654236614704132, "learning_rate": 1e-05, "loss": 0.5029, "step": 3901 }, { "epoch": 1.0797066353006297, "grad_norm": 0.17457766830921173, "learning_rate": 1e-05, "loss": 0.5176, "step": 3902 }, { "epoch": 1.0799833944509791, "grad_norm": 0.16673055291175842, "learning_rate": 1e-05, "loss": 0.4939, "step": 3903 }, { "epoch": 1.0802601536013285, "grad_norm": 0.1733260601758957, "learning_rate": 1e-05, "loss": 0.531, "step": 3904 }, { "epoch": 1.0805369127516777, "grad_norm": 0.17371343076229095, "learning_rate": 1e-05, "loss": 0.4998, "step": 3905 }, { "epoch": 1.0808136719020274, "grad_norm": 0.17402607202529907, "learning_rate": 1e-05, "loss": 0.5301, "step": 3906 }, { "epoch": 1.0810904310523766, "grad_norm": 0.17882195115089417, "learning_rate": 1e-05, "loss": 0.5047, "step": 3907 }, { "epoch": 1.081367190202726, "grad_norm": 0.17091892659664154, "learning_rate": 1e-05, "loss": 0.5082, "step": 3908 }, { "epoch": 1.0816439493530754, "grad_norm": 0.1793992817401886, "learning_rate": 1e-05, "loss": 0.5349, "step": 3909 }, { "epoch": 1.0819207085034248, "grad_norm": 0.1627446562051773, "learning_rate": 1e-05, "loss": 0.5061, "step": 3910 }, { "epoch": 1.0821974676537742, "grad_norm": 0.16984744369983673, "learning_rate": 1e-05, "loss": 0.5501, "step": 3911 }, { "epoch": 1.0824742268041236, "grad_norm": 0.16982217133045197, "learning_rate": 1e-05, "loss": 0.5237, "step": 3912 }, { "epoch": 1.082750985954473, "grad_norm": 0.1671629250049591, "learning_rate": 1e-05, "loss": 0.5173, "step": 3913 }, { "epoch": 1.0830277451048225, "grad_norm": 0.1808978021144867, "learning_rate": 1e-05, "loss": 0.4993, "step": 3914 }, { "epoch": 1.0833045042551719, "grad_norm": 0.1656077653169632, "learning_rate": 1e-05, "loss": 0.5091, "step": 3915 }, { "epoch": 1.0835812634055213, "grad_norm": 0.19991473853588104, "learning_rate": 1e-05, "loss": 0.4966, "step": 3916 }, { "epoch": 1.0838580225558707, "grad_norm": 0.1711592674255371, "learning_rate": 1e-05, "loss": 0.5102, "step": 3917 }, { "epoch": 1.0841347817062201, "grad_norm": 0.17350587248802185, "learning_rate": 1e-05, "loss": 0.5525, "step": 3918 }, { "epoch": 1.0844115408565695, "grad_norm": 0.16915859282016754, "learning_rate": 1e-05, "loss": 0.511, "step": 3919 }, { "epoch": 1.084688300006919, "grad_norm": 0.16965614259243011, "learning_rate": 1e-05, "loss": 0.5026, "step": 3920 }, { "epoch": 1.0849650591572684, "grad_norm": 0.1773822009563446, "learning_rate": 1e-05, "loss": 0.5296, "step": 3921 }, { "epoch": 1.0852418183076178, "grad_norm": 0.17231984436511993, "learning_rate": 1e-05, "loss": 0.5389, "step": 3922 }, { "epoch": 1.0855185774579672, "grad_norm": 0.17350323498249054, "learning_rate": 1e-05, "loss": 0.5112, "step": 3923 }, { "epoch": 1.0857953366083166, "grad_norm": 0.16813410818576813, "learning_rate": 1e-05, "loss": 0.5233, "step": 3924 }, { "epoch": 1.086072095758666, "grad_norm": 0.16600972414016724, "learning_rate": 1e-05, "loss": 0.4821, "step": 3925 }, { "epoch": 1.0863488549090154, "grad_norm": 0.17881911993026733, "learning_rate": 1e-05, "loss": 0.5218, "step": 3926 }, { "epoch": 1.0866256140593649, "grad_norm": 0.17213506996631622, "learning_rate": 1e-05, "loss": 0.5249, "step": 3927 }, { "epoch": 1.0869023732097143, "grad_norm": 0.17517268657684326, "learning_rate": 1e-05, "loss": 0.5238, "step": 3928 }, { "epoch": 1.0871791323600637, "grad_norm": 0.17014354467391968, "learning_rate": 1e-05, "loss": 0.5009, "step": 3929 }, { "epoch": 1.087455891510413, "grad_norm": 0.1642422378063202, "learning_rate": 1e-05, "loss": 0.5371, "step": 3930 }, { "epoch": 1.0877326506607625, "grad_norm": 0.16987884044647217, "learning_rate": 1e-05, "loss": 0.5075, "step": 3931 }, { "epoch": 1.088009409811112, "grad_norm": 0.16618363559246063, "learning_rate": 1e-05, "loss": 0.5256, "step": 3932 }, { "epoch": 1.0882861689614614, "grad_norm": 0.17043350636959076, "learning_rate": 1e-05, "loss": 0.5322, "step": 3933 }, { "epoch": 1.0885629281118108, "grad_norm": 0.1719798594713211, "learning_rate": 1e-05, "loss": 0.5133, "step": 3934 }, { "epoch": 1.0888396872621602, "grad_norm": 0.16775013506412506, "learning_rate": 1e-05, "loss": 0.5265, "step": 3935 }, { "epoch": 1.0891164464125096, "grad_norm": 0.16787582635879517, "learning_rate": 1e-05, "loss": 0.5317, "step": 3936 }, { "epoch": 1.089393205562859, "grad_norm": 0.1661364883184433, "learning_rate": 1e-05, "loss": 0.5213, "step": 3937 }, { "epoch": 1.0896699647132084, "grad_norm": 0.17601051926612854, "learning_rate": 1e-05, "loss": 0.5054, "step": 3938 }, { "epoch": 1.0899467238635578, "grad_norm": 0.16672064363956451, "learning_rate": 1e-05, "loss": 0.5039, "step": 3939 }, { "epoch": 1.090223483013907, "grad_norm": 0.17488856613636017, "learning_rate": 1e-05, "loss": 0.5174, "step": 3940 }, { "epoch": 1.0905002421642567, "grad_norm": 0.16573038697242737, "learning_rate": 1e-05, "loss": 0.4961, "step": 3941 }, { "epoch": 1.0907770013146059, "grad_norm": 0.17318788170814514, "learning_rate": 1e-05, "loss": 0.5061, "step": 3942 }, { "epoch": 1.0910537604649553, "grad_norm": 0.16685999929904938, "learning_rate": 1e-05, "loss": 0.5049, "step": 3943 }, { "epoch": 1.0913305196153047, "grad_norm": 0.16451004147529602, "learning_rate": 1e-05, "loss": 0.5183, "step": 3944 }, { "epoch": 1.091607278765654, "grad_norm": 0.17699562013149261, "learning_rate": 1e-05, "loss": 0.5458, "step": 3945 }, { "epoch": 1.0918840379160035, "grad_norm": 0.18287375569343567, "learning_rate": 1e-05, "loss": 0.5212, "step": 3946 }, { "epoch": 1.092160797066353, "grad_norm": 0.1626761257648468, "learning_rate": 1e-05, "loss": 0.511, "step": 3947 }, { "epoch": 1.0924375562167024, "grad_norm": 0.1765318661928177, "learning_rate": 1e-05, "loss": 0.5216, "step": 3948 }, { "epoch": 1.0927143153670518, "grad_norm": 0.18391066789627075, "learning_rate": 1e-05, "loss": 0.5112, "step": 3949 }, { "epoch": 1.0929910745174012, "grad_norm": 0.16589893400669098, "learning_rate": 1e-05, "loss": 0.5126, "step": 3950 }, { "epoch": 1.0932678336677506, "grad_norm": 0.17393876612186432, "learning_rate": 1e-05, "loss": 0.5132, "step": 3951 }, { "epoch": 1.0935445928181, "grad_norm": 0.16833285987377167, "learning_rate": 1e-05, "loss": 0.525, "step": 3952 }, { "epoch": 1.0938213519684494, "grad_norm": 0.18525747954845428, "learning_rate": 1e-05, "loss": 0.5022, "step": 3953 }, { "epoch": 1.0940981111187988, "grad_norm": 0.17460638284683228, "learning_rate": 1e-05, "loss": 0.5187, "step": 3954 }, { "epoch": 1.0943748702691483, "grad_norm": 0.17335021495819092, "learning_rate": 1e-05, "loss": 0.5032, "step": 3955 }, { "epoch": 1.0946516294194977, "grad_norm": 0.1694558709859848, "learning_rate": 1e-05, "loss": 0.5137, "step": 3956 }, { "epoch": 1.094928388569847, "grad_norm": 0.18453726172447205, "learning_rate": 1e-05, "loss": 0.5223, "step": 3957 }, { "epoch": 1.0952051477201965, "grad_norm": 0.1743597537279129, "learning_rate": 1e-05, "loss": 0.4952, "step": 3958 }, { "epoch": 1.095481906870546, "grad_norm": 0.1748226284980774, "learning_rate": 1e-05, "loss": 0.5271, "step": 3959 }, { "epoch": 1.0957586660208953, "grad_norm": 0.16875241696834564, "learning_rate": 1e-05, "loss": 0.5555, "step": 3960 }, { "epoch": 1.0960354251712447, "grad_norm": 0.169444277882576, "learning_rate": 1e-05, "loss": 0.5227, "step": 3961 }, { "epoch": 1.0963121843215942, "grad_norm": 0.17057859897613525, "learning_rate": 1e-05, "loss": 0.5316, "step": 3962 }, { "epoch": 1.0965889434719436, "grad_norm": 0.17908594012260437, "learning_rate": 1e-05, "loss": 0.5088, "step": 3963 }, { "epoch": 1.096865702622293, "grad_norm": 0.1752442717552185, "learning_rate": 1e-05, "loss": 0.5103, "step": 3964 }, { "epoch": 1.0971424617726424, "grad_norm": 0.16954727470874786, "learning_rate": 1e-05, "loss": 0.5299, "step": 3965 }, { "epoch": 1.0974192209229918, "grad_norm": 0.16262339055538177, "learning_rate": 1e-05, "loss": 0.4994, "step": 3966 }, { "epoch": 1.0976959800733412, "grad_norm": 0.17183803021907806, "learning_rate": 1e-05, "loss": 0.4916, "step": 3967 }, { "epoch": 1.0979727392236907, "grad_norm": 0.1737654060125351, "learning_rate": 1e-05, "loss": 0.5168, "step": 3968 }, { "epoch": 1.09824949837404, "grad_norm": 0.16419273614883423, "learning_rate": 1e-05, "loss": 0.495, "step": 3969 }, { "epoch": 1.0985262575243895, "grad_norm": 0.17522796988487244, "learning_rate": 1e-05, "loss": 0.5179, "step": 3970 }, { "epoch": 1.098803016674739, "grad_norm": 0.16596123576164246, "learning_rate": 1e-05, "loss": 0.5109, "step": 3971 }, { "epoch": 1.0990797758250883, "grad_norm": 0.16427922248840332, "learning_rate": 1e-05, "loss": 0.4956, "step": 3972 }, { "epoch": 1.0993565349754377, "grad_norm": 0.15961799025535583, "learning_rate": 1e-05, "loss": 0.528, "step": 3973 }, { "epoch": 1.0996332941257871, "grad_norm": 0.17489512264728546, "learning_rate": 1e-05, "loss": 0.5285, "step": 3974 }, { "epoch": 1.0999100532761363, "grad_norm": 0.17103201150894165, "learning_rate": 1e-05, "loss": 0.5215, "step": 3975 }, { "epoch": 1.1001868124264857, "grad_norm": 0.1730172485113144, "learning_rate": 1e-05, "loss": 0.5328, "step": 3976 }, { "epoch": 1.1004635715768352, "grad_norm": 0.17379075288772583, "learning_rate": 1e-05, "loss": 0.5641, "step": 3977 }, { "epoch": 1.1007403307271846, "grad_norm": 0.18123199045658112, "learning_rate": 1e-05, "loss": 0.507, "step": 3978 }, { "epoch": 1.101017089877534, "grad_norm": 0.17384381592273712, "learning_rate": 1e-05, "loss": 0.5112, "step": 3979 }, { "epoch": 1.1012938490278834, "grad_norm": 0.17244021594524384, "learning_rate": 1e-05, "loss": 0.5189, "step": 3980 }, { "epoch": 1.1015706081782328, "grad_norm": 0.16354502737522125, "learning_rate": 1e-05, "loss": 0.526, "step": 3981 }, { "epoch": 1.1018473673285822, "grad_norm": 0.17030122876167297, "learning_rate": 1e-05, "loss": 0.5192, "step": 3982 }, { "epoch": 1.1021241264789317, "grad_norm": 0.17471885681152344, "learning_rate": 1e-05, "loss": 0.5305, "step": 3983 }, { "epoch": 1.102400885629281, "grad_norm": 0.16745564341545105, "learning_rate": 1e-05, "loss": 0.5236, "step": 3984 }, { "epoch": 1.1026776447796305, "grad_norm": 0.17791979014873505, "learning_rate": 1e-05, "loss": 0.5078, "step": 3985 }, { "epoch": 1.10295440392998, "grad_norm": 0.16643719375133514, "learning_rate": 1e-05, "loss": 0.5055, "step": 3986 }, { "epoch": 1.1032311630803293, "grad_norm": 0.17678645253181458, "learning_rate": 1e-05, "loss": 0.4929, "step": 3987 }, { "epoch": 1.1035079222306787, "grad_norm": 0.17692217230796814, "learning_rate": 1e-05, "loss": 0.5343, "step": 3988 }, { "epoch": 1.1037846813810281, "grad_norm": 0.17783735692501068, "learning_rate": 1e-05, "loss": 0.545, "step": 3989 }, { "epoch": 1.1040614405313776, "grad_norm": 0.17212824523448944, "learning_rate": 1e-05, "loss": 0.5005, "step": 3990 }, { "epoch": 1.104338199681727, "grad_norm": 0.18189306557178497, "learning_rate": 1e-05, "loss": 0.5163, "step": 3991 }, { "epoch": 1.1046149588320764, "grad_norm": 0.17160017788410187, "learning_rate": 1e-05, "loss": 0.5085, "step": 3992 }, { "epoch": 1.1048917179824258, "grad_norm": 0.16803663969039917, "learning_rate": 1e-05, "loss": 0.5474, "step": 3993 }, { "epoch": 1.1051684771327752, "grad_norm": 0.1681232452392578, "learning_rate": 1e-05, "loss": 0.5327, "step": 3994 }, { "epoch": 1.1054452362831246, "grad_norm": 0.17842383682727814, "learning_rate": 1e-05, "loss": 0.5183, "step": 3995 }, { "epoch": 1.105721995433474, "grad_norm": 0.17265205085277557, "learning_rate": 1e-05, "loss": 0.5011, "step": 3996 }, { "epoch": 1.1059987545838235, "grad_norm": 0.17226260900497437, "learning_rate": 1e-05, "loss": 0.5176, "step": 3997 }, { "epoch": 1.1062755137341729, "grad_norm": 0.1790546327829361, "learning_rate": 1e-05, "loss": 0.5072, "step": 3998 }, { "epoch": 1.1065522728845223, "grad_norm": 0.17239093780517578, "learning_rate": 1e-05, "loss": 0.5404, "step": 3999 }, { "epoch": 1.1068290320348717, "grad_norm": 0.1735827475786209, "learning_rate": 1e-05, "loss": 0.5193, "step": 4000 }, { "epoch": 1.1071057911852211, "grad_norm": 0.16418901085853577, "learning_rate": 1e-05, "loss": 0.5259, "step": 4001 }, { "epoch": 1.1073825503355705, "grad_norm": 0.17394320666790009, "learning_rate": 1e-05, "loss": 0.5478, "step": 4002 }, { "epoch": 1.10765930948592, "grad_norm": 0.17640212178230286, "learning_rate": 1e-05, "loss": 0.4964, "step": 4003 }, { "epoch": 1.1079360686362694, "grad_norm": 0.17123377323150635, "learning_rate": 1e-05, "loss": 0.5403, "step": 4004 }, { "epoch": 1.1082128277866188, "grad_norm": 0.1744101196527481, "learning_rate": 1e-05, "loss": 0.5049, "step": 4005 }, { "epoch": 1.1084895869369682, "grad_norm": 0.1706363707780838, "learning_rate": 1e-05, "loss": 0.4847, "step": 4006 }, { "epoch": 1.1087663460873176, "grad_norm": 0.17349255084991455, "learning_rate": 1e-05, "loss": 0.4956, "step": 4007 }, { "epoch": 1.1090431052376668, "grad_norm": 0.17746607959270477, "learning_rate": 1e-05, "loss": 0.5029, "step": 4008 }, { "epoch": 1.1093198643880164, "grad_norm": 0.16963939368724823, "learning_rate": 1e-05, "loss": 0.4881, "step": 4009 }, { "epoch": 1.1095966235383656, "grad_norm": 0.16791820526123047, "learning_rate": 1e-05, "loss": 0.5366, "step": 4010 }, { "epoch": 1.109873382688715, "grad_norm": 0.17388026416301727, "learning_rate": 1e-05, "loss": 0.5167, "step": 4011 }, { "epoch": 1.1101501418390645, "grad_norm": 0.16983060538768768, "learning_rate": 1e-05, "loss": 0.5247, "step": 4012 }, { "epoch": 1.1104269009894139, "grad_norm": 0.18046225607395172, "learning_rate": 1e-05, "loss": 0.558, "step": 4013 }, { "epoch": 1.1107036601397633, "grad_norm": 0.18531522154808044, "learning_rate": 1e-05, "loss": 0.53, "step": 4014 }, { "epoch": 1.1109804192901127, "grad_norm": 0.16721253097057343, "learning_rate": 1e-05, "loss": 0.5241, "step": 4015 }, { "epoch": 1.1112571784404621, "grad_norm": 0.17279212176799774, "learning_rate": 1e-05, "loss": 0.5177, "step": 4016 }, { "epoch": 1.1115339375908115, "grad_norm": 0.16450102627277374, "learning_rate": 1e-05, "loss": 0.4975, "step": 4017 }, { "epoch": 1.111810696741161, "grad_norm": 0.17751450836658478, "learning_rate": 1e-05, "loss": 0.5354, "step": 4018 }, { "epoch": 1.1120874558915104, "grad_norm": 0.1709497720003128, "learning_rate": 1e-05, "loss": 0.4969, "step": 4019 }, { "epoch": 1.1123642150418598, "grad_norm": 0.17258799076080322, "learning_rate": 1e-05, "loss": 0.5267, "step": 4020 }, { "epoch": 1.1126409741922092, "grad_norm": 0.18473103642463684, "learning_rate": 1e-05, "loss": 0.518, "step": 4021 }, { "epoch": 1.1129177333425586, "grad_norm": 0.17434953153133392, "learning_rate": 1e-05, "loss": 0.4983, "step": 4022 }, { "epoch": 1.113194492492908, "grad_norm": 0.18928059935569763, "learning_rate": 1e-05, "loss": 0.5341, "step": 4023 }, { "epoch": 1.1134712516432574, "grad_norm": 0.17463476955890656, "learning_rate": 1e-05, "loss": 0.5428, "step": 4024 }, { "epoch": 1.1137480107936069, "grad_norm": 0.18120542168617249, "learning_rate": 1e-05, "loss": 0.515, "step": 4025 }, { "epoch": 1.1140247699439563, "grad_norm": 0.17350751161575317, "learning_rate": 1e-05, "loss": 0.5103, "step": 4026 }, { "epoch": 1.1143015290943057, "grad_norm": 0.1701623946428299, "learning_rate": 1e-05, "loss": 0.496, "step": 4027 }, { "epoch": 1.114578288244655, "grad_norm": 0.15835218131542206, "learning_rate": 1e-05, "loss": 0.4842, "step": 4028 }, { "epoch": 1.1148550473950045, "grad_norm": 0.17277675867080688, "learning_rate": 1e-05, "loss": 0.516, "step": 4029 }, { "epoch": 1.115131806545354, "grad_norm": 0.1716863512992859, "learning_rate": 1e-05, "loss": 0.4967, "step": 4030 }, { "epoch": 1.1154085656957033, "grad_norm": 0.16785162687301636, "learning_rate": 1e-05, "loss": 0.5399, "step": 4031 }, { "epoch": 1.1156853248460528, "grad_norm": 0.16819527745246887, "learning_rate": 1e-05, "loss": 0.5222, "step": 4032 }, { "epoch": 1.1159620839964022, "grad_norm": 0.1636781394481659, "learning_rate": 1e-05, "loss": 0.5008, "step": 4033 }, { "epoch": 1.1162388431467516, "grad_norm": 0.16271738708019257, "learning_rate": 1e-05, "loss": 0.5175, "step": 4034 }, { "epoch": 1.116515602297101, "grad_norm": 0.18402265012264252, "learning_rate": 1e-05, "loss": 0.5666, "step": 4035 }, { "epoch": 1.1167923614474504, "grad_norm": 0.17271728813648224, "learning_rate": 1e-05, "loss": 0.5253, "step": 4036 }, { "epoch": 1.1170691205977998, "grad_norm": 0.16609670221805573, "learning_rate": 1e-05, "loss": 0.5139, "step": 4037 }, { "epoch": 1.1173458797481493, "grad_norm": 0.17967364192008972, "learning_rate": 1e-05, "loss": 0.5506, "step": 4038 }, { "epoch": 1.1176226388984987, "grad_norm": 0.18081800639629364, "learning_rate": 1e-05, "loss": 0.5188, "step": 4039 }, { "epoch": 1.117899398048848, "grad_norm": 0.17375054955482483, "learning_rate": 1e-05, "loss": 0.4861, "step": 4040 }, { "epoch": 1.1181761571991975, "grad_norm": 0.17584523558616638, "learning_rate": 1e-05, "loss": 0.5332, "step": 4041 }, { "epoch": 1.118452916349547, "grad_norm": 0.1758279949426651, "learning_rate": 1e-05, "loss": 0.5631, "step": 4042 }, { "epoch": 1.118729675499896, "grad_norm": 0.17792776226997375, "learning_rate": 1e-05, "loss": 0.4929, "step": 4043 }, { "epoch": 1.1190064346502457, "grad_norm": 0.18537555634975433, "learning_rate": 1e-05, "loss": 0.5136, "step": 4044 }, { "epoch": 1.119283193800595, "grad_norm": 0.1724126935005188, "learning_rate": 1e-05, "loss": 0.5013, "step": 4045 }, { "epoch": 1.1195599529509443, "grad_norm": 0.16953310370445251, "learning_rate": 1e-05, "loss": 0.5155, "step": 4046 }, { "epoch": 1.1198367121012938, "grad_norm": 0.17226636409759521, "learning_rate": 1e-05, "loss": 0.5033, "step": 4047 }, { "epoch": 1.1201134712516432, "grad_norm": 0.17802292108535767, "learning_rate": 1e-05, "loss": 0.5613, "step": 4048 }, { "epoch": 1.1203902304019926, "grad_norm": 0.16872070729732513, "learning_rate": 1e-05, "loss": 0.4961, "step": 4049 }, { "epoch": 1.120666989552342, "grad_norm": 0.1729227751493454, "learning_rate": 1e-05, "loss": 0.5309, "step": 4050 }, { "epoch": 1.1209437487026914, "grad_norm": 0.16954649984836578, "learning_rate": 1e-05, "loss": 0.5132, "step": 4051 }, { "epoch": 1.1212205078530408, "grad_norm": 0.17199578881263733, "learning_rate": 1e-05, "loss": 0.5056, "step": 4052 }, { "epoch": 1.1214972670033903, "grad_norm": 0.17372283339500427, "learning_rate": 1e-05, "loss": 0.5332, "step": 4053 }, { "epoch": 1.1217740261537397, "grad_norm": 0.1806822568178177, "learning_rate": 1e-05, "loss": 0.5073, "step": 4054 }, { "epoch": 1.122050785304089, "grad_norm": 0.18208271265029907, "learning_rate": 1e-05, "loss": 0.5184, "step": 4055 }, { "epoch": 1.1223275444544385, "grad_norm": 0.17358702421188354, "learning_rate": 1e-05, "loss": 0.5235, "step": 4056 }, { "epoch": 1.122604303604788, "grad_norm": 0.1699797362089157, "learning_rate": 1e-05, "loss": 0.5142, "step": 4057 }, { "epoch": 1.1228810627551373, "grad_norm": 0.17954248189926147, "learning_rate": 1e-05, "loss": 0.5205, "step": 4058 }, { "epoch": 1.1231578219054867, "grad_norm": 0.17476852238178253, "learning_rate": 1e-05, "loss": 0.5366, "step": 4059 }, { "epoch": 1.1234345810558362, "grad_norm": 0.1713910847902298, "learning_rate": 1e-05, "loss": 0.5137, "step": 4060 }, { "epoch": 1.1237113402061856, "grad_norm": 0.17098991572856903, "learning_rate": 1e-05, "loss": 0.5153, "step": 4061 }, { "epoch": 1.123988099356535, "grad_norm": 0.17058713734149933, "learning_rate": 1e-05, "loss": 0.5021, "step": 4062 }, { "epoch": 1.1242648585068844, "grad_norm": 0.18189173936843872, "learning_rate": 1e-05, "loss": 0.4994, "step": 4063 }, { "epoch": 1.1245416176572338, "grad_norm": 0.1766139566898346, "learning_rate": 1e-05, "loss": 0.4946, "step": 4064 }, { "epoch": 1.1248183768075832, "grad_norm": 0.16736042499542236, "learning_rate": 1e-05, "loss": 0.4972, "step": 4065 }, { "epoch": 1.1250951359579326, "grad_norm": 0.16484786570072174, "learning_rate": 1e-05, "loss": 0.4953, "step": 4066 }, { "epoch": 1.125371895108282, "grad_norm": 0.17480988800525665, "learning_rate": 1e-05, "loss": 0.5246, "step": 4067 }, { "epoch": 1.1256486542586315, "grad_norm": 0.17369483411312103, "learning_rate": 1e-05, "loss": 0.5178, "step": 4068 }, { "epoch": 1.125925413408981, "grad_norm": 0.18393592536449432, "learning_rate": 1e-05, "loss": 0.5271, "step": 4069 }, { "epoch": 1.1262021725593303, "grad_norm": 0.18002063035964966, "learning_rate": 1e-05, "loss": 0.5283, "step": 4070 }, { "epoch": 1.1264789317096797, "grad_norm": 0.17061714828014374, "learning_rate": 1e-05, "loss": 0.5014, "step": 4071 }, { "epoch": 1.1267556908600291, "grad_norm": 0.17702075839042664, "learning_rate": 1e-05, "loss": 0.5395, "step": 4072 }, { "epoch": 1.1270324500103786, "grad_norm": 0.17604058980941772, "learning_rate": 1e-05, "loss": 0.5521, "step": 4073 }, { "epoch": 1.127309209160728, "grad_norm": 0.18024879693984985, "learning_rate": 1e-05, "loss": 0.5069, "step": 4074 }, { "epoch": 1.1275859683110774, "grad_norm": 0.1744735836982727, "learning_rate": 1e-05, "loss": 0.5463, "step": 4075 }, { "epoch": 1.1278627274614266, "grad_norm": 0.18208055198192596, "learning_rate": 1e-05, "loss": 0.5367, "step": 4076 }, { "epoch": 1.1281394866117762, "grad_norm": 0.17792052030563354, "learning_rate": 1e-05, "loss": 0.5044, "step": 4077 }, { "epoch": 1.1284162457621254, "grad_norm": 0.17578701674938202, "learning_rate": 1e-05, "loss": 0.5264, "step": 4078 }, { "epoch": 1.128693004912475, "grad_norm": 0.16761551797389984, "learning_rate": 1e-05, "loss": 0.5299, "step": 4079 }, { "epoch": 1.1289697640628242, "grad_norm": 0.16861550509929657, "learning_rate": 1e-05, "loss": 0.5305, "step": 4080 }, { "epoch": 1.1292465232131736, "grad_norm": 0.16835778951644897, "learning_rate": 1e-05, "loss": 0.5185, "step": 4081 }, { "epoch": 1.129523282363523, "grad_norm": 0.181551992893219, "learning_rate": 1e-05, "loss": 0.5071, "step": 4082 }, { "epoch": 1.1298000415138725, "grad_norm": 0.16861292719841003, "learning_rate": 1e-05, "loss": 0.4943, "step": 4083 }, { "epoch": 1.130076800664222, "grad_norm": 0.16593408584594727, "learning_rate": 1e-05, "loss": 0.5375, "step": 4084 }, { "epoch": 1.1303535598145713, "grad_norm": 0.17733436822891235, "learning_rate": 1e-05, "loss": 0.5285, "step": 4085 }, { "epoch": 1.1306303189649207, "grad_norm": 0.1733834445476532, "learning_rate": 1e-05, "loss": 0.5338, "step": 4086 }, { "epoch": 1.1309070781152701, "grad_norm": 0.16758568584918976, "learning_rate": 1e-05, "loss": 0.5455, "step": 4087 }, { "epoch": 1.1311838372656196, "grad_norm": 0.17362384498119354, "learning_rate": 1e-05, "loss": 0.5237, "step": 4088 }, { "epoch": 1.131460596415969, "grad_norm": 0.16955198347568512, "learning_rate": 1e-05, "loss": 0.5097, "step": 4089 }, { "epoch": 1.1317373555663184, "grad_norm": 0.16508308053016663, "learning_rate": 1e-05, "loss": 0.5182, "step": 4090 }, { "epoch": 1.1320141147166678, "grad_norm": 0.1667427122592926, "learning_rate": 1e-05, "loss": 0.5279, "step": 4091 }, { "epoch": 1.1322908738670172, "grad_norm": 0.16866427659988403, "learning_rate": 1e-05, "loss": 0.4817, "step": 4092 }, { "epoch": 1.1325676330173666, "grad_norm": 0.1654849797487259, "learning_rate": 1e-05, "loss": 0.5338, "step": 4093 }, { "epoch": 1.132844392167716, "grad_norm": 0.17869599163532257, "learning_rate": 1e-05, "loss": 0.554, "step": 4094 }, { "epoch": 1.1331211513180655, "grad_norm": 0.1786649525165558, "learning_rate": 1e-05, "loss": 0.4916, "step": 4095 }, { "epoch": 1.1333979104684149, "grad_norm": 0.16092990338802338, "learning_rate": 1e-05, "loss": 0.4725, "step": 4096 }, { "epoch": 1.1336746696187643, "grad_norm": 0.18083089590072632, "learning_rate": 1e-05, "loss": 0.5214, "step": 4097 }, { "epoch": 1.1339514287691137, "grad_norm": 0.17164017260074615, "learning_rate": 1e-05, "loss": 0.5383, "step": 4098 }, { "epoch": 1.1342281879194631, "grad_norm": 0.17427849769592285, "learning_rate": 1e-05, "loss": 0.491, "step": 4099 }, { "epoch": 1.1345049470698125, "grad_norm": 0.17062413692474365, "learning_rate": 1e-05, "loss": 0.5057, "step": 4100 }, { "epoch": 1.134781706220162, "grad_norm": 0.17844507098197937, "learning_rate": 1e-05, "loss": 0.5268, "step": 4101 }, { "epoch": 1.1350584653705114, "grad_norm": 0.1741037219762802, "learning_rate": 1e-05, "loss": 0.5044, "step": 4102 }, { "epoch": 1.1353352245208608, "grad_norm": 0.16470949351787567, "learning_rate": 1e-05, "loss": 0.4865, "step": 4103 }, { "epoch": 1.1356119836712102, "grad_norm": 0.1721188724040985, "learning_rate": 1e-05, "loss": 0.5214, "step": 4104 }, { "epoch": 1.1358887428215596, "grad_norm": 0.17884492874145508, "learning_rate": 1e-05, "loss": 0.486, "step": 4105 }, { "epoch": 1.136165501971909, "grad_norm": 0.17891058325767517, "learning_rate": 1e-05, "loss": 0.5169, "step": 4106 }, { "epoch": 1.1364422611222584, "grad_norm": 0.17120586335659027, "learning_rate": 1e-05, "loss": 0.5022, "step": 4107 }, { "epoch": 1.1367190202726078, "grad_norm": 0.16023525595664978, "learning_rate": 1e-05, "loss": 0.5085, "step": 4108 }, { "epoch": 1.1369957794229573, "grad_norm": 0.1740776002407074, "learning_rate": 1e-05, "loss": 0.4995, "step": 4109 }, { "epoch": 1.1372725385733067, "grad_norm": 0.16774702072143555, "learning_rate": 1e-05, "loss": 0.4968, "step": 4110 }, { "epoch": 1.1375492977236559, "grad_norm": 0.1728389412164688, "learning_rate": 1e-05, "loss": 0.5185, "step": 4111 }, { "epoch": 1.1378260568740055, "grad_norm": 0.1713773012161255, "learning_rate": 1e-05, "loss": 0.5169, "step": 4112 }, { "epoch": 1.1381028160243547, "grad_norm": 0.1713906228542328, "learning_rate": 1e-05, "loss": 0.5336, "step": 4113 }, { "epoch": 1.1383795751747041, "grad_norm": 0.17600221931934357, "learning_rate": 1e-05, "loss": 0.5169, "step": 4114 }, { "epoch": 1.1386563343250535, "grad_norm": 0.16886916756629944, "learning_rate": 1e-05, "loss": 0.5295, "step": 4115 }, { "epoch": 1.138933093475403, "grad_norm": 0.1706850379705429, "learning_rate": 1e-05, "loss": 0.535, "step": 4116 }, { "epoch": 1.1392098526257524, "grad_norm": 0.17344388365745544, "learning_rate": 1e-05, "loss": 0.5296, "step": 4117 }, { "epoch": 1.1394866117761018, "grad_norm": 0.1700541079044342, "learning_rate": 1e-05, "loss": 0.5043, "step": 4118 }, { "epoch": 1.1397633709264512, "grad_norm": 0.16312798857688904, "learning_rate": 1e-05, "loss": 0.5184, "step": 4119 }, { "epoch": 1.1400401300768006, "grad_norm": 0.16966953873634338, "learning_rate": 1e-05, "loss": 0.4978, "step": 4120 }, { "epoch": 1.14031688922715, "grad_norm": 0.17619198560714722, "learning_rate": 1e-05, "loss": 0.5421, "step": 4121 }, { "epoch": 1.1405936483774994, "grad_norm": 0.17313168942928314, "learning_rate": 1e-05, "loss": 0.5299, "step": 4122 }, { "epoch": 1.1408704075278489, "grad_norm": 0.16432714462280273, "learning_rate": 1e-05, "loss": 0.5343, "step": 4123 }, { "epoch": 1.1411471666781983, "grad_norm": 0.16582728922367096, "learning_rate": 1e-05, "loss": 0.5322, "step": 4124 }, { "epoch": 1.1414239258285477, "grad_norm": 0.17435325682163239, "learning_rate": 1e-05, "loss": 0.5189, "step": 4125 }, { "epoch": 1.141700684978897, "grad_norm": 0.17715565860271454, "learning_rate": 1e-05, "loss": 0.5244, "step": 4126 }, { "epoch": 1.1419774441292465, "grad_norm": 0.17424854636192322, "learning_rate": 1e-05, "loss": 0.5057, "step": 4127 }, { "epoch": 1.142254203279596, "grad_norm": 0.18508116900920868, "learning_rate": 1e-05, "loss": 0.522, "step": 4128 }, { "epoch": 1.1425309624299453, "grad_norm": 0.17523756623268127, "learning_rate": 1e-05, "loss": 0.5081, "step": 4129 }, { "epoch": 1.1428077215802948, "grad_norm": 0.16558365523815155, "learning_rate": 1e-05, "loss": 0.539, "step": 4130 }, { "epoch": 1.1430844807306442, "grad_norm": 0.17034827172756195, "learning_rate": 1e-05, "loss": 0.497, "step": 4131 }, { "epoch": 1.1433612398809936, "grad_norm": 0.174336239695549, "learning_rate": 1e-05, "loss": 0.5153, "step": 4132 }, { "epoch": 1.143637999031343, "grad_norm": 0.17542728781700134, "learning_rate": 1e-05, "loss": 0.5209, "step": 4133 }, { "epoch": 1.1439147581816924, "grad_norm": 0.16739331185817719, "learning_rate": 1e-05, "loss": 0.4673, "step": 4134 }, { "epoch": 1.1441915173320418, "grad_norm": 0.1723574548959732, "learning_rate": 1e-05, "loss": 0.5239, "step": 4135 }, { "epoch": 1.1444682764823912, "grad_norm": 0.17359893023967743, "learning_rate": 1e-05, "loss": 0.5139, "step": 4136 }, { "epoch": 1.1447450356327407, "grad_norm": 0.1756354570388794, "learning_rate": 1e-05, "loss": 0.5263, "step": 4137 }, { "epoch": 1.14502179478309, "grad_norm": 0.17271068692207336, "learning_rate": 1e-05, "loss": 0.5121, "step": 4138 }, { "epoch": 1.1452985539334395, "grad_norm": 0.1700959950685501, "learning_rate": 1e-05, "loss": 0.5029, "step": 4139 }, { "epoch": 1.145575313083789, "grad_norm": 0.1604171246290207, "learning_rate": 1e-05, "loss": 0.4917, "step": 4140 }, { "epoch": 1.1458520722341383, "grad_norm": 0.181277334690094, "learning_rate": 1e-05, "loss": 0.5241, "step": 4141 }, { "epoch": 1.1461288313844877, "grad_norm": 0.16966894268989563, "learning_rate": 1e-05, "loss": 0.5109, "step": 4142 }, { "epoch": 1.1464055905348371, "grad_norm": 0.1640961617231369, "learning_rate": 1e-05, "loss": 0.5096, "step": 4143 }, { "epoch": 1.1466823496851863, "grad_norm": 0.16520541906356812, "learning_rate": 1e-05, "loss": 0.5156, "step": 4144 }, { "epoch": 1.146959108835536, "grad_norm": 0.17094843089580536, "learning_rate": 1e-05, "loss": 0.5375, "step": 4145 }, { "epoch": 1.1472358679858852, "grad_norm": 0.17395305633544922, "learning_rate": 1e-05, "loss": 0.5089, "step": 4146 }, { "epoch": 1.1475126271362348, "grad_norm": 0.16533471643924713, "learning_rate": 1e-05, "loss": 0.5066, "step": 4147 }, { "epoch": 1.147789386286584, "grad_norm": 0.16924361884593964, "learning_rate": 1e-05, "loss": 0.4999, "step": 4148 }, { "epoch": 1.1480661454369334, "grad_norm": 0.17058496177196503, "learning_rate": 1e-05, "loss": 0.5274, "step": 4149 }, { "epoch": 1.1483429045872828, "grad_norm": 0.17257040739059448, "learning_rate": 1e-05, "loss": 0.5366, "step": 4150 }, { "epoch": 1.1486196637376322, "grad_norm": 0.17677202820777893, "learning_rate": 1e-05, "loss": 0.545, "step": 4151 }, { "epoch": 1.1488964228879817, "grad_norm": 0.16962391138076782, "learning_rate": 1e-05, "loss": 0.5235, "step": 4152 }, { "epoch": 1.149173182038331, "grad_norm": 0.17118516564369202, "learning_rate": 1e-05, "loss": 0.5264, "step": 4153 }, { "epoch": 1.1494499411886805, "grad_norm": 0.1767408847808838, "learning_rate": 1e-05, "loss": 0.5147, "step": 4154 }, { "epoch": 1.14972670033903, "grad_norm": 0.17382875084877014, "learning_rate": 1e-05, "loss": 0.4979, "step": 4155 }, { "epoch": 1.1500034594893793, "grad_norm": 0.16385382413864136, "learning_rate": 1e-05, "loss": 0.5103, "step": 4156 }, { "epoch": 1.1502802186397287, "grad_norm": 0.1691775619983673, "learning_rate": 1e-05, "loss": 0.5331, "step": 4157 }, { "epoch": 1.1505569777900782, "grad_norm": 0.1694396436214447, "learning_rate": 1e-05, "loss": 0.5017, "step": 4158 }, { "epoch": 1.1508337369404276, "grad_norm": 0.1664024442434311, "learning_rate": 1e-05, "loss": 0.5204, "step": 4159 }, { "epoch": 1.151110496090777, "grad_norm": 0.18004994094371796, "learning_rate": 1e-05, "loss": 0.5319, "step": 4160 }, { "epoch": 1.1513872552411264, "grad_norm": 0.17982478439807892, "learning_rate": 1e-05, "loss": 0.4868, "step": 4161 }, { "epoch": 1.1516640143914758, "grad_norm": 0.17160964012145996, "learning_rate": 1e-05, "loss": 0.5081, "step": 4162 }, { "epoch": 1.1519407735418252, "grad_norm": 0.1729656606912613, "learning_rate": 1e-05, "loss": 0.5322, "step": 4163 }, { "epoch": 1.1522175326921746, "grad_norm": 0.1673382669687271, "learning_rate": 1e-05, "loss": 0.4794, "step": 4164 }, { "epoch": 1.152494291842524, "grad_norm": 0.17143993079662323, "learning_rate": 1e-05, "loss": 0.5226, "step": 4165 }, { "epoch": 1.1527710509928735, "grad_norm": 0.17746573686599731, "learning_rate": 1e-05, "loss": 0.507, "step": 4166 }, { "epoch": 1.1530478101432229, "grad_norm": 0.16558395326137543, "learning_rate": 1e-05, "loss": 0.5261, "step": 4167 }, { "epoch": 1.1533245692935723, "grad_norm": 0.17268435657024384, "learning_rate": 1e-05, "loss": 0.4856, "step": 4168 }, { "epoch": 1.1536013284439217, "grad_norm": 0.1739988476037979, "learning_rate": 1e-05, "loss": 0.511, "step": 4169 }, { "epoch": 1.1538780875942711, "grad_norm": 0.1725340634584427, "learning_rate": 1e-05, "loss": 0.5494, "step": 4170 }, { "epoch": 1.1541548467446205, "grad_norm": 0.17087383568286896, "learning_rate": 1e-05, "loss": 0.5145, "step": 4171 }, { "epoch": 1.15443160589497, "grad_norm": 0.1679050624370575, "learning_rate": 1e-05, "loss": 0.5419, "step": 4172 }, { "epoch": 1.1547083650453194, "grad_norm": 0.16805393993854523, "learning_rate": 1e-05, "loss": 0.5025, "step": 4173 }, { "epoch": 1.1549851241956688, "grad_norm": 0.15968447923660278, "learning_rate": 1e-05, "loss": 0.4989, "step": 4174 }, { "epoch": 1.1552618833460182, "grad_norm": 0.18035654723644257, "learning_rate": 1e-05, "loss": 0.4969, "step": 4175 }, { "epoch": 1.1555386424963676, "grad_norm": 0.1690439134836197, "learning_rate": 1e-05, "loss": 0.5462, "step": 4176 }, { "epoch": 1.155815401646717, "grad_norm": 0.17308031022548676, "learning_rate": 1e-05, "loss": 0.4998, "step": 4177 }, { "epoch": 1.1560921607970664, "grad_norm": 0.16997094452381134, "learning_rate": 1e-05, "loss": 0.5035, "step": 4178 }, { "epoch": 1.1563689199474156, "grad_norm": 0.1673530787229538, "learning_rate": 1e-05, "loss": 0.5209, "step": 4179 }, { "epoch": 1.1566456790977653, "grad_norm": 0.1764545440673828, "learning_rate": 1e-05, "loss": 0.5323, "step": 4180 }, { "epoch": 1.1569224382481145, "grad_norm": 0.17585675418376923, "learning_rate": 1e-05, "loss": 0.5189, "step": 4181 }, { "epoch": 1.157199197398464, "grad_norm": 0.1672927290201187, "learning_rate": 1e-05, "loss": 0.4988, "step": 4182 }, { "epoch": 1.1574759565488133, "grad_norm": 0.17203086614608765, "learning_rate": 1e-05, "loss": 0.4935, "step": 4183 }, { "epoch": 1.1577527156991627, "grad_norm": 0.16796250641345978, "learning_rate": 1e-05, "loss": 0.4951, "step": 4184 }, { "epoch": 1.1580294748495121, "grad_norm": 0.16844289004802704, "learning_rate": 1e-05, "loss": 0.4831, "step": 4185 }, { "epoch": 1.1583062339998615, "grad_norm": 0.16799256205558777, "learning_rate": 1e-05, "loss": 0.5302, "step": 4186 }, { "epoch": 1.158582993150211, "grad_norm": 0.1683177500963211, "learning_rate": 1e-05, "loss": 0.4993, "step": 4187 }, { "epoch": 1.1588597523005604, "grad_norm": 0.16656531393527985, "learning_rate": 1e-05, "loss": 0.5012, "step": 4188 }, { "epoch": 1.1591365114509098, "grad_norm": 0.16481180489063263, "learning_rate": 1e-05, "loss": 0.5336, "step": 4189 }, { "epoch": 1.1594132706012592, "grad_norm": 0.17071667313575745, "learning_rate": 1e-05, "loss": 0.4881, "step": 4190 }, { "epoch": 1.1596900297516086, "grad_norm": 0.1669653356075287, "learning_rate": 1e-05, "loss": 0.4955, "step": 4191 }, { "epoch": 1.159966788901958, "grad_norm": 0.17962978780269623, "learning_rate": 1e-05, "loss": 0.5465, "step": 4192 }, { "epoch": 1.1602435480523074, "grad_norm": 0.16869264841079712, "learning_rate": 1e-05, "loss": 0.5353, "step": 4193 }, { "epoch": 1.1605203072026569, "grad_norm": 0.1746317744255066, "learning_rate": 1e-05, "loss": 0.5159, "step": 4194 }, { "epoch": 1.1607970663530063, "grad_norm": 0.17314189672470093, "learning_rate": 1e-05, "loss": 0.5345, "step": 4195 }, { "epoch": 1.1610738255033557, "grad_norm": 0.17354509234428406, "learning_rate": 1e-05, "loss": 0.5253, "step": 4196 }, { "epoch": 1.161350584653705, "grad_norm": 0.17244990170001984, "learning_rate": 1e-05, "loss": 0.5208, "step": 4197 }, { "epoch": 1.1616273438040545, "grad_norm": 0.18155184388160706, "learning_rate": 1e-05, "loss": 0.53, "step": 4198 }, { "epoch": 1.161904102954404, "grad_norm": 0.1688210815191269, "learning_rate": 1e-05, "loss": 0.5131, "step": 4199 }, { "epoch": 1.1621808621047534, "grad_norm": 0.1660311371088028, "learning_rate": 1e-05, "loss": 0.4973, "step": 4200 }, { "epoch": 1.1624576212551028, "grad_norm": 0.17087212204933167, "learning_rate": 1e-05, "loss": 0.4875, "step": 4201 }, { "epoch": 1.1627343804054522, "grad_norm": 0.16993387043476105, "learning_rate": 1e-05, "loss": 0.5101, "step": 4202 }, { "epoch": 1.1630111395558016, "grad_norm": 0.16993796825408936, "learning_rate": 1e-05, "loss": 0.5224, "step": 4203 }, { "epoch": 1.163287898706151, "grad_norm": 0.17470669746398926, "learning_rate": 1e-05, "loss": 0.4972, "step": 4204 }, { "epoch": 1.1635646578565004, "grad_norm": 0.18211008608341217, "learning_rate": 1e-05, "loss": 0.4981, "step": 4205 }, { "epoch": 1.1638414170068498, "grad_norm": 0.1661873608827591, "learning_rate": 1e-05, "loss": 0.5087, "step": 4206 }, { "epoch": 1.1641181761571993, "grad_norm": 0.17136840522289276, "learning_rate": 1e-05, "loss": 0.5044, "step": 4207 }, { "epoch": 1.1643949353075487, "grad_norm": 0.17491783201694489, "learning_rate": 1e-05, "loss": 0.5145, "step": 4208 }, { "epoch": 1.164671694457898, "grad_norm": 0.17275342345237732, "learning_rate": 1e-05, "loss": 0.5606, "step": 4209 }, { "epoch": 1.1649484536082475, "grad_norm": 0.17597803473472595, "learning_rate": 1e-05, "loss": 0.515, "step": 4210 }, { "epoch": 1.165225212758597, "grad_norm": 0.1715412437915802, "learning_rate": 1e-05, "loss": 0.5132, "step": 4211 }, { "epoch": 1.1655019719089463, "grad_norm": 0.17125172913074493, "learning_rate": 1e-05, "loss": 0.5114, "step": 4212 }, { "epoch": 1.1657787310592957, "grad_norm": 0.17297132313251495, "learning_rate": 1e-05, "loss": 0.5219, "step": 4213 }, { "epoch": 1.166055490209645, "grad_norm": 0.16386865079402924, "learning_rate": 1e-05, "loss": 0.4886, "step": 4214 }, { "epoch": 1.1663322493599946, "grad_norm": 0.16651712357997894, "learning_rate": 1e-05, "loss": 0.5122, "step": 4215 }, { "epoch": 1.1666090085103438, "grad_norm": 0.16780123114585876, "learning_rate": 1e-05, "loss": 0.485, "step": 4216 }, { "epoch": 1.1668857676606932, "grad_norm": 0.17094266414642334, "learning_rate": 1e-05, "loss": 0.5407, "step": 4217 }, { "epoch": 1.1671625268110426, "grad_norm": 0.17890259623527527, "learning_rate": 1e-05, "loss": 0.519, "step": 4218 }, { "epoch": 1.167439285961392, "grad_norm": 0.17692647874355316, "learning_rate": 1e-05, "loss": 0.5036, "step": 4219 }, { "epoch": 1.1677160451117414, "grad_norm": 0.17594631016254425, "learning_rate": 1e-05, "loss": 0.5409, "step": 4220 }, { "epoch": 1.1679928042620908, "grad_norm": 0.17161163687705994, "learning_rate": 1e-05, "loss": 0.5082, "step": 4221 }, { "epoch": 1.1682695634124403, "grad_norm": 0.1738453358411789, "learning_rate": 1e-05, "loss": 0.5234, "step": 4222 }, { "epoch": 1.1685463225627897, "grad_norm": 0.1594688445329666, "learning_rate": 1e-05, "loss": 0.51, "step": 4223 }, { "epoch": 1.168823081713139, "grad_norm": 0.17259423434734344, "learning_rate": 1e-05, "loss": 0.5011, "step": 4224 }, { "epoch": 1.1690998408634885, "grad_norm": 0.17342595756053925, "learning_rate": 1e-05, "loss": 0.5019, "step": 4225 }, { "epoch": 1.169376600013838, "grad_norm": 0.17316821217536926, "learning_rate": 1e-05, "loss": 0.5165, "step": 4226 }, { "epoch": 1.1696533591641873, "grad_norm": 0.1761951893568039, "learning_rate": 1e-05, "loss": 0.5019, "step": 4227 }, { "epoch": 1.1699301183145367, "grad_norm": 0.16353365778923035, "learning_rate": 1e-05, "loss": 0.4811, "step": 4228 }, { "epoch": 1.1702068774648862, "grad_norm": 0.17390656471252441, "learning_rate": 1e-05, "loss": 0.5336, "step": 4229 }, { "epoch": 1.1704836366152356, "grad_norm": 0.17525067925453186, "learning_rate": 1e-05, "loss": 0.5197, "step": 4230 }, { "epoch": 1.170760395765585, "grad_norm": 0.17432326078414917, "learning_rate": 1e-05, "loss": 0.5081, "step": 4231 }, { "epoch": 1.1710371549159344, "grad_norm": 0.16988441348075867, "learning_rate": 1e-05, "loss": 0.5056, "step": 4232 }, { "epoch": 1.1713139140662838, "grad_norm": 0.17347674071788788, "learning_rate": 1e-05, "loss": 0.5212, "step": 4233 }, { "epoch": 1.1715906732166332, "grad_norm": 0.17142224311828613, "learning_rate": 1e-05, "loss": 0.5025, "step": 4234 }, { "epoch": 1.1718674323669827, "grad_norm": 0.17264048755168915, "learning_rate": 1e-05, "loss": 0.5214, "step": 4235 }, { "epoch": 1.172144191517332, "grad_norm": 0.17906621098518372, "learning_rate": 1e-05, "loss": 0.5326, "step": 4236 }, { "epoch": 1.1724209506676815, "grad_norm": 0.1839480996131897, "learning_rate": 1e-05, "loss": 0.509, "step": 4237 }, { "epoch": 1.172697709818031, "grad_norm": 0.1721736639738083, "learning_rate": 1e-05, "loss": 0.5229, "step": 4238 }, { "epoch": 1.1729744689683803, "grad_norm": 0.1726016253232956, "learning_rate": 1e-05, "loss": 0.5261, "step": 4239 }, { "epoch": 1.1732512281187297, "grad_norm": 0.17016121745109558, "learning_rate": 1e-05, "loss": 0.5283, "step": 4240 }, { "epoch": 1.1735279872690791, "grad_norm": 0.1723291426897049, "learning_rate": 1e-05, "loss": 0.5092, "step": 4241 }, { "epoch": 1.1738047464194286, "grad_norm": 0.16640178859233856, "learning_rate": 1e-05, "loss": 0.499, "step": 4242 }, { "epoch": 1.174081505569778, "grad_norm": 0.17119255661964417, "learning_rate": 1e-05, "loss": 0.5102, "step": 4243 }, { "epoch": 1.1743582647201274, "grad_norm": 0.16557462513446808, "learning_rate": 1e-05, "loss": 0.5213, "step": 4244 }, { "epoch": 1.1746350238704768, "grad_norm": 0.17537568509578705, "learning_rate": 1e-05, "loss": 0.5146, "step": 4245 }, { "epoch": 1.1749117830208262, "grad_norm": 0.16305966675281525, "learning_rate": 1e-05, "loss": 0.534, "step": 4246 }, { "epoch": 1.1751885421711754, "grad_norm": 0.17066259682178497, "learning_rate": 1e-05, "loss": 0.4999, "step": 4247 }, { "epoch": 1.175465301321525, "grad_norm": 0.17053043842315674, "learning_rate": 1e-05, "loss": 0.4831, "step": 4248 }, { "epoch": 1.1757420604718742, "grad_norm": 0.16949616372585297, "learning_rate": 1e-05, "loss": 0.5251, "step": 4249 }, { "epoch": 1.1760188196222239, "grad_norm": 0.1682741641998291, "learning_rate": 1e-05, "loss": 0.5389, "step": 4250 }, { "epoch": 1.176295578772573, "grad_norm": 0.16515906155109406, "learning_rate": 1e-05, "loss": 0.4943, "step": 4251 }, { "epoch": 1.1765723379229225, "grad_norm": 0.17016692459583282, "learning_rate": 1e-05, "loss": 0.5254, "step": 4252 }, { "epoch": 1.176849097073272, "grad_norm": 0.1718224138021469, "learning_rate": 1e-05, "loss": 0.4974, "step": 4253 }, { "epoch": 1.1771258562236213, "grad_norm": 0.1690206080675125, "learning_rate": 1e-05, "loss": 0.5112, "step": 4254 }, { "epoch": 1.1774026153739707, "grad_norm": 0.16231167316436768, "learning_rate": 1e-05, "loss": 0.5067, "step": 4255 }, { "epoch": 1.1776793745243201, "grad_norm": 0.17158319056034088, "learning_rate": 1e-05, "loss": 0.5203, "step": 4256 }, { "epoch": 1.1779561336746696, "grad_norm": 0.16760413348674774, "learning_rate": 1e-05, "loss": 0.5303, "step": 4257 }, { "epoch": 1.178232892825019, "grad_norm": 0.17144376039505005, "learning_rate": 1e-05, "loss": 0.5112, "step": 4258 }, { "epoch": 1.1785096519753684, "grad_norm": 0.16775117814540863, "learning_rate": 1e-05, "loss": 0.5298, "step": 4259 }, { "epoch": 1.1787864111257178, "grad_norm": 0.1745121330022812, "learning_rate": 1e-05, "loss": 0.5359, "step": 4260 }, { "epoch": 1.1790631702760672, "grad_norm": 0.17644403874874115, "learning_rate": 1e-05, "loss": 0.5051, "step": 4261 }, { "epoch": 1.1793399294264166, "grad_norm": 0.1717997044324875, "learning_rate": 1e-05, "loss": 0.5239, "step": 4262 }, { "epoch": 1.179616688576766, "grad_norm": 0.17023271322250366, "learning_rate": 1e-05, "loss": 0.5117, "step": 4263 }, { "epoch": 1.1798934477271155, "grad_norm": 0.16669492423534393, "learning_rate": 1e-05, "loss": 0.4553, "step": 4264 }, { "epoch": 1.1801702068774649, "grad_norm": 0.1679883748292923, "learning_rate": 1e-05, "loss": 0.4921, "step": 4265 }, { "epoch": 1.1804469660278143, "grad_norm": 0.17618975043296814, "learning_rate": 1e-05, "loss": 0.5121, "step": 4266 }, { "epoch": 1.1807237251781637, "grad_norm": 0.1723889857530594, "learning_rate": 1e-05, "loss": 0.4959, "step": 4267 }, { "epoch": 1.1810004843285131, "grad_norm": 0.1705620288848877, "learning_rate": 1e-05, "loss": 0.5271, "step": 4268 }, { "epoch": 1.1812772434788625, "grad_norm": 0.17401325702667236, "learning_rate": 1e-05, "loss": 0.5087, "step": 4269 }, { "epoch": 1.181554002629212, "grad_norm": 0.1816609650850296, "learning_rate": 1e-05, "loss": 0.5175, "step": 4270 }, { "epoch": 1.1818307617795614, "grad_norm": 0.16967333853244781, "learning_rate": 1e-05, "loss": 0.511, "step": 4271 }, { "epoch": 1.1821075209299108, "grad_norm": 0.17098867893218994, "learning_rate": 1e-05, "loss": 0.52, "step": 4272 }, { "epoch": 1.1823842800802602, "grad_norm": 0.1697845607995987, "learning_rate": 1e-05, "loss": 0.4917, "step": 4273 }, { "epoch": 1.1826610392306096, "grad_norm": 0.17099763453006744, "learning_rate": 1e-05, "loss": 0.5101, "step": 4274 }, { "epoch": 1.182937798380959, "grad_norm": 0.17352411150932312, "learning_rate": 1e-05, "loss": 0.5371, "step": 4275 }, { "epoch": 1.1832145575313084, "grad_norm": 0.16865667700767517, "learning_rate": 1e-05, "loss": 0.5179, "step": 4276 }, { "epoch": 1.1834913166816579, "grad_norm": 0.18431659042835236, "learning_rate": 1e-05, "loss": 0.5047, "step": 4277 }, { "epoch": 1.1837680758320073, "grad_norm": 0.1667119413614273, "learning_rate": 1e-05, "loss": 0.539, "step": 4278 }, { "epoch": 1.1840448349823567, "grad_norm": 0.1633312851190567, "learning_rate": 1e-05, "loss": 0.4905, "step": 4279 }, { "epoch": 1.184321594132706, "grad_norm": 0.16899864375591278, "learning_rate": 1e-05, "loss": 0.5087, "step": 4280 }, { "epoch": 1.1845983532830555, "grad_norm": 0.17091906070709229, "learning_rate": 1e-05, "loss": 0.5325, "step": 4281 }, { "epoch": 1.1848751124334047, "grad_norm": 0.1814160943031311, "learning_rate": 1e-05, "loss": 0.5263, "step": 4282 }, { "epoch": 1.1851518715837543, "grad_norm": 0.17779973149299622, "learning_rate": 1e-05, "loss": 0.5551, "step": 4283 }, { "epoch": 1.1854286307341035, "grad_norm": 0.16447800397872925, "learning_rate": 1e-05, "loss": 0.5305, "step": 4284 }, { "epoch": 1.1857053898844532, "grad_norm": 0.17508216202259064, "learning_rate": 1e-05, "loss": 0.5296, "step": 4285 }, { "epoch": 1.1859821490348024, "grad_norm": 0.17370671033859253, "learning_rate": 1e-05, "loss": 0.5047, "step": 4286 }, { "epoch": 1.1862589081851518, "grad_norm": 0.1701652854681015, "learning_rate": 1e-05, "loss": 0.5068, "step": 4287 }, { "epoch": 1.1865356673355012, "grad_norm": 0.16542723774909973, "learning_rate": 1e-05, "loss": 0.5287, "step": 4288 }, { "epoch": 1.1868124264858506, "grad_norm": 0.1696820706129074, "learning_rate": 1e-05, "loss": 0.5392, "step": 4289 }, { "epoch": 1.1870891856362, "grad_norm": 0.17620940506458282, "learning_rate": 1e-05, "loss": 0.4956, "step": 4290 }, { "epoch": 1.1873659447865494, "grad_norm": 0.17505691945552826, "learning_rate": 1e-05, "loss": 0.5295, "step": 4291 }, { "epoch": 1.1876427039368989, "grad_norm": 0.1700696051120758, "learning_rate": 1e-05, "loss": 0.5566, "step": 4292 }, { "epoch": 1.1879194630872483, "grad_norm": 0.17972449958324432, "learning_rate": 1e-05, "loss": 0.5308, "step": 4293 }, { "epoch": 1.1881962222375977, "grad_norm": 0.17966485023498535, "learning_rate": 1e-05, "loss": 0.5162, "step": 4294 }, { "epoch": 1.188472981387947, "grad_norm": 0.16390426456928253, "learning_rate": 1e-05, "loss": 0.5153, "step": 4295 }, { "epoch": 1.1887497405382965, "grad_norm": 0.16442452371120453, "learning_rate": 1e-05, "loss": 0.494, "step": 4296 }, { "epoch": 1.189026499688646, "grad_norm": 0.1675289422273636, "learning_rate": 1e-05, "loss": 0.5327, "step": 4297 }, { "epoch": 1.1893032588389953, "grad_norm": 0.17909836769104004, "learning_rate": 1e-05, "loss": 0.5278, "step": 4298 }, { "epoch": 1.1895800179893448, "grad_norm": 0.1766996830701828, "learning_rate": 1e-05, "loss": 0.533, "step": 4299 }, { "epoch": 1.1898567771396942, "grad_norm": 0.16934533417224884, "learning_rate": 1e-05, "loss": 0.5022, "step": 4300 }, { "epoch": 1.1901335362900436, "grad_norm": 0.16647210717201233, "learning_rate": 1e-05, "loss": 0.5239, "step": 4301 }, { "epoch": 1.190410295440393, "grad_norm": 0.17385104298591614, "learning_rate": 1e-05, "loss": 0.523, "step": 4302 }, { "epoch": 1.1906870545907424, "grad_norm": 0.163399800658226, "learning_rate": 1e-05, "loss": 0.5319, "step": 4303 }, { "epoch": 1.1909638137410918, "grad_norm": 0.1693728119134903, "learning_rate": 1e-05, "loss": 0.51, "step": 4304 }, { "epoch": 1.1912405728914413, "grad_norm": 0.17456026375293732, "learning_rate": 1e-05, "loss": 0.5304, "step": 4305 }, { "epoch": 1.1915173320417907, "grad_norm": 0.16769897937774658, "learning_rate": 1e-05, "loss": 0.5426, "step": 4306 }, { "epoch": 1.19179409119214, "grad_norm": 0.1714518815279007, "learning_rate": 1e-05, "loss": 0.4991, "step": 4307 }, { "epoch": 1.1920708503424895, "grad_norm": 0.1731996387243271, "learning_rate": 1e-05, "loss": 0.5323, "step": 4308 }, { "epoch": 1.192347609492839, "grad_norm": 0.16796809434890747, "learning_rate": 1e-05, "loss": 0.5134, "step": 4309 }, { "epoch": 1.1926243686431883, "grad_norm": 0.17111855745315552, "learning_rate": 1e-05, "loss": 0.5038, "step": 4310 }, { "epoch": 1.1929011277935377, "grad_norm": 0.1701020896434784, "learning_rate": 1e-05, "loss": 0.4834, "step": 4311 }, { "epoch": 1.1931778869438872, "grad_norm": 0.1634359210729599, "learning_rate": 1e-05, "loss": 0.5095, "step": 4312 }, { "epoch": 1.1934546460942366, "grad_norm": 0.16258397698402405, "learning_rate": 1e-05, "loss": 0.4947, "step": 4313 }, { "epoch": 1.193731405244586, "grad_norm": 0.16446974873542786, "learning_rate": 1e-05, "loss": 0.5651, "step": 4314 }, { "epoch": 1.1940081643949354, "grad_norm": 0.17882046103477478, "learning_rate": 1e-05, "loss": 0.5217, "step": 4315 }, { "epoch": 1.1942849235452848, "grad_norm": 0.17594590783119202, "learning_rate": 1e-05, "loss": 0.5061, "step": 4316 }, { "epoch": 1.194561682695634, "grad_norm": 0.17065851390361786, "learning_rate": 1e-05, "loss": 0.4989, "step": 4317 }, { "epoch": 1.1948384418459836, "grad_norm": 0.17247478663921356, "learning_rate": 1e-05, "loss": 0.5074, "step": 4318 }, { "epoch": 1.1951152009963328, "grad_norm": 0.16823481023311615, "learning_rate": 1e-05, "loss": 0.5171, "step": 4319 }, { "epoch": 1.1953919601466823, "grad_norm": 0.168323814868927, "learning_rate": 1e-05, "loss": 0.5128, "step": 4320 }, { "epoch": 1.1956687192970317, "grad_norm": 0.1677667200565338, "learning_rate": 1e-05, "loss": 0.5039, "step": 4321 }, { "epoch": 1.195945478447381, "grad_norm": 0.163526713848114, "learning_rate": 1e-05, "loss": 0.5119, "step": 4322 }, { "epoch": 1.1962222375977305, "grad_norm": 0.17108918726444244, "learning_rate": 1e-05, "loss": 0.5161, "step": 4323 }, { "epoch": 1.19649899674808, "grad_norm": 0.16641978919506073, "learning_rate": 1e-05, "loss": 0.5206, "step": 4324 }, { "epoch": 1.1967757558984293, "grad_norm": 0.16803735494613647, "learning_rate": 1e-05, "loss": 0.5303, "step": 4325 }, { "epoch": 1.1970525150487787, "grad_norm": 0.1706712245941162, "learning_rate": 1e-05, "loss": 0.4819, "step": 4326 }, { "epoch": 1.1973292741991282, "grad_norm": 0.176355242729187, "learning_rate": 1e-05, "loss": 0.5258, "step": 4327 }, { "epoch": 1.1976060333494776, "grad_norm": 0.18534424901008606, "learning_rate": 1e-05, "loss": 0.5502, "step": 4328 }, { "epoch": 1.197882792499827, "grad_norm": 0.17398682236671448, "learning_rate": 1e-05, "loss": 0.5341, "step": 4329 }, { "epoch": 1.1981595516501764, "grad_norm": 0.17532098293304443, "learning_rate": 1e-05, "loss": 0.5132, "step": 4330 }, { "epoch": 1.1984363108005258, "grad_norm": 0.16776777803897858, "learning_rate": 1e-05, "loss": 0.5144, "step": 4331 }, { "epoch": 1.1987130699508752, "grad_norm": 0.1726902276277542, "learning_rate": 1e-05, "loss": 0.4983, "step": 4332 }, { "epoch": 1.1989898291012246, "grad_norm": 0.17095732688903809, "learning_rate": 1e-05, "loss": 0.5273, "step": 4333 }, { "epoch": 1.199266588251574, "grad_norm": 0.15824931859970093, "learning_rate": 1e-05, "loss": 0.5058, "step": 4334 }, { "epoch": 1.1995433474019235, "grad_norm": 0.1732596755027771, "learning_rate": 1e-05, "loss": 0.5331, "step": 4335 }, { "epoch": 1.199820106552273, "grad_norm": 0.163439080119133, "learning_rate": 1e-05, "loss": 0.4952, "step": 4336 }, { "epoch": 1.2000968657026223, "grad_norm": 0.17004719376564026, "learning_rate": 1e-05, "loss": 0.5212, "step": 4337 }, { "epoch": 1.2003736248529717, "grad_norm": 0.1634022295475006, "learning_rate": 1e-05, "loss": 0.5139, "step": 4338 }, { "epoch": 1.2006503840033211, "grad_norm": 0.17348670959472656, "learning_rate": 1e-05, "loss": 0.5291, "step": 4339 }, { "epoch": 1.2009271431536706, "grad_norm": 0.1720629781484604, "learning_rate": 1e-05, "loss": 0.504, "step": 4340 }, { "epoch": 1.20120390230402, "grad_norm": 0.16494852304458618, "learning_rate": 1e-05, "loss": 0.5078, "step": 4341 }, { "epoch": 1.2014806614543694, "grad_norm": 0.1791168749332428, "learning_rate": 1e-05, "loss": 0.4982, "step": 4342 }, { "epoch": 1.2017574206047188, "grad_norm": 0.16476622223854065, "learning_rate": 1e-05, "loss": 0.5182, "step": 4343 }, { "epoch": 1.2020341797550682, "grad_norm": 0.17295417189598083, "learning_rate": 1e-05, "loss": 0.48, "step": 4344 }, { "epoch": 1.2023109389054176, "grad_norm": 0.17120344936847687, "learning_rate": 1e-05, "loss": 0.53, "step": 4345 }, { "epoch": 1.202587698055767, "grad_norm": 0.16605202853679657, "learning_rate": 1e-05, "loss": 0.4964, "step": 4346 }, { "epoch": 1.2028644572061165, "grad_norm": 0.15967141091823578, "learning_rate": 1e-05, "loss": 0.5249, "step": 4347 }, { "epoch": 1.2031412163564659, "grad_norm": 0.17594896256923676, "learning_rate": 1e-05, "loss": 0.4934, "step": 4348 }, { "epoch": 1.2034179755068153, "grad_norm": 0.1638675034046173, "learning_rate": 1e-05, "loss": 0.48, "step": 4349 }, { "epoch": 1.2036947346571645, "grad_norm": 0.17067168653011322, "learning_rate": 1e-05, "loss": 0.5089, "step": 4350 }, { "epoch": 1.2039714938075141, "grad_norm": 0.17201724648475647, "learning_rate": 1e-05, "loss": 0.5087, "step": 4351 }, { "epoch": 1.2042482529578633, "grad_norm": 0.16197387874126434, "learning_rate": 1e-05, "loss": 0.512, "step": 4352 }, { "epoch": 1.204525012108213, "grad_norm": 0.17143496870994568, "learning_rate": 1e-05, "loss": 0.4919, "step": 4353 }, { "epoch": 1.2048017712585621, "grad_norm": 0.17703959345817566, "learning_rate": 1e-05, "loss": 0.5235, "step": 4354 }, { "epoch": 1.2050785304089116, "grad_norm": 0.17113281786441803, "learning_rate": 1e-05, "loss": 0.5233, "step": 4355 }, { "epoch": 1.205355289559261, "grad_norm": 0.17221058905124664, "learning_rate": 1e-05, "loss": 0.5152, "step": 4356 }, { "epoch": 1.2056320487096104, "grad_norm": 0.1708170771598816, "learning_rate": 1e-05, "loss": 0.4991, "step": 4357 }, { "epoch": 1.2059088078599598, "grad_norm": 0.1739739179611206, "learning_rate": 1e-05, "loss": 0.5294, "step": 4358 }, { "epoch": 1.2061855670103092, "grad_norm": 0.17044535279273987, "learning_rate": 1e-05, "loss": 0.5288, "step": 4359 }, { "epoch": 1.2064623261606586, "grad_norm": 0.177761971950531, "learning_rate": 1e-05, "loss": 0.5254, "step": 4360 }, { "epoch": 1.206739085311008, "grad_norm": 0.17731976509094238, "learning_rate": 1e-05, "loss": 0.5327, "step": 4361 }, { "epoch": 1.2070158444613575, "grad_norm": 0.17056161165237427, "learning_rate": 1e-05, "loss": 0.4944, "step": 4362 }, { "epoch": 1.2072926036117069, "grad_norm": 0.18254756927490234, "learning_rate": 1e-05, "loss": 0.5305, "step": 4363 }, { "epoch": 1.2075693627620563, "grad_norm": 0.17444205284118652, "learning_rate": 1e-05, "loss": 0.5116, "step": 4364 }, { "epoch": 1.2078461219124057, "grad_norm": 0.16369763016700745, "learning_rate": 1e-05, "loss": 0.5213, "step": 4365 }, { "epoch": 1.2081228810627551, "grad_norm": 0.1785779893398285, "learning_rate": 1e-05, "loss": 0.4879, "step": 4366 }, { "epoch": 1.2083996402131045, "grad_norm": 0.16489389538764954, "learning_rate": 1e-05, "loss": 0.4993, "step": 4367 }, { "epoch": 1.208676399363454, "grad_norm": 0.17415811121463776, "learning_rate": 1e-05, "loss": 0.5178, "step": 4368 }, { "epoch": 1.2089531585138034, "grad_norm": 0.1720353364944458, "learning_rate": 1e-05, "loss": 0.5087, "step": 4369 }, { "epoch": 1.2092299176641528, "grad_norm": 0.17369578778743744, "learning_rate": 1e-05, "loss": 0.4974, "step": 4370 }, { "epoch": 1.2095066768145022, "grad_norm": 0.17636023461818695, "learning_rate": 1e-05, "loss": 0.4787, "step": 4371 }, { "epoch": 1.2097834359648516, "grad_norm": 0.16429902613162994, "learning_rate": 1e-05, "loss": 0.5033, "step": 4372 }, { "epoch": 1.210060195115201, "grad_norm": 0.16715218126773834, "learning_rate": 1e-05, "loss": 0.5404, "step": 4373 }, { "epoch": 1.2103369542655504, "grad_norm": 0.1684907078742981, "learning_rate": 1e-05, "loss": 0.5322, "step": 4374 }, { "epoch": 1.2106137134158999, "grad_norm": 0.17556115984916687, "learning_rate": 1e-05, "loss": 0.5075, "step": 4375 }, { "epoch": 1.2108904725662493, "grad_norm": 0.1798630803823471, "learning_rate": 1e-05, "loss": 0.5224, "step": 4376 }, { "epoch": 1.2111672317165987, "grad_norm": 0.17475254833698273, "learning_rate": 1e-05, "loss": 0.5002, "step": 4377 }, { "epoch": 1.211443990866948, "grad_norm": 0.17178748548030853, "learning_rate": 1e-05, "loss": 0.5284, "step": 4378 }, { "epoch": 1.2117207500172975, "grad_norm": 0.1652837097644806, "learning_rate": 1e-05, "loss": 0.4908, "step": 4379 }, { "epoch": 1.211997509167647, "grad_norm": 0.1655794233083725, "learning_rate": 1e-05, "loss": 0.4682, "step": 4380 }, { "epoch": 1.2122742683179963, "grad_norm": 0.1769479513168335, "learning_rate": 1e-05, "loss": 0.5315, "step": 4381 }, { "epoch": 1.2125510274683458, "grad_norm": 0.16666215658187866, "learning_rate": 1e-05, "loss": 0.5003, "step": 4382 }, { "epoch": 1.2128277866186952, "grad_norm": 0.1614946573972702, "learning_rate": 1e-05, "loss": 0.5157, "step": 4383 }, { "epoch": 1.2131045457690446, "grad_norm": 0.16982780396938324, "learning_rate": 1e-05, "loss": 0.5246, "step": 4384 }, { "epoch": 1.2133813049193938, "grad_norm": 0.17204563319683075, "learning_rate": 1e-05, "loss": 0.4901, "step": 4385 }, { "epoch": 1.2136580640697434, "grad_norm": 0.16786403954029083, "learning_rate": 1e-05, "loss": 0.4905, "step": 4386 }, { "epoch": 1.2139348232200926, "grad_norm": 0.17052818834781647, "learning_rate": 1e-05, "loss": 0.5203, "step": 4387 }, { "epoch": 1.2142115823704422, "grad_norm": 0.17403262853622437, "learning_rate": 1e-05, "loss": 0.4976, "step": 4388 }, { "epoch": 1.2144883415207914, "grad_norm": 0.1798192709684372, "learning_rate": 1e-05, "loss": 0.5277, "step": 4389 }, { "epoch": 1.2147651006711409, "grad_norm": 0.18173743784427643, "learning_rate": 1e-05, "loss": 0.5554, "step": 4390 }, { "epoch": 1.2150418598214903, "grad_norm": 0.16798147559165955, "learning_rate": 1e-05, "loss": 0.4944, "step": 4391 }, { "epoch": 1.2153186189718397, "grad_norm": 0.1688915491104126, "learning_rate": 1e-05, "loss": 0.4934, "step": 4392 }, { "epoch": 1.215595378122189, "grad_norm": 0.16659040749073029, "learning_rate": 1e-05, "loss": 0.513, "step": 4393 }, { "epoch": 1.2158721372725385, "grad_norm": 0.16071057319641113, "learning_rate": 1e-05, "loss": 0.5432, "step": 4394 }, { "epoch": 1.216148896422888, "grad_norm": 0.18080231547355652, "learning_rate": 1e-05, "loss": 0.5139, "step": 4395 }, { "epoch": 1.2164256555732373, "grad_norm": 0.1763884276151657, "learning_rate": 1e-05, "loss": 0.5164, "step": 4396 }, { "epoch": 1.2167024147235868, "grad_norm": 0.1727411150932312, "learning_rate": 1e-05, "loss": 0.5076, "step": 4397 }, { "epoch": 1.2169791738739362, "grad_norm": 0.16790027916431427, "learning_rate": 1e-05, "loss": 0.5335, "step": 4398 }, { "epoch": 1.2172559330242856, "grad_norm": 0.1798558533191681, "learning_rate": 1e-05, "loss": 0.5079, "step": 4399 }, { "epoch": 1.217532692174635, "grad_norm": 0.1753474622964859, "learning_rate": 1e-05, "loss": 0.5345, "step": 4400 }, { "epoch": 1.2178094513249844, "grad_norm": 0.1678311973810196, "learning_rate": 1e-05, "loss": 0.5369, "step": 4401 }, { "epoch": 1.2180862104753338, "grad_norm": 0.17656004428863525, "learning_rate": 1e-05, "loss": 0.4877, "step": 4402 }, { "epoch": 1.2183629696256832, "grad_norm": 0.1657923012971878, "learning_rate": 1e-05, "loss": 0.533, "step": 4403 }, { "epoch": 1.2186397287760327, "grad_norm": 0.16442875564098358, "learning_rate": 1e-05, "loss": 0.4923, "step": 4404 }, { "epoch": 1.218916487926382, "grad_norm": 0.17335394024848938, "learning_rate": 1e-05, "loss": 0.5274, "step": 4405 }, { "epoch": 1.2191932470767315, "grad_norm": 0.17518244683742523, "learning_rate": 1e-05, "loss": 0.5027, "step": 4406 }, { "epoch": 1.219470006227081, "grad_norm": 0.17086388170719147, "learning_rate": 1e-05, "loss": 0.4971, "step": 4407 }, { "epoch": 1.2197467653774303, "grad_norm": 0.17108827829360962, "learning_rate": 1e-05, "loss": 0.5288, "step": 4408 }, { "epoch": 1.2200235245277797, "grad_norm": 0.1813056617975235, "learning_rate": 1e-05, "loss": 0.5098, "step": 4409 }, { "epoch": 1.2203002836781291, "grad_norm": 0.17716041207313538, "learning_rate": 1e-05, "loss": 0.5409, "step": 4410 }, { "epoch": 1.2205770428284786, "grad_norm": 0.16989867389202118, "learning_rate": 1e-05, "loss": 0.4828, "step": 4411 }, { "epoch": 1.220853801978828, "grad_norm": 0.16689160466194153, "learning_rate": 1e-05, "loss": 0.4928, "step": 4412 }, { "epoch": 1.2211305611291774, "grad_norm": 0.17023628950119019, "learning_rate": 1e-05, "loss": 0.5144, "step": 4413 }, { "epoch": 1.2214073202795268, "grad_norm": 0.16595667600631714, "learning_rate": 1e-05, "loss": 0.4866, "step": 4414 }, { "epoch": 1.2216840794298762, "grad_norm": 0.17647062242031097, "learning_rate": 1e-05, "loss": 0.514, "step": 4415 }, { "epoch": 1.2219608385802256, "grad_norm": 0.17161566019058228, "learning_rate": 1e-05, "loss": 0.5015, "step": 4416 }, { "epoch": 1.222237597730575, "grad_norm": 0.16682536900043488, "learning_rate": 1e-05, "loss": 0.5015, "step": 4417 }, { "epoch": 1.2225143568809245, "grad_norm": 0.17087247967720032, "learning_rate": 1e-05, "loss": 0.5113, "step": 4418 }, { "epoch": 1.2227911160312739, "grad_norm": 0.17193852365016937, "learning_rate": 1e-05, "loss": 0.5245, "step": 4419 }, { "epoch": 1.223067875181623, "grad_norm": 0.16877751052379608, "learning_rate": 1e-05, "loss": 0.5149, "step": 4420 }, { "epoch": 1.2233446343319727, "grad_norm": 0.17174313962459564, "learning_rate": 1e-05, "loss": 0.5158, "step": 4421 }, { "epoch": 1.223621393482322, "grad_norm": 0.16996300220489502, "learning_rate": 1e-05, "loss": 0.5049, "step": 4422 }, { "epoch": 1.2238981526326713, "grad_norm": 0.17859242856502533, "learning_rate": 1e-05, "loss": 0.5082, "step": 4423 }, { "epoch": 1.2241749117830207, "grad_norm": 0.17553460597991943, "learning_rate": 1e-05, "loss": 0.5185, "step": 4424 }, { "epoch": 1.2244516709333702, "grad_norm": 0.1745861917734146, "learning_rate": 1e-05, "loss": 0.4793, "step": 4425 }, { "epoch": 1.2247284300837196, "grad_norm": 0.18261802196502686, "learning_rate": 1e-05, "loss": 0.5358, "step": 4426 }, { "epoch": 1.225005189234069, "grad_norm": 0.17262890934944153, "learning_rate": 1e-05, "loss": 0.5318, "step": 4427 }, { "epoch": 1.2252819483844184, "grad_norm": 0.1692073494195938, "learning_rate": 1e-05, "loss": 0.5102, "step": 4428 }, { "epoch": 1.2255587075347678, "grad_norm": 0.17617833614349365, "learning_rate": 1e-05, "loss": 0.5399, "step": 4429 }, { "epoch": 1.2258354666851172, "grad_norm": 0.18052981793880463, "learning_rate": 1e-05, "loss": 0.4913, "step": 4430 }, { "epoch": 1.2261122258354666, "grad_norm": 0.17561204731464386, "learning_rate": 1e-05, "loss": 0.5294, "step": 4431 }, { "epoch": 1.226388984985816, "grad_norm": 0.1761842668056488, "learning_rate": 1e-05, "loss": 0.5298, "step": 4432 }, { "epoch": 1.2266657441361655, "grad_norm": 0.17566566169261932, "learning_rate": 1e-05, "loss": 0.507, "step": 4433 }, { "epoch": 1.2269425032865149, "grad_norm": 0.16847993433475494, "learning_rate": 1e-05, "loss": 0.5162, "step": 4434 }, { "epoch": 1.2272192624368643, "grad_norm": 0.1825845092535019, "learning_rate": 1e-05, "loss": 0.5183, "step": 4435 }, { "epoch": 1.2274960215872137, "grad_norm": 0.18474549055099487, "learning_rate": 1e-05, "loss": 0.513, "step": 4436 }, { "epoch": 1.2277727807375631, "grad_norm": 0.16947491466999054, "learning_rate": 1e-05, "loss": 0.5215, "step": 4437 }, { "epoch": 1.2280495398879125, "grad_norm": 0.16834618151187897, "learning_rate": 1e-05, "loss": 0.4921, "step": 4438 }, { "epoch": 1.228326299038262, "grad_norm": 0.17222170531749725, "learning_rate": 1e-05, "loss": 0.531, "step": 4439 }, { "epoch": 1.2286030581886114, "grad_norm": 0.17112374305725098, "learning_rate": 1e-05, "loss": 0.5092, "step": 4440 }, { "epoch": 1.2288798173389608, "grad_norm": 0.16858375072479248, "learning_rate": 1e-05, "loss": 0.5203, "step": 4441 }, { "epoch": 1.2291565764893102, "grad_norm": 0.16820867359638214, "learning_rate": 1e-05, "loss": 0.5193, "step": 4442 }, { "epoch": 1.2294333356396596, "grad_norm": 0.16767618060112, "learning_rate": 1e-05, "loss": 0.4902, "step": 4443 }, { "epoch": 1.229710094790009, "grad_norm": 0.17459911108016968, "learning_rate": 1e-05, "loss": 0.5142, "step": 4444 }, { "epoch": 1.2299868539403584, "grad_norm": 0.1761551797389984, "learning_rate": 1e-05, "loss": 0.4952, "step": 4445 }, { "epoch": 1.2302636130907079, "grad_norm": 0.17708133161067963, "learning_rate": 1e-05, "loss": 0.4999, "step": 4446 }, { "epoch": 1.2305403722410573, "grad_norm": 0.17384694516658783, "learning_rate": 1e-05, "loss": 0.5258, "step": 4447 }, { "epoch": 1.2308171313914067, "grad_norm": 0.17853420972824097, "learning_rate": 1e-05, "loss": 0.5116, "step": 4448 }, { "epoch": 1.231093890541756, "grad_norm": 0.1768224984407425, "learning_rate": 1e-05, "loss": 0.5436, "step": 4449 }, { "epoch": 1.2313706496921055, "grad_norm": 0.17954066395759583, "learning_rate": 1e-05, "loss": 0.509, "step": 4450 }, { "epoch": 1.231647408842455, "grad_norm": 0.17727030813694, "learning_rate": 1e-05, "loss": 0.5107, "step": 4451 }, { "epoch": 1.2319241679928044, "grad_norm": 0.17893926799297333, "learning_rate": 1e-05, "loss": 0.5119, "step": 4452 }, { "epoch": 1.2322009271431535, "grad_norm": 0.16988691687583923, "learning_rate": 1e-05, "loss": 0.5013, "step": 4453 }, { "epoch": 1.2324776862935032, "grad_norm": 0.16946184635162354, "learning_rate": 1e-05, "loss": 0.5065, "step": 4454 }, { "epoch": 1.2327544454438524, "grad_norm": 0.16753067076206207, "learning_rate": 1e-05, "loss": 0.51, "step": 4455 }, { "epoch": 1.233031204594202, "grad_norm": 0.17490306496620178, "learning_rate": 1e-05, "loss": 0.5193, "step": 4456 }, { "epoch": 1.2333079637445512, "grad_norm": 0.1708901822566986, "learning_rate": 1e-05, "loss": 0.5115, "step": 4457 }, { "epoch": 1.2335847228949006, "grad_norm": 0.167562797665596, "learning_rate": 1e-05, "loss": 0.4933, "step": 4458 }, { "epoch": 1.23386148204525, "grad_norm": 0.17164646089076996, "learning_rate": 1e-05, "loss": 0.524, "step": 4459 }, { "epoch": 1.2341382411955995, "grad_norm": 0.174498051404953, "learning_rate": 1e-05, "loss": 0.5374, "step": 4460 }, { "epoch": 1.2344150003459489, "grad_norm": 0.16329708695411682, "learning_rate": 1e-05, "loss": 0.5254, "step": 4461 }, { "epoch": 1.2346917594962983, "grad_norm": 0.17160378396511078, "learning_rate": 1e-05, "loss": 0.5255, "step": 4462 }, { "epoch": 1.2349685186466477, "grad_norm": 0.17109476029872894, "learning_rate": 1e-05, "loss": 0.5243, "step": 4463 }, { "epoch": 1.235245277796997, "grad_norm": 0.17186717689037323, "learning_rate": 1e-05, "loss": 0.5105, "step": 4464 }, { "epoch": 1.2355220369473465, "grad_norm": 0.16409748792648315, "learning_rate": 1e-05, "loss": 0.5227, "step": 4465 }, { "epoch": 1.235798796097696, "grad_norm": 0.16772454977035522, "learning_rate": 1e-05, "loss": 0.5126, "step": 4466 }, { "epoch": 1.2360755552480454, "grad_norm": 0.17386755347251892, "learning_rate": 1e-05, "loss": 0.5064, "step": 4467 }, { "epoch": 1.2363523143983948, "grad_norm": 0.1692780703306198, "learning_rate": 1e-05, "loss": 0.5149, "step": 4468 }, { "epoch": 1.2366290735487442, "grad_norm": 0.17016610503196716, "learning_rate": 1e-05, "loss": 0.5113, "step": 4469 }, { "epoch": 1.2369058326990936, "grad_norm": 0.1716240644454956, "learning_rate": 1e-05, "loss": 0.4971, "step": 4470 }, { "epoch": 1.237182591849443, "grad_norm": 0.1742994785308838, "learning_rate": 1e-05, "loss": 0.5204, "step": 4471 }, { "epoch": 1.2374593509997924, "grad_norm": 0.17050360143184662, "learning_rate": 1e-05, "loss": 0.5272, "step": 4472 }, { "epoch": 1.2377361101501418, "grad_norm": 0.16152697801589966, "learning_rate": 1e-05, "loss": 0.5322, "step": 4473 }, { "epoch": 1.2380128693004913, "grad_norm": 0.17303569614887238, "learning_rate": 1e-05, "loss": 0.5145, "step": 4474 }, { "epoch": 1.2382896284508407, "grad_norm": 0.1725127100944519, "learning_rate": 1e-05, "loss": 0.5194, "step": 4475 }, { "epoch": 1.23856638760119, "grad_norm": 0.17977134883403778, "learning_rate": 1e-05, "loss": 0.5183, "step": 4476 }, { "epoch": 1.2388431467515395, "grad_norm": 0.17953452467918396, "learning_rate": 1e-05, "loss": 0.5483, "step": 4477 }, { "epoch": 1.239119905901889, "grad_norm": 0.17852534353733063, "learning_rate": 1e-05, "loss": 0.5409, "step": 4478 }, { "epoch": 1.2393966650522383, "grad_norm": 0.16989833116531372, "learning_rate": 1e-05, "loss": 0.5509, "step": 4479 }, { "epoch": 1.2396734242025877, "grad_norm": 0.17652294039726257, "learning_rate": 1e-05, "loss": 0.5049, "step": 4480 }, { "epoch": 1.2399501833529372, "grad_norm": 0.16672806441783905, "learning_rate": 1e-05, "loss": 0.5339, "step": 4481 }, { "epoch": 1.2402269425032866, "grad_norm": 0.17695936560630798, "learning_rate": 1e-05, "loss": 0.5229, "step": 4482 }, { "epoch": 1.240503701653636, "grad_norm": 0.1816418319940567, "learning_rate": 1e-05, "loss": 0.4944, "step": 4483 }, { "epoch": 1.2407804608039854, "grad_norm": 0.16940763592720032, "learning_rate": 1e-05, "loss": 0.4984, "step": 4484 }, { "epoch": 1.2410572199543348, "grad_norm": 0.16106976568698883, "learning_rate": 1e-05, "loss": 0.4927, "step": 4485 }, { "epoch": 1.2413339791046842, "grad_norm": 0.1735873818397522, "learning_rate": 1e-05, "loss": 0.5235, "step": 4486 }, { "epoch": 1.2416107382550337, "grad_norm": 0.17521364986896515, "learning_rate": 1e-05, "loss": 0.5085, "step": 4487 }, { "epoch": 1.2418874974053828, "grad_norm": 0.17180997133255005, "learning_rate": 1e-05, "loss": 0.5122, "step": 4488 }, { "epoch": 1.2421642565557325, "grad_norm": 0.17183205485343933, "learning_rate": 1e-05, "loss": 0.4991, "step": 4489 }, { "epoch": 1.2424410157060817, "grad_norm": 0.17165614664554596, "learning_rate": 1e-05, "loss": 0.5069, "step": 4490 }, { "epoch": 1.2427177748564313, "grad_norm": 0.17599612474441528, "learning_rate": 1e-05, "loss": 0.5294, "step": 4491 }, { "epoch": 1.2429945340067805, "grad_norm": 0.1731826514005661, "learning_rate": 1e-05, "loss": 0.5055, "step": 4492 }, { "epoch": 1.24327129315713, "grad_norm": 0.1622460037469864, "learning_rate": 1e-05, "loss": 0.4941, "step": 4493 }, { "epoch": 1.2435480523074793, "grad_norm": 0.18319639563560486, "learning_rate": 1e-05, "loss": 0.5506, "step": 4494 }, { "epoch": 1.2438248114578287, "grad_norm": 0.171820729970932, "learning_rate": 1e-05, "loss": 0.504, "step": 4495 }, { "epoch": 1.2441015706081782, "grad_norm": 0.17179572582244873, "learning_rate": 1e-05, "loss": 0.4887, "step": 4496 }, { "epoch": 1.2443783297585276, "grad_norm": 0.17110544443130493, "learning_rate": 1e-05, "loss": 0.5385, "step": 4497 }, { "epoch": 1.244655088908877, "grad_norm": 0.17081429064273834, "learning_rate": 1e-05, "loss": 0.5222, "step": 4498 }, { "epoch": 1.2449318480592264, "grad_norm": 0.17319712042808533, "learning_rate": 1e-05, "loss": 0.5287, "step": 4499 }, { "epoch": 1.2452086072095758, "grad_norm": 0.18193113803863525, "learning_rate": 1e-05, "loss": 0.4889, "step": 4500 }, { "epoch": 1.2454853663599252, "grad_norm": 0.17426609992980957, "learning_rate": 1e-05, "loss": 0.507, "step": 4501 }, { "epoch": 1.2457621255102747, "grad_norm": 0.16967232525348663, "learning_rate": 1e-05, "loss": 0.5059, "step": 4502 }, { "epoch": 1.246038884660624, "grad_norm": 0.1684071123600006, "learning_rate": 1e-05, "loss": 0.5338, "step": 4503 }, { "epoch": 1.2463156438109735, "grad_norm": 0.18562857806682587, "learning_rate": 1e-05, "loss": 0.5206, "step": 4504 }, { "epoch": 1.246592402961323, "grad_norm": 0.18053026497364044, "learning_rate": 1e-05, "loss": 0.5046, "step": 4505 }, { "epoch": 1.2468691621116723, "grad_norm": 0.17219726741313934, "learning_rate": 1e-05, "loss": 0.5211, "step": 4506 }, { "epoch": 1.2471459212620217, "grad_norm": 0.1746980994939804, "learning_rate": 1e-05, "loss": 0.5268, "step": 4507 }, { "epoch": 1.2474226804123711, "grad_norm": 0.17707176506519318, "learning_rate": 1e-05, "loss": 0.5276, "step": 4508 }, { "epoch": 1.2476994395627206, "grad_norm": 0.16669632494449615, "learning_rate": 1e-05, "loss": 0.4748, "step": 4509 }, { "epoch": 1.24797619871307, "grad_norm": 0.16380564868450165, "learning_rate": 1e-05, "loss": 0.5051, "step": 4510 }, { "epoch": 1.2482529578634194, "grad_norm": 0.1695927232503891, "learning_rate": 1e-05, "loss": 0.5078, "step": 4511 }, { "epoch": 1.2485297170137688, "grad_norm": 0.1761677861213684, "learning_rate": 1e-05, "loss": 0.5082, "step": 4512 }, { "epoch": 1.2488064761641182, "grad_norm": 0.16544274985790253, "learning_rate": 1e-05, "loss": 0.5262, "step": 4513 }, { "epoch": 1.2490832353144676, "grad_norm": 0.16794496774673462, "learning_rate": 1e-05, "loss": 0.511, "step": 4514 }, { "epoch": 1.249359994464817, "grad_norm": 0.17480814456939697, "learning_rate": 1e-05, "loss": 0.5349, "step": 4515 }, { "epoch": 1.2496367536151665, "grad_norm": 0.16681437194347382, "learning_rate": 1e-05, "loss": 0.5149, "step": 4516 }, { "epoch": 1.2499135127655159, "grad_norm": 0.17690163850784302, "learning_rate": 1e-05, "loss": 0.5249, "step": 4517 }, { "epoch": 1.2501902719158653, "grad_norm": 0.16989384591579437, "learning_rate": 1e-05, "loss": 0.5066, "step": 4518 }, { "epoch": 1.2504670310662147, "grad_norm": 0.16907964646816254, "learning_rate": 1e-05, "loss": 0.4995, "step": 4519 }, { "epoch": 1.2507437902165641, "grad_norm": 0.170003280043602, "learning_rate": 1e-05, "loss": 0.5204, "step": 4520 }, { "epoch": 1.2510205493669133, "grad_norm": 0.16567999124526978, "learning_rate": 1e-05, "loss": 0.524, "step": 4521 }, { "epoch": 1.251297308517263, "grad_norm": 0.17527459561824799, "learning_rate": 1e-05, "loss": 0.5136, "step": 4522 }, { "epoch": 1.2515740676676121, "grad_norm": 0.17955318093299866, "learning_rate": 1e-05, "loss": 0.5371, "step": 4523 }, { "epoch": 1.2518508268179618, "grad_norm": 0.16435125470161438, "learning_rate": 1e-05, "loss": 0.4854, "step": 4524 }, { "epoch": 1.252127585968311, "grad_norm": 0.16733932495117188, "learning_rate": 1e-05, "loss": 0.5105, "step": 4525 }, { "epoch": 1.2524043451186606, "grad_norm": 0.1698843538761139, "learning_rate": 1e-05, "loss": 0.55, "step": 4526 }, { "epoch": 1.2526811042690098, "grad_norm": 0.17593954503536224, "learning_rate": 1e-05, "loss": 0.5106, "step": 4527 }, { "epoch": 1.2529578634193594, "grad_norm": 0.16979201138019562, "learning_rate": 1e-05, "loss": 0.5048, "step": 4528 }, { "epoch": 1.2532346225697086, "grad_norm": 0.16839337348937988, "learning_rate": 1e-05, "loss": 0.517, "step": 4529 }, { "epoch": 1.253511381720058, "grad_norm": 0.1696937084197998, "learning_rate": 1e-05, "loss": 0.5149, "step": 4530 }, { "epoch": 1.2537881408704075, "grad_norm": 0.1771940290927887, "learning_rate": 1e-05, "loss": 0.4971, "step": 4531 }, { "epoch": 1.2540649000207569, "grad_norm": 0.16390205919742584, "learning_rate": 1e-05, "loss": 0.535, "step": 4532 }, { "epoch": 1.2543416591711063, "grad_norm": 0.18101175129413605, "learning_rate": 1e-05, "loss": 0.5092, "step": 4533 }, { "epoch": 1.2546184183214557, "grad_norm": 0.16999058425426483, "learning_rate": 1e-05, "loss": 0.5368, "step": 4534 }, { "epoch": 1.2548951774718051, "grad_norm": 0.17159545421600342, "learning_rate": 1e-05, "loss": 0.537, "step": 4535 }, { "epoch": 1.2551719366221545, "grad_norm": 0.1685776263475418, "learning_rate": 1e-05, "loss": 0.5234, "step": 4536 }, { "epoch": 1.255448695772504, "grad_norm": 0.17046460509300232, "learning_rate": 1e-05, "loss": 0.5191, "step": 4537 }, { "epoch": 1.2557254549228534, "grad_norm": 0.18023721873760223, "learning_rate": 1e-05, "loss": 0.5305, "step": 4538 }, { "epoch": 1.2560022140732028, "grad_norm": 0.17422838509082794, "learning_rate": 1e-05, "loss": 0.4993, "step": 4539 }, { "epoch": 1.2562789732235522, "grad_norm": 0.17837917804718018, "learning_rate": 1e-05, "loss": 0.5467, "step": 4540 }, { "epoch": 1.2565557323739016, "grad_norm": 0.17248576879501343, "learning_rate": 1e-05, "loss": 0.4893, "step": 4541 }, { "epoch": 1.256832491524251, "grad_norm": 0.16749103367328644, "learning_rate": 1e-05, "loss": 0.492, "step": 4542 }, { "epoch": 1.2571092506746004, "grad_norm": 0.17520010471343994, "learning_rate": 1e-05, "loss": 0.515, "step": 4543 }, { "epoch": 1.2573860098249499, "grad_norm": 0.17698128521442413, "learning_rate": 1e-05, "loss": 0.5383, "step": 4544 }, { "epoch": 1.2576627689752993, "grad_norm": 0.1750320941209793, "learning_rate": 1e-05, "loss": 0.5251, "step": 4545 }, { "epoch": 1.2579395281256487, "grad_norm": 0.16981279850006104, "learning_rate": 1e-05, "loss": 0.4842, "step": 4546 }, { "epoch": 1.258216287275998, "grad_norm": 0.1744374781847, "learning_rate": 1e-05, "loss": 0.5036, "step": 4547 }, { "epoch": 1.2584930464263475, "grad_norm": 0.17111501097679138, "learning_rate": 1e-05, "loss": 0.4983, "step": 4548 }, { "epoch": 1.258769805576697, "grad_norm": 0.17887862026691437, "learning_rate": 1e-05, "loss": 0.5059, "step": 4549 }, { "epoch": 1.2590465647270463, "grad_norm": 0.16990521550178528, "learning_rate": 1e-05, "loss": 0.5344, "step": 4550 }, { "epoch": 1.2593233238773958, "grad_norm": 0.18289001286029816, "learning_rate": 1e-05, "loss": 0.5145, "step": 4551 }, { "epoch": 1.2596000830277452, "grad_norm": 0.1820388287305832, "learning_rate": 1e-05, "loss": 0.5008, "step": 4552 }, { "epoch": 1.2598768421780946, "grad_norm": 0.1702607423067093, "learning_rate": 1e-05, "loss": 0.4885, "step": 4553 }, { "epoch": 1.2601536013284438, "grad_norm": 0.1726025938987732, "learning_rate": 1e-05, "loss": 0.5015, "step": 4554 }, { "epoch": 1.2604303604787934, "grad_norm": 0.17574243247509003, "learning_rate": 1e-05, "loss": 0.5162, "step": 4555 }, { "epoch": 1.2607071196291426, "grad_norm": 0.17439153790473938, "learning_rate": 1e-05, "loss": 0.5019, "step": 4556 }, { "epoch": 1.2609838787794923, "grad_norm": 0.1642858386039734, "learning_rate": 1e-05, "loss": 0.5104, "step": 4557 }, { "epoch": 1.2612606379298414, "grad_norm": 0.17237411439418793, "learning_rate": 1e-05, "loss": 0.5261, "step": 4558 }, { "epoch": 1.261537397080191, "grad_norm": 0.17228509485721588, "learning_rate": 1e-05, "loss": 0.542, "step": 4559 }, { "epoch": 1.2618141562305403, "grad_norm": 0.16839146614074707, "learning_rate": 1e-05, "loss": 0.5339, "step": 4560 }, { "epoch": 1.26209091538089, "grad_norm": 0.17364349961280823, "learning_rate": 1e-05, "loss": 0.5077, "step": 4561 }, { "epoch": 1.262367674531239, "grad_norm": 0.1759667992591858, "learning_rate": 1e-05, "loss": 0.5073, "step": 4562 }, { "epoch": 1.2626444336815885, "grad_norm": 0.1654359996318817, "learning_rate": 1e-05, "loss": 0.5134, "step": 4563 }, { "epoch": 1.262921192831938, "grad_norm": 0.16681453585624695, "learning_rate": 1e-05, "loss": 0.5039, "step": 4564 }, { "epoch": 1.2631979519822873, "grad_norm": 0.16966965794563293, "learning_rate": 1e-05, "loss": 0.5027, "step": 4565 }, { "epoch": 1.2634747111326368, "grad_norm": 0.17198395729064941, "learning_rate": 1e-05, "loss": 0.4882, "step": 4566 }, { "epoch": 1.2637514702829862, "grad_norm": 0.1671171337366104, "learning_rate": 1e-05, "loss": 0.5129, "step": 4567 }, { "epoch": 1.2640282294333356, "grad_norm": 0.16842031478881836, "learning_rate": 1e-05, "loss": 0.5126, "step": 4568 }, { "epoch": 1.264304988583685, "grad_norm": 0.17916615307331085, "learning_rate": 1e-05, "loss": 0.5127, "step": 4569 }, { "epoch": 1.2645817477340344, "grad_norm": 0.17796699702739716, "learning_rate": 1e-05, "loss": 0.5203, "step": 4570 }, { "epoch": 1.2648585068843838, "grad_norm": 0.16685399413108826, "learning_rate": 1e-05, "loss": 0.5253, "step": 4571 }, { "epoch": 1.2651352660347333, "grad_norm": 0.17501147091388702, "learning_rate": 1e-05, "loss": 0.4958, "step": 4572 }, { "epoch": 1.2654120251850827, "grad_norm": 0.1784643828868866, "learning_rate": 1e-05, "loss": 0.5325, "step": 4573 }, { "epoch": 1.265688784335432, "grad_norm": 0.17589309811592102, "learning_rate": 1e-05, "loss": 0.5329, "step": 4574 }, { "epoch": 1.2659655434857815, "grad_norm": 0.1756751537322998, "learning_rate": 1e-05, "loss": 0.5004, "step": 4575 }, { "epoch": 1.266242302636131, "grad_norm": 0.15848664939403534, "learning_rate": 1e-05, "loss": 0.5071, "step": 4576 }, { "epoch": 1.2665190617864803, "grad_norm": 0.1714331954717636, "learning_rate": 1e-05, "loss": 0.5147, "step": 4577 }, { "epoch": 1.2667958209368297, "grad_norm": 0.16701097786426544, "learning_rate": 1e-05, "loss": 0.5051, "step": 4578 }, { "epoch": 1.2670725800871792, "grad_norm": 0.17403534054756165, "learning_rate": 1e-05, "loss": 0.5087, "step": 4579 }, { "epoch": 1.2673493392375286, "grad_norm": 0.16961298882961273, "learning_rate": 1e-05, "loss": 0.5072, "step": 4580 }, { "epoch": 1.267626098387878, "grad_norm": 0.16840848326683044, "learning_rate": 1e-05, "loss": 0.5169, "step": 4581 }, { "epoch": 1.2679028575382274, "grad_norm": 0.1779921054840088, "learning_rate": 1e-05, "loss": 0.5021, "step": 4582 }, { "epoch": 1.2681796166885768, "grad_norm": 0.17870059609413147, "learning_rate": 1e-05, "loss": 0.5462, "step": 4583 }, { "epoch": 1.2684563758389262, "grad_norm": 0.17239931225776672, "learning_rate": 1e-05, "loss": 0.4701, "step": 4584 }, { "epoch": 1.2687331349892756, "grad_norm": 0.16065743565559387, "learning_rate": 1e-05, "loss": 0.5283, "step": 4585 }, { "epoch": 1.269009894139625, "grad_norm": 0.16513237357139587, "learning_rate": 1e-05, "loss": 0.462, "step": 4586 }, { "epoch": 1.2692866532899745, "grad_norm": 0.1750873178243637, "learning_rate": 1e-05, "loss": 0.5203, "step": 4587 }, { "epoch": 1.269563412440324, "grad_norm": 0.16938242316246033, "learning_rate": 1e-05, "loss": 0.4996, "step": 4588 }, { "epoch": 1.269840171590673, "grad_norm": 0.1718243658542633, "learning_rate": 1e-05, "loss": 0.5061, "step": 4589 }, { "epoch": 1.2701169307410227, "grad_norm": 0.17565158009529114, "learning_rate": 1e-05, "loss": 0.5144, "step": 4590 }, { "epoch": 1.270393689891372, "grad_norm": 0.17327836155891418, "learning_rate": 1e-05, "loss": 0.5042, "step": 4591 }, { "epoch": 1.2706704490417216, "grad_norm": 0.16585321724414825, "learning_rate": 1e-05, "loss": 0.4914, "step": 4592 }, { "epoch": 1.2709472081920707, "grad_norm": 0.16982169449329376, "learning_rate": 1e-05, "loss": 0.5103, "step": 4593 }, { "epoch": 1.2712239673424204, "grad_norm": 0.16780662536621094, "learning_rate": 1e-05, "loss": 0.5141, "step": 4594 }, { "epoch": 1.2715007264927696, "grad_norm": 0.1747530847787857, "learning_rate": 1e-05, "loss": 0.5447, "step": 4595 }, { "epoch": 1.2717774856431192, "grad_norm": 0.18711970746517181, "learning_rate": 1e-05, "loss": 0.4957, "step": 4596 }, { "epoch": 1.2720542447934684, "grad_norm": 0.17626211047172546, "learning_rate": 1e-05, "loss": 0.5454, "step": 4597 }, { "epoch": 1.2723310039438178, "grad_norm": 0.18022631108760834, "learning_rate": 1e-05, "loss": 0.5327, "step": 4598 }, { "epoch": 1.2726077630941672, "grad_norm": 0.16410136222839355, "learning_rate": 1e-05, "loss": 0.4946, "step": 4599 }, { "epoch": 1.2728845222445166, "grad_norm": 0.17350967228412628, "learning_rate": 1e-05, "loss": 0.5197, "step": 4600 }, { "epoch": 1.273161281394866, "grad_norm": 0.17575836181640625, "learning_rate": 1e-05, "loss": 0.526, "step": 4601 }, { "epoch": 1.2734380405452155, "grad_norm": 0.173710435628891, "learning_rate": 1e-05, "loss": 0.5219, "step": 4602 }, { "epoch": 1.273714799695565, "grad_norm": 0.16319800913333893, "learning_rate": 1e-05, "loss": 0.5116, "step": 4603 }, { "epoch": 1.2739915588459143, "grad_norm": 0.171942800283432, "learning_rate": 1e-05, "loss": 0.5303, "step": 4604 }, { "epoch": 1.2742683179962637, "grad_norm": 0.1735111027956009, "learning_rate": 1e-05, "loss": 0.5337, "step": 4605 }, { "epoch": 1.2745450771466131, "grad_norm": 0.16667169332504272, "learning_rate": 1e-05, "loss": 0.5005, "step": 4606 }, { "epoch": 1.2748218362969626, "grad_norm": 0.1714317798614502, "learning_rate": 1e-05, "loss": 0.5199, "step": 4607 }, { "epoch": 1.275098595447312, "grad_norm": 0.16802279651165009, "learning_rate": 1e-05, "loss": 0.5178, "step": 4608 }, { "epoch": 1.2753753545976614, "grad_norm": 0.1734207421541214, "learning_rate": 1e-05, "loss": 0.5034, "step": 4609 }, { "epoch": 1.2756521137480108, "grad_norm": 0.16641786694526672, "learning_rate": 1e-05, "loss": 0.5252, "step": 4610 }, { "epoch": 1.2759288728983602, "grad_norm": 0.17412203550338745, "learning_rate": 1e-05, "loss": 0.5072, "step": 4611 }, { "epoch": 1.2762056320487096, "grad_norm": 0.16782024502754211, "learning_rate": 1e-05, "loss": 0.5206, "step": 4612 }, { "epoch": 1.276482391199059, "grad_norm": 0.17321768403053284, "learning_rate": 1e-05, "loss": 0.5022, "step": 4613 }, { "epoch": 1.2767591503494085, "grad_norm": 0.16980616748332977, "learning_rate": 1e-05, "loss": 0.5197, "step": 4614 }, { "epoch": 1.2770359094997579, "grad_norm": 0.1635449230670929, "learning_rate": 1e-05, "loss": 0.5148, "step": 4615 }, { "epoch": 1.2773126686501073, "grad_norm": 0.16951750218868256, "learning_rate": 1e-05, "loss": 0.4844, "step": 4616 }, { "epoch": 1.2775894278004567, "grad_norm": 0.17546679079532623, "learning_rate": 1e-05, "loss": 0.5173, "step": 4617 }, { "epoch": 1.2778661869508061, "grad_norm": 0.18513445556163788, "learning_rate": 1e-05, "loss": 0.5513, "step": 4618 }, { "epoch": 1.2781429461011555, "grad_norm": 0.17666149139404297, "learning_rate": 1e-05, "loss": 0.5474, "step": 4619 }, { "epoch": 1.278419705251505, "grad_norm": 0.1780172437429428, "learning_rate": 1e-05, "loss": 0.5464, "step": 4620 }, { "epoch": 1.2786964644018544, "grad_norm": 0.17124341428279877, "learning_rate": 1e-05, "loss": 0.5346, "step": 4621 }, { "epoch": 1.2789732235522038, "grad_norm": 0.17442302405834198, "learning_rate": 1e-05, "loss": 0.5246, "step": 4622 }, { "epoch": 1.2792499827025532, "grad_norm": 0.1790185123682022, "learning_rate": 1e-05, "loss": 0.5332, "step": 4623 }, { "epoch": 1.2795267418529024, "grad_norm": 0.18566586077213287, "learning_rate": 1e-05, "loss": 0.5005, "step": 4624 }, { "epoch": 1.279803501003252, "grad_norm": 0.1676729917526245, "learning_rate": 1e-05, "loss": 0.4982, "step": 4625 }, { "epoch": 1.2800802601536012, "grad_norm": 0.16727736592292786, "learning_rate": 1e-05, "loss": 0.5132, "step": 4626 }, { "epoch": 1.2803570193039508, "grad_norm": 0.1727401465177536, "learning_rate": 1e-05, "loss": 0.5186, "step": 4627 }, { "epoch": 1.2806337784543, "grad_norm": 0.1669590026140213, "learning_rate": 1e-05, "loss": 0.5328, "step": 4628 }, { "epoch": 1.2809105376046497, "grad_norm": 0.16757145524024963, "learning_rate": 1e-05, "loss": 0.5349, "step": 4629 }, { "epoch": 1.2811872967549989, "grad_norm": 0.1655714362859726, "learning_rate": 1e-05, "loss": 0.5322, "step": 4630 }, { "epoch": 1.2814640559053485, "grad_norm": 0.17598162591457367, "learning_rate": 1e-05, "loss": 0.505, "step": 4631 }, { "epoch": 1.2817408150556977, "grad_norm": 0.17220677435398102, "learning_rate": 1e-05, "loss": 0.5335, "step": 4632 }, { "epoch": 1.2820175742060471, "grad_norm": 0.16504183411598206, "learning_rate": 1e-05, "loss": 0.5133, "step": 4633 }, { "epoch": 1.2822943333563965, "grad_norm": 0.1716657429933548, "learning_rate": 1e-05, "loss": 0.5314, "step": 4634 }, { "epoch": 1.282571092506746, "grad_norm": 0.17293746769428253, "learning_rate": 1e-05, "loss": 0.5045, "step": 4635 }, { "epoch": 1.2828478516570954, "grad_norm": 0.17661762237548828, "learning_rate": 1e-05, "loss": 0.5133, "step": 4636 }, { "epoch": 1.2831246108074448, "grad_norm": 0.16877971589565277, "learning_rate": 1e-05, "loss": 0.4885, "step": 4637 }, { "epoch": 1.2834013699577942, "grad_norm": 0.17161478102207184, "learning_rate": 1e-05, "loss": 0.5065, "step": 4638 }, { "epoch": 1.2836781291081436, "grad_norm": 0.1857447475194931, "learning_rate": 1e-05, "loss": 0.536, "step": 4639 }, { "epoch": 1.283954888258493, "grad_norm": 0.17267486453056335, "learning_rate": 1e-05, "loss": 0.5162, "step": 4640 }, { "epoch": 1.2842316474088424, "grad_norm": 0.17793980240821838, "learning_rate": 1e-05, "loss": 0.5026, "step": 4641 }, { "epoch": 1.2845084065591919, "grad_norm": 0.17353639006614685, "learning_rate": 1e-05, "loss": 0.4854, "step": 4642 }, { "epoch": 1.2847851657095413, "grad_norm": 0.16671216487884521, "learning_rate": 1e-05, "loss": 0.5221, "step": 4643 }, { "epoch": 1.2850619248598907, "grad_norm": 0.18157121539115906, "learning_rate": 1e-05, "loss": 0.5113, "step": 4644 }, { "epoch": 1.28533868401024, "grad_norm": 0.16905687749385834, "learning_rate": 1e-05, "loss": 0.4906, "step": 4645 }, { "epoch": 1.2856154431605895, "grad_norm": 0.17143870890140533, "learning_rate": 1e-05, "loss": 0.5269, "step": 4646 }, { "epoch": 1.285892202310939, "grad_norm": 0.17031629383563995, "learning_rate": 1e-05, "loss": 0.5296, "step": 4647 }, { "epoch": 1.2861689614612883, "grad_norm": 0.17987099289894104, "learning_rate": 1e-05, "loss": 0.516, "step": 4648 }, { "epoch": 1.2864457206116378, "grad_norm": 0.17331023514270782, "learning_rate": 1e-05, "loss": 0.509, "step": 4649 }, { "epoch": 1.2867224797619872, "grad_norm": 0.17234963178634644, "learning_rate": 1e-05, "loss": 0.4911, "step": 4650 }, { "epoch": 1.2869992389123366, "grad_norm": 0.16821107268333435, "learning_rate": 1e-05, "loss": 0.4894, "step": 4651 }, { "epoch": 1.287275998062686, "grad_norm": 0.1726440042257309, "learning_rate": 1e-05, "loss": 0.5377, "step": 4652 }, { "epoch": 1.2875527572130354, "grad_norm": 0.17152442038059235, "learning_rate": 1e-05, "loss": 0.5093, "step": 4653 }, { "epoch": 1.2878295163633848, "grad_norm": 0.17716126143932343, "learning_rate": 1e-05, "loss": 0.5029, "step": 4654 }, { "epoch": 1.2881062755137342, "grad_norm": 0.16305027902126312, "learning_rate": 1e-05, "loss": 0.5085, "step": 4655 }, { "epoch": 1.2883830346640837, "grad_norm": 0.16713054478168488, "learning_rate": 1e-05, "loss": 0.5134, "step": 4656 }, { "epoch": 1.2886597938144329, "grad_norm": 0.1758957952260971, "learning_rate": 1e-05, "loss": 0.5277, "step": 4657 }, { "epoch": 1.2889365529647825, "grad_norm": 0.1711980402469635, "learning_rate": 1e-05, "loss": 0.4889, "step": 4658 }, { "epoch": 1.2892133121151317, "grad_norm": 0.17121745645999908, "learning_rate": 1e-05, "loss": 0.5277, "step": 4659 }, { "epoch": 1.2894900712654813, "grad_norm": 0.1785721778869629, "learning_rate": 1e-05, "loss": 0.5207, "step": 4660 }, { "epoch": 1.2897668304158305, "grad_norm": 0.17563475668430328, "learning_rate": 1e-05, "loss": 0.5288, "step": 4661 }, { "epoch": 1.2900435895661801, "grad_norm": 0.17762383818626404, "learning_rate": 1e-05, "loss": 0.5187, "step": 4662 }, { "epoch": 1.2903203487165293, "grad_norm": 0.17659832537174225, "learning_rate": 1e-05, "loss": 0.5298, "step": 4663 }, { "epoch": 1.290597107866879, "grad_norm": 0.18036186695098877, "learning_rate": 1e-05, "loss": 0.5153, "step": 4664 }, { "epoch": 1.2908738670172282, "grad_norm": 0.16016517579555511, "learning_rate": 1e-05, "loss": 0.4646, "step": 4665 }, { "epoch": 1.2911506261675776, "grad_norm": 0.18035688996315002, "learning_rate": 1e-05, "loss": 0.5268, "step": 4666 }, { "epoch": 1.291427385317927, "grad_norm": 0.17651161551475525, "learning_rate": 1e-05, "loss": 0.5486, "step": 4667 }, { "epoch": 1.2917041444682764, "grad_norm": 0.16736194491386414, "learning_rate": 1e-05, "loss": 0.4967, "step": 4668 }, { "epoch": 1.2919809036186258, "grad_norm": 0.16853386163711548, "learning_rate": 1e-05, "loss": 0.5255, "step": 4669 }, { "epoch": 1.2922576627689752, "grad_norm": 0.1695672571659088, "learning_rate": 1e-05, "loss": 0.5048, "step": 4670 }, { "epoch": 1.2925344219193247, "grad_norm": 0.16714009642601013, "learning_rate": 1e-05, "loss": 0.5136, "step": 4671 }, { "epoch": 1.292811181069674, "grad_norm": 0.17931917309761047, "learning_rate": 1e-05, "loss": 0.5145, "step": 4672 }, { "epoch": 1.2930879402200235, "grad_norm": 0.1765013486146927, "learning_rate": 1e-05, "loss": 0.523, "step": 4673 }, { "epoch": 1.293364699370373, "grad_norm": 0.16916099190711975, "learning_rate": 1e-05, "loss": 0.4959, "step": 4674 }, { "epoch": 1.2936414585207223, "grad_norm": 0.16643016040325165, "learning_rate": 1e-05, "loss": 0.5065, "step": 4675 }, { "epoch": 1.2939182176710717, "grad_norm": 0.17106349766254425, "learning_rate": 1e-05, "loss": 0.5176, "step": 4676 }, { "epoch": 1.2941949768214212, "grad_norm": 0.17398956418037415, "learning_rate": 1e-05, "loss": 0.5146, "step": 4677 }, { "epoch": 1.2944717359717706, "grad_norm": 0.17351636290550232, "learning_rate": 1e-05, "loss": 0.5514, "step": 4678 }, { "epoch": 1.29474849512212, "grad_norm": 0.17220914363861084, "learning_rate": 1e-05, "loss": 0.5233, "step": 4679 }, { "epoch": 1.2950252542724694, "grad_norm": 0.16932304203510284, "learning_rate": 1e-05, "loss": 0.501, "step": 4680 }, { "epoch": 1.2953020134228188, "grad_norm": 0.17393814027309418, "learning_rate": 1e-05, "loss": 0.5096, "step": 4681 }, { "epoch": 1.2955787725731682, "grad_norm": 0.1652020961046219, "learning_rate": 1e-05, "loss": 0.5139, "step": 4682 }, { "epoch": 1.2958555317235176, "grad_norm": 0.17445412278175354, "learning_rate": 1e-05, "loss": 0.5142, "step": 4683 }, { "epoch": 1.296132290873867, "grad_norm": 0.17421451210975647, "learning_rate": 1e-05, "loss": 0.5364, "step": 4684 }, { "epoch": 1.2964090500242165, "grad_norm": 0.17407754063606262, "learning_rate": 1e-05, "loss": 0.5462, "step": 4685 }, { "epoch": 1.2966858091745659, "grad_norm": 0.17185232043266296, "learning_rate": 1e-05, "loss": 0.4966, "step": 4686 }, { "epoch": 1.2969625683249153, "grad_norm": 0.17117509245872498, "learning_rate": 1e-05, "loss": 0.505, "step": 4687 }, { "epoch": 1.2972393274752647, "grad_norm": 0.17491382360458374, "learning_rate": 1e-05, "loss": 0.5061, "step": 4688 }, { "epoch": 1.2975160866256141, "grad_norm": 0.1680753380060196, "learning_rate": 1e-05, "loss": 0.5188, "step": 4689 }, { "epoch": 1.2977928457759635, "grad_norm": 0.17282147705554962, "learning_rate": 1e-05, "loss": 0.529, "step": 4690 }, { "epoch": 1.298069604926313, "grad_norm": 0.17049385607242584, "learning_rate": 1e-05, "loss": 0.5182, "step": 4691 }, { "epoch": 1.2983463640766622, "grad_norm": 0.17110289633274078, "learning_rate": 1e-05, "loss": 0.5667, "step": 4692 }, { "epoch": 1.2986231232270118, "grad_norm": 0.17895978689193726, "learning_rate": 1e-05, "loss": 0.4963, "step": 4693 }, { "epoch": 1.298899882377361, "grad_norm": 0.1646633893251419, "learning_rate": 1e-05, "loss": 0.5134, "step": 4694 }, { "epoch": 1.2991766415277106, "grad_norm": 0.17403119802474976, "learning_rate": 1e-05, "loss": 0.5027, "step": 4695 }, { "epoch": 1.2994534006780598, "grad_norm": 0.17589432001113892, "learning_rate": 1e-05, "loss": 0.5052, "step": 4696 }, { "epoch": 1.2997301598284094, "grad_norm": 0.17793132364749908, "learning_rate": 1e-05, "loss": 0.514, "step": 4697 }, { "epoch": 1.3000069189787586, "grad_norm": 0.16710078716278076, "learning_rate": 1e-05, "loss": 0.5062, "step": 4698 }, { "epoch": 1.3002836781291083, "grad_norm": 0.17750318348407745, "learning_rate": 1e-05, "loss": 0.5348, "step": 4699 }, { "epoch": 1.3005604372794575, "grad_norm": 0.17359746992588043, "learning_rate": 1e-05, "loss": 0.5129, "step": 4700 }, { "epoch": 1.3008371964298069, "grad_norm": 0.17973656952381134, "learning_rate": 1e-05, "loss": 0.4987, "step": 4701 }, { "epoch": 1.3011139555801563, "grad_norm": 0.1691453605890274, "learning_rate": 1e-05, "loss": 0.51, "step": 4702 }, { "epoch": 1.3013907147305057, "grad_norm": 0.1648511439561844, "learning_rate": 1e-05, "loss": 0.4911, "step": 4703 }, { "epoch": 1.3016674738808551, "grad_norm": 0.1693110167980194, "learning_rate": 1e-05, "loss": 0.487, "step": 4704 }, { "epoch": 1.3019442330312045, "grad_norm": 0.16826894879341125, "learning_rate": 1e-05, "loss": 0.5123, "step": 4705 }, { "epoch": 1.302220992181554, "grad_norm": 0.17896369099617004, "learning_rate": 1e-05, "loss": 0.515, "step": 4706 }, { "epoch": 1.3024977513319034, "grad_norm": 0.17964142560958862, "learning_rate": 1e-05, "loss": 0.5071, "step": 4707 }, { "epoch": 1.3027745104822528, "grad_norm": 0.16972054541110992, "learning_rate": 1e-05, "loss": 0.4749, "step": 4708 }, { "epoch": 1.3030512696326022, "grad_norm": 0.16560481488704681, "learning_rate": 1e-05, "loss": 0.5269, "step": 4709 }, { "epoch": 1.3033280287829516, "grad_norm": 0.1761196106672287, "learning_rate": 1e-05, "loss": 0.5133, "step": 4710 }, { "epoch": 1.303604787933301, "grad_norm": 0.17714464664459229, "learning_rate": 1e-05, "loss": 0.504, "step": 4711 }, { "epoch": 1.3038815470836504, "grad_norm": 0.17972798645496368, "learning_rate": 1e-05, "loss": 0.5271, "step": 4712 }, { "epoch": 1.3041583062339999, "grad_norm": 0.16957013309001923, "learning_rate": 1e-05, "loss": 0.5147, "step": 4713 }, { "epoch": 1.3044350653843493, "grad_norm": 0.17704443633556366, "learning_rate": 1e-05, "loss": 0.4998, "step": 4714 }, { "epoch": 1.3047118245346987, "grad_norm": 0.1779005229473114, "learning_rate": 1e-05, "loss": 0.5406, "step": 4715 }, { "epoch": 1.304988583685048, "grad_norm": 0.17330054938793182, "learning_rate": 1e-05, "loss": 0.5263, "step": 4716 }, { "epoch": 1.3052653428353975, "grad_norm": 0.17117606103420258, "learning_rate": 1e-05, "loss": 0.4961, "step": 4717 }, { "epoch": 1.305542101985747, "grad_norm": 0.1767159402370453, "learning_rate": 1e-05, "loss": 0.5006, "step": 4718 }, { "epoch": 1.3058188611360964, "grad_norm": 0.16911762952804565, "learning_rate": 1e-05, "loss": 0.5022, "step": 4719 }, { "epoch": 1.3060956202864458, "grad_norm": 0.17218682169914246, "learning_rate": 1e-05, "loss": 0.496, "step": 4720 }, { "epoch": 1.3063723794367952, "grad_norm": 0.16647206246852875, "learning_rate": 1e-05, "loss": 0.5207, "step": 4721 }, { "epoch": 1.3066491385871446, "grad_norm": 0.17403949797153473, "learning_rate": 1e-05, "loss": 0.5067, "step": 4722 }, { "epoch": 1.306925897737494, "grad_norm": 0.16617906093597412, "learning_rate": 1e-05, "loss": 0.5228, "step": 4723 }, { "epoch": 1.3072026568878434, "grad_norm": 0.17393887042999268, "learning_rate": 1e-05, "loss": 0.5141, "step": 4724 }, { "epoch": 1.3074794160381928, "grad_norm": 0.17436982691287994, "learning_rate": 1e-05, "loss": 0.4955, "step": 4725 }, { "epoch": 1.3077561751885423, "grad_norm": 0.17356961965560913, "learning_rate": 1e-05, "loss": 0.5062, "step": 4726 }, { "epoch": 1.3080329343388915, "grad_norm": 0.18844516575336456, "learning_rate": 1e-05, "loss": 0.5645, "step": 4727 }, { "epoch": 1.308309693489241, "grad_norm": 0.17564363777637482, "learning_rate": 1e-05, "loss": 0.5161, "step": 4728 }, { "epoch": 1.3085864526395903, "grad_norm": 0.1759365200996399, "learning_rate": 1e-05, "loss": 0.5155, "step": 4729 }, { "epoch": 1.30886321178994, "grad_norm": 0.19222389161586761, "learning_rate": 1e-05, "loss": 0.5333, "step": 4730 }, { "epoch": 1.309139970940289, "grad_norm": 0.16992834210395813, "learning_rate": 1e-05, "loss": 0.4905, "step": 4731 }, { "epoch": 1.3094167300906387, "grad_norm": 0.16827437281608582, "learning_rate": 1e-05, "loss": 0.5173, "step": 4732 }, { "epoch": 1.309693489240988, "grad_norm": 0.17910262942314148, "learning_rate": 1e-05, "loss": 0.5136, "step": 4733 }, { "epoch": 1.3099702483913376, "grad_norm": 0.1776721179485321, "learning_rate": 1e-05, "loss": 0.537, "step": 4734 }, { "epoch": 1.3102470075416868, "grad_norm": 0.17568284273147583, "learning_rate": 1e-05, "loss": 0.5133, "step": 4735 }, { "epoch": 1.3105237666920362, "grad_norm": 0.1754336953163147, "learning_rate": 1e-05, "loss": 0.53, "step": 4736 }, { "epoch": 1.3108005258423856, "grad_norm": 0.1661696434020996, "learning_rate": 1e-05, "loss": 0.5117, "step": 4737 }, { "epoch": 1.311077284992735, "grad_norm": 0.1764325201511383, "learning_rate": 1e-05, "loss": 0.5133, "step": 4738 }, { "epoch": 1.3113540441430844, "grad_norm": 0.16829213500022888, "learning_rate": 1e-05, "loss": 0.4979, "step": 4739 }, { "epoch": 1.3116308032934338, "grad_norm": 0.171210378408432, "learning_rate": 1e-05, "loss": 0.5156, "step": 4740 }, { "epoch": 1.3119075624437833, "grad_norm": 0.1724245399236679, "learning_rate": 1e-05, "loss": 0.5186, "step": 4741 }, { "epoch": 1.3121843215941327, "grad_norm": 0.17678679525852203, "learning_rate": 1e-05, "loss": 0.5226, "step": 4742 }, { "epoch": 1.312461080744482, "grad_norm": 0.17913131415843964, "learning_rate": 1e-05, "loss": 0.4972, "step": 4743 }, { "epoch": 1.3127378398948315, "grad_norm": 0.17090411484241486, "learning_rate": 1e-05, "loss": 0.5114, "step": 4744 }, { "epoch": 1.313014599045181, "grad_norm": 0.1612505167722702, "learning_rate": 1e-05, "loss": 0.5279, "step": 4745 }, { "epoch": 1.3132913581955303, "grad_norm": 0.18756678700447083, "learning_rate": 1e-05, "loss": 0.5122, "step": 4746 }, { "epoch": 1.3135681173458797, "grad_norm": 0.18003100156784058, "learning_rate": 1e-05, "loss": 0.4974, "step": 4747 }, { "epoch": 1.3138448764962292, "grad_norm": 0.17326965928077698, "learning_rate": 1e-05, "loss": 0.5259, "step": 4748 }, { "epoch": 1.3141216356465786, "grad_norm": 0.16926701366901398, "learning_rate": 1e-05, "loss": 0.5218, "step": 4749 }, { "epoch": 1.314398394796928, "grad_norm": 0.18010851740837097, "learning_rate": 1e-05, "loss": 0.5277, "step": 4750 }, { "epoch": 1.3146751539472774, "grad_norm": 0.17590202391147614, "learning_rate": 1e-05, "loss": 0.4975, "step": 4751 }, { "epoch": 1.3149519130976268, "grad_norm": 0.17320281267166138, "learning_rate": 1e-05, "loss": 0.5016, "step": 4752 }, { "epoch": 1.3152286722479762, "grad_norm": 0.1662915050983429, "learning_rate": 1e-05, "loss": 0.4957, "step": 4753 }, { "epoch": 1.3155054313983257, "grad_norm": 0.1652456820011139, "learning_rate": 1e-05, "loss": 0.5094, "step": 4754 }, { "epoch": 1.315782190548675, "grad_norm": 0.18341109156608582, "learning_rate": 1e-05, "loss": 0.5087, "step": 4755 }, { "epoch": 1.3160589496990245, "grad_norm": 0.16731640696525574, "learning_rate": 1e-05, "loss": 0.503, "step": 4756 }, { "epoch": 1.316335708849374, "grad_norm": 0.17273390293121338, "learning_rate": 1e-05, "loss": 0.5193, "step": 4757 }, { "epoch": 1.3166124679997233, "grad_norm": 0.18552221357822418, "learning_rate": 1e-05, "loss": 0.5258, "step": 4758 }, { "epoch": 1.3168892271500727, "grad_norm": 0.16781127452850342, "learning_rate": 1e-05, "loss": 0.5094, "step": 4759 }, { "epoch": 1.317165986300422, "grad_norm": 0.1688990443944931, "learning_rate": 1e-05, "loss": 0.5252, "step": 4760 }, { "epoch": 1.3174427454507716, "grad_norm": 0.1623779982328415, "learning_rate": 1e-05, "loss": 0.4901, "step": 4761 }, { "epoch": 1.3177195046011208, "grad_norm": 0.18173760175704956, "learning_rate": 1e-05, "loss": 0.5214, "step": 4762 }, { "epoch": 1.3179962637514704, "grad_norm": 0.16615267097949982, "learning_rate": 1e-05, "loss": 0.5265, "step": 4763 }, { "epoch": 1.3182730229018196, "grad_norm": 0.16462630033493042, "learning_rate": 1e-05, "loss": 0.5271, "step": 4764 }, { "epoch": 1.3185497820521692, "grad_norm": 0.17515796422958374, "learning_rate": 1e-05, "loss": 0.5149, "step": 4765 }, { "epoch": 1.3188265412025184, "grad_norm": 0.17264951765537262, "learning_rate": 1e-05, "loss": 0.5317, "step": 4766 }, { "epoch": 1.319103300352868, "grad_norm": 0.17645058035850525, "learning_rate": 1e-05, "loss": 0.4905, "step": 4767 }, { "epoch": 1.3193800595032172, "grad_norm": 0.17172951996326447, "learning_rate": 1e-05, "loss": 0.5015, "step": 4768 }, { "epoch": 1.3196568186535667, "grad_norm": 0.1779409646987915, "learning_rate": 1e-05, "loss": 0.509, "step": 4769 }, { "epoch": 1.319933577803916, "grad_norm": 0.17247053980827332, "learning_rate": 1e-05, "loss": 0.4976, "step": 4770 }, { "epoch": 1.3202103369542655, "grad_norm": 0.16673868894577026, "learning_rate": 1e-05, "loss": 0.4962, "step": 4771 }, { "epoch": 1.320487096104615, "grad_norm": 0.16055721044540405, "learning_rate": 1e-05, "loss": 0.4809, "step": 4772 }, { "epoch": 1.3207638552549643, "grad_norm": 0.17632663249969482, "learning_rate": 1e-05, "loss": 0.5155, "step": 4773 }, { "epoch": 1.3210406144053137, "grad_norm": 0.17455941438674927, "learning_rate": 1e-05, "loss": 0.5064, "step": 4774 }, { "epoch": 1.3213173735556631, "grad_norm": 0.1755576878786087, "learning_rate": 1e-05, "loss": 0.545, "step": 4775 }, { "epoch": 1.3215941327060126, "grad_norm": 0.17459118366241455, "learning_rate": 1e-05, "loss": 0.515, "step": 4776 }, { "epoch": 1.321870891856362, "grad_norm": 0.17288702726364136, "learning_rate": 1e-05, "loss": 0.5009, "step": 4777 }, { "epoch": 1.3221476510067114, "grad_norm": 0.16858677566051483, "learning_rate": 1e-05, "loss": 0.5011, "step": 4778 }, { "epoch": 1.3224244101570608, "grad_norm": 0.1698942631483078, "learning_rate": 1e-05, "loss": 0.5063, "step": 4779 }, { "epoch": 1.3227011693074102, "grad_norm": 0.17525048553943634, "learning_rate": 1e-05, "loss": 0.4822, "step": 4780 }, { "epoch": 1.3229779284577596, "grad_norm": 0.15912586450576782, "learning_rate": 1e-05, "loss": 0.5038, "step": 4781 }, { "epoch": 1.323254687608109, "grad_norm": 0.17581519484519958, "learning_rate": 1e-05, "loss": 0.5372, "step": 4782 }, { "epoch": 1.3235314467584585, "grad_norm": 0.17945189774036407, "learning_rate": 1e-05, "loss": 0.4897, "step": 4783 }, { "epoch": 1.3238082059088079, "grad_norm": 0.18186397850513458, "learning_rate": 1e-05, "loss": 0.4882, "step": 4784 }, { "epoch": 1.3240849650591573, "grad_norm": 0.1836584359407425, "learning_rate": 1e-05, "loss": 0.5307, "step": 4785 }, { "epoch": 1.3243617242095067, "grad_norm": 0.1686200648546219, "learning_rate": 1e-05, "loss": 0.5, "step": 4786 }, { "epoch": 1.3246384833598561, "grad_norm": 0.1674656867980957, "learning_rate": 1e-05, "loss": 0.4878, "step": 4787 }, { "epoch": 1.3249152425102055, "grad_norm": 0.1585969626903534, "learning_rate": 1e-05, "loss": 0.492, "step": 4788 }, { "epoch": 1.325192001660555, "grad_norm": 0.1678539365530014, "learning_rate": 1e-05, "loss": 0.5225, "step": 4789 }, { "epoch": 1.3254687608109044, "grad_norm": 0.1813361644744873, "learning_rate": 1e-05, "loss": 0.5172, "step": 4790 }, { "epoch": 1.3257455199612538, "grad_norm": 0.16658681631088257, "learning_rate": 1e-05, "loss": 0.4968, "step": 4791 }, { "epoch": 1.3260222791116032, "grad_norm": 0.1699797660112381, "learning_rate": 1e-05, "loss": 0.4963, "step": 4792 }, { "epoch": 1.3262990382619526, "grad_norm": 0.175380140542984, "learning_rate": 1e-05, "loss": 0.524, "step": 4793 }, { "epoch": 1.326575797412302, "grad_norm": 0.192968487739563, "learning_rate": 1e-05, "loss": 0.5232, "step": 4794 }, { "epoch": 1.3268525565626512, "grad_norm": 0.17919492721557617, "learning_rate": 1e-05, "loss": 0.5195, "step": 4795 }, { "epoch": 1.3271293157130009, "grad_norm": 0.17850051820278168, "learning_rate": 1e-05, "loss": 0.488, "step": 4796 }, { "epoch": 1.32740607486335, "grad_norm": 0.1695319563150406, "learning_rate": 1e-05, "loss": 0.5181, "step": 4797 }, { "epoch": 1.3276828340136997, "grad_norm": 0.18189319968223572, "learning_rate": 1e-05, "loss": 0.536, "step": 4798 }, { "epoch": 1.3279595931640489, "grad_norm": 0.1681692749261856, "learning_rate": 1e-05, "loss": 0.5141, "step": 4799 }, { "epoch": 1.3282363523143985, "grad_norm": 0.1680491417646408, "learning_rate": 1e-05, "loss": 0.5146, "step": 4800 }, { "epoch": 1.3285131114647477, "grad_norm": 0.1770651936531067, "learning_rate": 1e-05, "loss": 0.5144, "step": 4801 }, { "epoch": 1.3287898706150973, "grad_norm": 0.17958612740039825, "learning_rate": 1e-05, "loss": 0.5106, "step": 4802 }, { "epoch": 1.3290666297654465, "grad_norm": 0.17642080783843994, "learning_rate": 1e-05, "loss": 0.4944, "step": 4803 }, { "epoch": 1.329343388915796, "grad_norm": 0.17311877012252808, "learning_rate": 1e-05, "loss": 0.5055, "step": 4804 }, { "epoch": 1.3296201480661454, "grad_norm": 0.17590388655662537, "learning_rate": 1e-05, "loss": 0.5036, "step": 4805 }, { "epoch": 1.3298969072164948, "grad_norm": 0.1767399162054062, "learning_rate": 1e-05, "loss": 0.5136, "step": 4806 }, { "epoch": 1.3301736663668442, "grad_norm": 0.16912215948104858, "learning_rate": 1e-05, "loss": 0.5216, "step": 4807 }, { "epoch": 1.3304504255171936, "grad_norm": 0.17882516980171204, "learning_rate": 1e-05, "loss": 0.5232, "step": 4808 }, { "epoch": 1.330727184667543, "grad_norm": 0.17492404580116272, "learning_rate": 1e-05, "loss": 0.5196, "step": 4809 }, { "epoch": 1.3310039438178924, "grad_norm": 0.17033295333385468, "learning_rate": 1e-05, "loss": 0.5232, "step": 4810 }, { "epoch": 1.3312807029682419, "grad_norm": 0.16848887503147125, "learning_rate": 1e-05, "loss": 0.5181, "step": 4811 }, { "epoch": 1.3315574621185913, "grad_norm": 0.17215897142887115, "learning_rate": 1e-05, "loss": 0.5188, "step": 4812 }, { "epoch": 1.3318342212689407, "grad_norm": 0.17365919053554535, "learning_rate": 1e-05, "loss": 0.505, "step": 4813 }, { "epoch": 1.33211098041929, "grad_norm": 0.16578902304172516, "learning_rate": 1e-05, "loss": 0.5097, "step": 4814 }, { "epoch": 1.3323877395696395, "grad_norm": 0.1691289246082306, "learning_rate": 1e-05, "loss": 0.5344, "step": 4815 }, { "epoch": 1.332664498719989, "grad_norm": 0.17478717863559723, "learning_rate": 1e-05, "loss": 0.5329, "step": 4816 }, { "epoch": 1.3329412578703383, "grad_norm": 0.1724739670753479, "learning_rate": 1e-05, "loss": 0.4957, "step": 4817 }, { "epoch": 1.3332180170206878, "grad_norm": 0.17130188643932343, "learning_rate": 1e-05, "loss": 0.4944, "step": 4818 }, { "epoch": 1.3334947761710372, "grad_norm": 0.17137134075164795, "learning_rate": 1e-05, "loss": 0.515, "step": 4819 }, { "epoch": 1.3337715353213866, "grad_norm": 0.16657428443431854, "learning_rate": 1e-05, "loss": 0.5113, "step": 4820 }, { "epoch": 1.334048294471736, "grad_norm": 0.16498427093029022, "learning_rate": 1e-05, "loss": 0.5005, "step": 4821 }, { "epoch": 1.3343250536220854, "grad_norm": 0.16469189524650574, "learning_rate": 1e-05, "loss": 0.5167, "step": 4822 }, { "epoch": 1.3346018127724348, "grad_norm": 0.16664335131645203, "learning_rate": 1e-05, "loss": 0.5054, "step": 4823 }, { "epoch": 1.3348785719227843, "grad_norm": 0.162863090634346, "learning_rate": 1e-05, "loss": 0.5219, "step": 4824 }, { "epoch": 1.3351553310731337, "grad_norm": 0.18135637044906616, "learning_rate": 1e-05, "loss": 0.5038, "step": 4825 }, { "epoch": 1.335432090223483, "grad_norm": 0.1769382655620575, "learning_rate": 1e-05, "loss": 0.5104, "step": 4826 }, { "epoch": 1.3357088493738325, "grad_norm": 0.16598521173000336, "learning_rate": 1e-05, "loss": 0.4916, "step": 4827 }, { "epoch": 1.335985608524182, "grad_norm": 0.17242203652858734, "learning_rate": 1e-05, "loss": 0.5122, "step": 4828 }, { "epoch": 1.3362623676745313, "grad_norm": 0.16886165738105774, "learning_rate": 1e-05, "loss": 0.4984, "step": 4829 }, { "epoch": 1.3365391268248805, "grad_norm": 0.1636175513267517, "learning_rate": 1e-05, "loss": 0.4914, "step": 4830 }, { "epoch": 1.3368158859752302, "grad_norm": 0.17797791957855225, "learning_rate": 1e-05, "loss": 0.5085, "step": 4831 }, { "epoch": 1.3370926451255793, "grad_norm": 0.1684337854385376, "learning_rate": 1e-05, "loss": 0.5126, "step": 4832 }, { "epoch": 1.337369404275929, "grad_norm": 0.17354126274585724, "learning_rate": 1e-05, "loss": 0.5222, "step": 4833 }, { "epoch": 1.3376461634262782, "grad_norm": 0.18245786428451538, "learning_rate": 1e-05, "loss": 0.5256, "step": 4834 }, { "epoch": 1.3379229225766278, "grad_norm": 0.17054487764835358, "learning_rate": 1e-05, "loss": 0.5064, "step": 4835 }, { "epoch": 1.338199681726977, "grad_norm": 0.1639653593301773, "learning_rate": 1e-05, "loss": 0.4926, "step": 4836 }, { "epoch": 1.3384764408773266, "grad_norm": 0.17196466028690338, "learning_rate": 1e-05, "loss": 0.5244, "step": 4837 }, { "epoch": 1.3387532000276758, "grad_norm": 0.1619420349597931, "learning_rate": 1e-05, "loss": 0.5281, "step": 4838 }, { "epoch": 1.3390299591780253, "grad_norm": 0.17143535614013672, "learning_rate": 1e-05, "loss": 0.5046, "step": 4839 }, { "epoch": 1.3393067183283747, "grad_norm": 0.16496378183364868, "learning_rate": 1e-05, "loss": 0.5231, "step": 4840 }, { "epoch": 1.339583477478724, "grad_norm": 0.16596297919750214, "learning_rate": 1e-05, "loss": 0.4981, "step": 4841 }, { "epoch": 1.3398602366290735, "grad_norm": 0.1736653596162796, "learning_rate": 1e-05, "loss": 0.5493, "step": 4842 }, { "epoch": 1.340136995779423, "grad_norm": 0.1687522679567337, "learning_rate": 1e-05, "loss": 0.5373, "step": 4843 }, { "epoch": 1.3404137549297723, "grad_norm": 0.17487788200378418, "learning_rate": 1e-05, "loss": 0.5173, "step": 4844 }, { "epoch": 1.3406905140801217, "grad_norm": 0.17002983391284943, "learning_rate": 1e-05, "loss": 0.5096, "step": 4845 }, { "epoch": 1.3409672732304712, "grad_norm": 0.16866816580295563, "learning_rate": 1e-05, "loss": 0.5313, "step": 4846 }, { "epoch": 1.3412440323808206, "grad_norm": 0.1748374104499817, "learning_rate": 1e-05, "loss": 0.5216, "step": 4847 }, { "epoch": 1.34152079153117, "grad_norm": 0.17063353955745697, "learning_rate": 1e-05, "loss": 0.5351, "step": 4848 }, { "epoch": 1.3417975506815194, "grad_norm": 0.166826993227005, "learning_rate": 1e-05, "loss": 0.4951, "step": 4849 }, { "epoch": 1.3420743098318688, "grad_norm": 0.17721249163150787, "learning_rate": 1e-05, "loss": 0.5044, "step": 4850 }, { "epoch": 1.3423510689822182, "grad_norm": 0.16730619966983795, "learning_rate": 1e-05, "loss": 0.5139, "step": 4851 }, { "epoch": 1.3426278281325676, "grad_norm": 0.16730546951293945, "learning_rate": 1e-05, "loss": 0.5293, "step": 4852 }, { "epoch": 1.342904587282917, "grad_norm": 0.17972059547901154, "learning_rate": 1e-05, "loss": 0.521, "step": 4853 }, { "epoch": 1.3431813464332665, "grad_norm": 0.1802917718887329, "learning_rate": 1e-05, "loss": 0.55, "step": 4854 }, { "epoch": 1.343458105583616, "grad_norm": 0.17319674789905548, "learning_rate": 1e-05, "loss": 0.5013, "step": 4855 }, { "epoch": 1.3437348647339653, "grad_norm": 0.1746973693370819, "learning_rate": 1e-05, "loss": 0.505, "step": 4856 }, { "epoch": 1.3440116238843147, "grad_norm": 0.17773163318634033, "learning_rate": 1e-05, "loss": 0.5159, "step": 4857 }, { "epoch": 1.3442883830346641, "grad_norm": 0.16914871335029602, "learning_rate": 1e-05, "loss": 0.5241, "step": 4858 }, { "epoch": 1.3445651421850136, "grad_norm": 0.1786206066608429, "learning_rate": 1e-05, "loss": 0.5203, "step": 4859 }, { "epoch": 1.344841901335363, "grad_norm": 0.173251673579216, "learning_rate": 1e-05, "loss": 0.4995, "step": 4860 }, { "epoch": 1.3451186604857124, "grad_norm": 0.16806848347187042, "learning_rate": 1e-05, "loss": 0.5286, "step": 4861 }, { "epoch": 1.3453954196360618, "grad_norm": 0.18509361147880554, "learning_rate": 1e-05, "loss": 0.5229, "step": 4862 }, { "epoch": 1.345672178786411, "grad_norm": 0.1799076646566391, "learning_rate": 1e-05, "loss": 0.5036, "step": 4863 }, { "epoch": 1.3459489379367606, "grad_norm": 0.1703077256679535, "learning_rate": 1e-05, "loss": 0.4853, "step": 4864 }, { "epoch": 1.3462256970871098, "grad_norm": 0.17601387202739716, "learning_rate": 1e-05, "loss": 0.496, "step": 4865 }, { "epoch": 1.3465024562374595, "grad_norm": 0.16126905381679535, "learning_rate": 1e-05, "loss": 0.5239, "step": 4866 }, { "epoch": 1.3467792153878086, "grad_norm": 0.17842446267604828, "learning_rate": 1e-05, "loss": 0.5336, "step": 4867 }, { "epoch": 1.3470559745381583, "grad_norm": 0.1868063062429428, "learning_rate": 1e-05, "loss": 0.5122, "step": 4868 }, { "epoch": 1.3473327336885075, "grad_norm": 0.1737649291753769, "learning_rate": 1e-05, "loss": 0.5196, "step": 4869 }, { "epoch": 1.3476094928388571, "grad_norm": 0.16291543841362, "learning_rate": 1e-05, "loss": 0.5058, "step": 4870 }, { "epoch": 1.3478862519892063, "grad_norm": 0.16199184954166412, "learning_rate": 1e-05, "loss": 0.506, "step": 4871 }, { "epoch": 1.3481630111395557, "grad_norm": 0.17554448544979095, "learning_rate": 1e-05, "loss": 0.5319, "step": 4872 }, { "epoch": 1.3484397702899051, "grad_norm": 0.17620396614074707, "learning_rate": 1e-05, "loss": 0.5159, "step": 4873 }, { "epoch": 1.3487165294402546, "grad_norm": 0.16716426610946655, "learning_rate": 1e-05, "loss": 0.5025, "step": 4874 }, { "epoch": 1.348993288590604, "grad_norm": 0.17813587188720703, "learning_rate": 1e-05, "loss": 0.5223, "step": 4875 }, { "epoch": 1.3492700477409534, "grad_norm": 0.16648076474666595, "learning_rate": 1e-05, "loss": 0.5291, "step": 4876 }, { "epoch": 1.3495468068913028, "grad_norm": 0.1871509850025177, "learning_rate": 1e-05, "loss": 0.5274, "step": 4877 }, { "epoch": 1.3498235660416522, "grad_norm": 0.1773994266986847, "learning_rate": 1e-05, "loss": 0.4877, "step": 4878 }, { "epoch": 1.3501003251920016, "grad_norm": 0.16922447085380554, "learning_rate": 1e-05, "loss": 0.4818, "step": 4879 }, { "epoch": 1.350377084342351, "grad_norm": 0.16670091450214386, "learning_rate": 1e-05, "loss": 0.5031, "step": 4880 }, { "epoch": 1.3506538434927005, "grad_norm": 0.17248737812042236, "learning_rate": 1e-05, "loss": 0.5283, "step": 4881 }, { "epoch": 1.3509306026430499, "grad_norm": 0.1823015660047531, "learning_rate": 1e-05, "loss": 0.5089, "step": 4882 }, { "epoch": 1.3512073617933993, "grad_norm": 0.1720418781042099, "learning_rate": 1e-05, "loss": 0.5382, "step": 4883 }, { "epoch": 1.3514841209437487, "grad_norm": 0.1735372245311737, "learning_rate": 1e-05, "loss": 0.5397, "step": 4884 }, { "epoch": 1.3517608800940981, "grad_norm": 0.17577730119228363, "learning_rate": 1e-05, "loss": 0.5406, "step": 4885 }, { "epoch": 1.3520376392444475, "grad_norm": 0.1728721559047699, "learning_rate": 1e-05, "loss": 0.517, "step": 4886 }, { "epoch": 1.352314398394797, "grad_norm": 0.17254291474819183, "learning_rate": 1e-05, "loss": 0.5312, "step": 4887 }, { "epoch": 1.3525911575451464, "grad_norm": 0.1720219850540161, "learning_rate": 1e-05, "loss": 0.5189, "step": 4888 }, { "epoch": 1.3528679166954958, "grad_norm": 0.17457520961761475, "learning_rate": 1e-05, "loss": 0.5042, "step": 4889 }, { "epoch": 1.3531446758458452, "grad_norm": 0.17108190059661865, "learning_rate": 1e-05, "loss": 0.5151, "step": 4890 }, { "epoch": 1.3534214349961946, "grad_norm": 0.1754976063966751, "learning_rate": 1e-05, "loss": 0.5035, "step": 4891 }, { "epoch": 1.353698194146544, "grad_norm": 0.16618266701698303, "learning_rate": 1e-05, "loss": 0.522, "step": 4892 }, { "epoch": 1.3539749532968934, "grad_norm": 0.1694563776254654, "learning_rate": 1e-05, "loss": 0.5364, "step": 4893 }, { "epoch": 1.3542517124472429, "grad_norm": 0.18364806473255157, "learning_rate": 1e-05, "loss": 0.5183, "step": 4894 }, { "epoch": 1.3545284715975923, "grad_norm": 0.16794146597385406, "learning_rate": 1e-05, "loss": 0.5176, "step": 4895 }, { "epoch": 1.3548052307479417, "grad_norm": 0.17273764312267303, "learning_rate": 1e-05, "loss": 0.5122, "step": 4896 }, { "epoch": 1.355081989898291, "grad_norm": 0.17595431208610535, "learning_rate": 1e-05, "loss": 0.541, "step": 4897 }, { "epoch": 1.3553587490486403, "grad_norm": 0.17925867438316345, "learning_rate": 1e-05, "loss": 0.5213, "step": 4898 }, { "epoch": 1.35563550819899, "grad_norm": 0.16428440809249878, "learning_rate": 1e-05, "loss": 0.5188, "step": 4899 }, { "epoch": 1.3559122673493391, "grad_norm": 0.17495334148406982, "learning_rate": 1e-05, "loss": 0.5152, "step": 4900 }, { "epoch": 1.3561890264996888, "grad_norm": 0.16458524763584137, "learning_rate": 1e-05, "loss": 0.4901, "step": 4901 }, { "epoch": 1.356465785650038, "grad_norm": 0.1681058555841446, "learning_rate": 1e-05, "loss": 0.4905, "step": 4902 }, { "epoch": 1.3567425448003876, "grad_norm": 0.16965517401695251, "learning_rate": 1e-05, "loss": 0.4617, "step": 4903 }, { "epoch": 1.3570193039507368, "grad_norm": 0.1680353879928589, "learning_rate": 1e-05, "loss": 0.5055, "step": 4904 }, { "epoch": 1.3572960631010864, "grad_norm": 0.17708781361579895, "learning_rate": 1e-05, "loss": 0.5261, "step": 4905 }, { "epoch": 1.3575728222514356, "grad_norm": 0.17825129628181458, "learning_rate": 1e-05, "loss": 0.5035, "step": 4906 }, { "epoch": 1.357849581401785, "grad_norm": 0.17475442588329315, "learning_rate": 1e-05, "loss": 0.5212, "step": 4907 }, { "epoch": 1.3581263405521344, "grad_norm": 0.17409439384937286, "learning_rate": 1e-05, "loss": 0.4981, "step": 4908 }, { "epoch": 1.3584030997024839, "grad_norm": 0.17596636712551117, "learning_rate": 1e-05, "loss": 0.5029, "step": 4909 }, { "epoch": 1.3586798588528333, "grad_norm": 0.17946267127990723, "learning_rate": 1e-05, "loss": 0.5175, "step": 4910 }, { "epoch": 1.3589566180031827, "grad_norm": 0.17427873611450195, "learning_rate": 1e-05, "loss": 0.5146, "step": 4911 }, { "epoch": 1.359233377153532, "grad_norm": 0.16862142086029053, "learning_rate": 1e-05, "loss": 0.484, "step": 4912 }, { "epoch": 1.3595101363038815, "grad_norm": 0.16985133290290833, "learning_rate": 1e-05, "loss": 0.5188, "step": 4913 }, { "epoch": 1.359786895454231, "grad_norm": 0.16927430033683777, "learning_rate": 1e-05, "loss": 0.5158, "step": 4914 }, { "epoch": 1.3600636546045803, "grad_norm": 0.1814134120941162, "learning_rate": 1e-05, "loss": 0.5139, "step": 4915 }, { "epoch": 1.3603404137549298, "grad_norm": 0.1629602164030075, "learning_rate": 1e-05, "loss": 0.5037, "step": 4916 }, { "epoch": 1.3606171729052792, "grad_norm": 0.17988640069961548, "learning_rate": 1e-05, "loss": 0.5034, "step": 4917 }, { "epoch": 1.3608939320556286, "grad_norm": 0.1644207090139389, "learning_rate": 1e-05, "loss": 0.5181, "step": 4918 }, { "epoch": 1.361170691205978, "grad_norm": 0.17936329543590546, "learning_rate": 1e-05, "loss": 0.5135, "step": 4919 }, { "epoch": 1.3614474503563274, "grad_norm": 0.17758075892925262, "learning_rate": 1e-05, "loss": 0.5224, "step": 4920 }, { "epoch": 1.3617242095066768, "grad_norm": 0.17076243460178375, "learning_rate": 1e-05, "loss": 0.5273, "step": 4921 }, { "epoch": 1.3620009686570262, "grad_norm": 0.16385658085346222, "learning_rate": 1e-05, "loss": 0.5022, "step": 4922 }, { "epoch": 1.3622777278073757, "grad_norm": 0.17137563228607178, "learning_rate": 1e-05, "loss": 0.5044, "step": 4923 }, { "epoch": 1.362554486957725, "grad_norm": 0.16754209995269775, "learning_rate": 1e-05, "loss": 0.5158, "step": 4924 }, { "epoch": 1.3628312461080745, "grad_norm": 0.1737937033176422, "learning_rate": 1e-05, "loss": 0.5373, "step": 4925 }, { "epoch": 1.363108005258424, "grad_norm": 0.17897778749465942, "learning_rate": 1e-05, "loss": 0.4924, "step": 4926 }, { "epoch": 1.3633847644087733, "grad_norm": 0.1718917340040207, "learning_rate": 1e-05, "loss": 0.5369, "step": 4927 }, { "epoch": 1.3636615235591227, "grad_norm": 0.1727554202079773, "learning_rate": 1e-05, "loss": 0.5061, "step": 4928 }, { "epoch": 1.3639382827094721, "grad_norm": 0.17345812916755676, "learning_rate": 1e-05, "loss": 0.5157, "step": 4929 }, { "epoch": 1.3642150418598216, "grad_norm": 0.17578858137130737, "learning_rate": 1e-05, "loss": 0.5333, "step": 4930 }, { "epoch": 1.364491801010171, "grad_norm": 0.18099795281887054, "learning_rate": 1e-05, "loss": 0.5192, "step": 4931 }, { "epoch": 1.3647685601605204, "grad_norm": 0.17687155306339264, "learning_rate": 1e-05, "loss": 0.5181, "step": 4932 }, { "epoch": 1.3650453193108696, "grad_norm": 0.17512871325016022, "learning_rate": 1e-05, "loss": 0.5229, "step": 4933 }, { "epoch": 1.3653220784612192, "grad_norm": 0.18337887525558472, "learning_rate": 1e-05, "loss": 0.5404, "step": 4934 }, { "epoch": 1.3655988376115684, "grad_norm": 0.1747918576002121, "learning_rate": 1e-05, "loss": 0.5459, "step": 4935 }, { "epoch": 1.365875596761918, "grad_norm": 0.17607614398002625, "learning_rate": 1e-05, "loss": 0.5183, "step": 4936 }, { "epoch": 1.3661523559122672, "grad_norm": 0.17993614077568054, "learning_rate": 1e-05, "loss": 0.4688, "step": 4937 }, { "epoch": 1.3664291150626169, "grad_norm": 0.1698361486196518, "learning_rate": 1e-05, "loss": 0.4915, "step": 4938 }, { "epoch": 1.366705874212966, "grad_norm": 0.173442542552948, "learning_rate": 1e-05, "loss": 0.5056, "step": 4939 }, { "epoch": 1.3669826333633157, "grad_norm": 0.17304202914237976, "learning_rate": 1e-05, "loss": 0.4925, "step": 4940 }, { "epoch": 1.367259392513665, "grad_norm": 0.17288249731063843, "learning_rate": 1e-05, "loss": 0.5254, "step": 4941 }, { "epoch": 1.3675361516640143, "grad_norm": 0.17282579839229584, "learning_rate": 1e-05, "loss": 0.4975, "step": 4942 }, { "epoch": 1.3678129108143637, "grad_norm": 0.18236806988716125, "learning_rate": 1e-05, "loss": 0.5404, "step": 4943 }, { "epoch": 1.3680896699647132, "grad_norm": 0.17273853719234467, "learning_rate": 1e-05, "loss": 0.475, "step": 4944 }, { "epoch": 1.3683664291150626, "grad_norm": 0.15939995646476746, "learning_rate": 1e-05, "loss": 0.521, "step": 4945 }, { "epoch": 1.368643188265412, "grad_norm": 0.16381248831748962, "learning_rate": 1e-05, "loss": 0.4897, "step": 4946 }, { "epoch": 1.3689199474157614, "grad_norm": 0.16902688145637512, "learning_rate": 1e-05, "loss": 0.5134, "step": 4947 }, { "epoch": 1.3691967065661108, "grad_norm": 0.16807058453559875, "learning_rate": 1e-05, "loss": 0.5255, "step": 4948 }, { "epoch": 1.3694734657164602, "grad_norm": 0.17257548868656158, "learning_rate": 1e-05, "loss": 0.4709, "step": 4949 }, { "epoch": 1.3697502248668096, "grad_norm": 0.16953544318675995, "learning_rate": 1e-05, "loss": 0.5168, "step": 4950 }, { "epoch": 1.370026984017159, "grad_norm": 0.17008447647094727, "learning_rate": 1e-05, "loss": 0.4947, "step": 4951 }, { "epoch": 1.3703037431675085, "grad_norm": 0.17229345440864563, "learning_rate": 1e-05, "loss": 0.5119, "step": 4952 }, { "epoch": 1.3705805023178579, "grad_norm": 0.16957645118236542, "learning_rate": 1e-05, "loss": 0.5111, "step": 4953 }, { "epoch": 1.3708572614682073, "grad_norm": 0.17459626495838165, "learning_rate": 1e-05, "loss": 0.5067, "step": 4954 }, { "epoch": 1.3711340206185567, "grad_norm": 0.1679563969373703, "learning_rate": 1e-05, "loss": 0.5115, "step": 4955 }, { "epoch": 1.3714107797689061, "grad_norm": 0.17986606061458588, "learning_rate": 1e-05, "loss": 0.5264, "step": 4956 }, { "epoch": 1.3716875389192555, "grad_norm": 0.16910827159881592, "learning_rate": 1e-05, "loss": 0.5439, "step": 4957 }, { "epoch": 1.371964298069605, "grad_norm": 0.1679869294166565, "learning_rate": 1e-05, "loss": 0.5213, "step": 4958 }, { "epoch": 1.3722410572199544, "grad_norm": 0.16844604909420013, "learning_rate": 1e-05, "loss": 0.5031, "step": 4959 }, { "epoch": 1.3725178163703038, "grad_norm": 0.17815928161144257, "learning_rate": 1e-05, "loss": 0.5105, "step": 4960 }, { "epoch": 1.3727945755206532, "grad_norm": 0.1699095517396927, "learning_rate": 1e-05, "loss": 0.5177, "step": 4961 }, { "epoch": 1.3730713346710026, "grad_norm": 0.1753484159708023, "learning_rate": 1e-05, "loss": 0.5175, "step": 4962 }, { "epoch": 1.373348093821352, "grad_norm": 0.17204663157463074, "learning_rate": 1e-05, "loss": 0.5156, "step": 4963 }, { "epoch": 1.3736248529717014, "grad_norm": 0.17324505746364594, "learning_rate": 1e-05, "loss": 0.5122, "step": 4964 }, { "epoch": 1.3739016121220509, "grad_norm": 0.17231236398220062, "learning_rate": 1e-05, "loss": 0.4812, "step": 4965 }, { "epoch": 1.3741783712724, "grad_norm": 0.17633497714996338, "learning_rate": 1e-05, "loss": 0.503, "step": 4966 }, { "epoch": 1.3744551304227497, "grad_norm": 0.16446001827716827, "learning_rate": 1e-05, "loss": 0.496, "step": 4967 }, { "epoch": 1.3747318895730989, "grad_norm": 0.17584779858589172, "learning_rate": 1e-05, "loss": 0.4936, "step": 4968 }, { "epoch": 1.3750086487234485, "grad_norm": 0.16834527254104614, "learning_rate": 1e-05, "loss": 0.4764, "step": 4969 }, { "epoch": 1.3752854078737977, "grad_norm": 0.17197799682617188, "learning_rate": 1e-05, "loss": 0.5256, "step": 4970 }, { "epoch": 1.3755621670241474, "grad_norm": 0.16657337546348572, "learning_rate": 1e-05, "loss": 0.466, "step": 4971 }, { "epoch": 1.3758389261744965, "grad_norm": 0.16865594685077667, "learning_rate": 1e-05, "loss": 0.4732, "step": 4972 }, { "epoch": 1.3761156853248462, "grad_norm": 0.16984203457832336, "learning_rate": 1e-05, "loss": 0.5239, "step": 4973 }, { "epoch": 1.3763924444751954, "grad_norm": 0.181301087141037, "learning_rate": 1e-05, "loss": 0.5573, "step": 4974 }, { "epoch": 1.3766692036255448, "grad_norm": 0.1752910166978836, "learning_rate": 1e-05, "loss": 0.505, "step": 4975 }, { "epoch": 1.3769459627758942, "grad_norm": 0.16768914461135864, "learning_rate": 1e-05, "loss": 0.5052, "step": 4976 }, { "epoch": 1.3772227219262436, "grad_norm": 0.17330987751483917, "learning_rate": 1e-05, "loss": 0.4874, "step": 4977 }, { "epoch": 1.377499481076593, "grad_norm": 0.16536960005760193, "learning_rate": 1e-05, "loss": 0.5024, "step": 4978 }, { "epoch": 1.3777762402269425, "grad_norm": 0.16870713233947754, "learning_rate": 1e-05, "loss": 0.4848, "step": 4979 }, { "epoch": 1.3780529993772919, "grad_norm": 0.17118315398693085, "learning_rate": 1e-05, "loss": 0.5317, "step": 4980 }, { "epoch": 1.3783297585276413, "grad_norm": 0.17117489874362946, "learning_rate": 1e-05, "loss": 0.4728, "step": 4981 }, { "epoch": 1.3786065176779907, "grad_norm": 0.1656055450439453, "learning_rate": 1e-05, "loss": 0.5028, "step": 4982 }, { "epoch": 1.37888327682834, "grad_norm": 0.17034950852394104, "learning_rate": 1e-05, "loss": 0.5133, "step": 4983 }, { "epoch": 1.3791600359786895, "grad_norm": 0.18222682178020477, "learning_rate": 1e-05, "loss": 0.4905, "step": 4984 }, { "epoch": 1.379436795129039, "grad_norm": 0.17165571451187134, "learning_rate": 1e-05, "loss": 0.4975, "step": 4985 }, { "epoch": 1.3797135542793884, "grad_norm": 0.16102822124958038, "learning_rate": 1e-05, "loss": 0.5303, "step": 4986 }, { "epoch": 1.3799903134297378, "grad_norm": 0.17077641189098358, "learning_rate": 1e-05, "loss": 0.5077, "step": 4987 }, { "epoch": 1.3802670725800872, "grad_norm": 0.17787466943264008, "learning_rate": 1e-05, "loss": 0.5413, "step": 4988 }, { "epoch": 1.3805438317304366, "grad_norm": 0.16619352996349335, "learning_rate": 1e-05, "loss": 0.5052, "step": 4989 }, { "epoch": 1.380820590880786, "grad_norm": 0.1685626208782196, "learning_rate": 1e-05, "loss": 0.5047, "step": 4990 }, { "epoch": 1.3810973500311354, "grad_norm": 0.17309455573558807, "learning_rate": 1e-05, "loss": 0.5117, "step": 4991 }, { "epoch": 1.3813741091814848, "grad_norm": 0.16801291704177856, "learning_rate": 1e-05, "loss": 0.5083, "step": 4992 }, { "epoch": 1.3816508683318343, "grad_norm": 0.17231954634189606, "learning_rate": 1e-05, "loss": 0.5255, "step": 4993 }, { "epoch": 1.3819276274821837, "grad_norm": 0.170889213681221, "learning_rate": 1e-05, "loss": 0.5178, "step": 4994 }, { "epoch": 1.382204386632533, "grad_norm": 0.17418302595615387, "learning_rate": 1e-05, "loss": 0.5349, "step": 4995 }, { "epoch": 1.3824811457828825, "grad_norm": 0.1761275827884674, "learning_rate": 1e-05, "loss": 0.5375, "step": 4996 }, { "epoch": 1.382757904933232, "grad_norm": 0.1685614436864853, "learning_rate": 1e-05, "loss": 0.4747, "step": 4997 }, { "epoch": 1.3830346640835813, "grad_norm": 0.16578984260559082, "learning_rate": 1e-05, "loss": 0.5131, "step": 4998 }, { "epoch": 1.3833114232339307, "grad_norm": 0.1763755828142166, "learning_rate": 1e-05, "loss": 0.5589, "step": 4999 }, { "epoch": 1.3835881823842802, "grad_norm": 0.1836429387331009, "learning_rate": 1e-05, "loss": 0.5295, "step": 5000 }, { "epoch": 1.3838649415346294, "grad_norm": 0.17831476032733917, "learning_rate": 1e-05, "loss": 0.4979, "step": 5001 }, { "epoch": 1.384141700684979, "grad_norm": 0.16536158323287964, "learning_rate": 1e-05, "loss": 0.5169, "step": 5002 }, { "epoch": 1.3844184598353282, "grad_norm": 0.17533034086227417, "learning_rate": 1e-05, "loss": 0.5139, "step": 5003 }, { "epoch": 1.3846952189856778, "grad_norm": 0.17751818895339966, "learning_rate": 1e-05, "loss": 0.513, "step": 5004 }, { "epoch": 1.384971978136027, "grad_norm": 0.1749873161315918, "learning_rate": 1e-05, "loss": 0.5027, "step": 5005 }, { "epoch": 1.3852487372863767, "grad_norm": 0.1677870750427246, "learning_rate": 1e-05, "loss": 0.5065, "step": 5006 }, { "epoch": 1.3855254964367258, "grad_norm": 0.170377716422081, "learning_rate": 1e-05, "loss": 0.5073, "step": 5007 }, { "epoch": 1.3858022555870755, "grad_norm": 0.17776957154273987, "learning_rate": 1e-05, "loss": 0.5309, "step": 5008 }, { "epoch": 1.3860790147374247, "grad_norm": 0.16936103999614716, "learning_rate": 1e-05, "loss": 0.5156, "step": 5009 }, { "epoch": 1.386355773887774, "grad_norm": 0.16277265548706055, "learning_rate": 1e-05, "loss": 0.5383, "step": 5010 }, { "epoch": 1.3866325330381235, "grad_norm": 0.17232145369052887, "learning_rate": 1e-05, "loss": 0.5032, "step": 5011 }, { "epoch": 1.386909292188473, "grad_norm": 0.16953912377357483, "learning_rate": 1e-05, "loss": 0.5081, "step": 5012 }, { "epoch": 1.3871860513388223, "grad_norm": 0.16451746225357056, "learning_rate": 1e-05, "loss": 0.4927, "step": 5013 }, { "epoch": 1.3874628104891717, "grad_norm": 0.17116588354110718, "learning_rate": 1e-05, "loss": 0.52, "step": 5014 }, { "epoch": 1.3877395696395212, "grad_norm": 0.17100270092487335, "learning_rate": 1e-05, "loss": 0.4939, "step": 5015 }, { "epoch": 1.3880163287898706, "grad_norm": 0.16684211790561676, "learning_rate": 1e-05, "loss": 0.552, "step": 5016 }, { "epoch": 1.38829308794022, "grad_norm": 0.1769651174545288, "learning_rate": 1e-05, "loss": 0.523, "step": 5017 }, { "epoch": 1.3885698470905694, "grad_norm": 0.17715676128864288, "learning_rate": 1e-05, "loss": 0.5147, "step": 5018 }, { "epoch": 1.3888466062409188, "grad_norm": 0.16457293927669525, "learning_rate": 1e-05, "loss": 0.4894, "step": 5019 }, { "epoch": 1.3891233653912682, "grad_norm": 0.1662992238998413, "learning_rate": 1e-05, "loss": 0.4726, "step": 5020 }, { "epoch": 1.3894001245416177, "grad_norm": 0.1655515879392624, "learning_rate": 1e-05, "loss": 0.532, "step": 5021 }, { "epoch": 1.389676883691967, "grad_norm": 0.16579893231391907, "learning_rate": 1e-05, "loss": 0.5227, "step": 5022 }, { "epoch": 1.3899536428423165, "grad_norm": 0.17291350662708282, "learning_rate": 1e-05, "loss": 0.5345, "step": 5023 }, { "epoch": 1.390230401992666, "grad_norm": 0.1694248765707016, "learning_rate": 1e-05, "loss": 0.5006, "step": 5024 }, { "epoch": 1.3905071611430153, "grad_norm": 0.17172116041183472, "learning_rate": 1e-05, "loss": 0.5293, "step": 5025 }, { "epoch": 1.3907839202933647, "grad_norm": 0.16335979104042053, "learning_rate": 1e-05, "loss": 0.4706, "step": 5026 }, { "epoch": 1.3910606794437141, "grad_norm": 0.17385275661945343, "learning_rate": 1e-05, "loss": 0.5298, "step": 5027 }, { "epoch": 1.3913374385940636, "grad_norm": 0.16982759535312653, "learning_rate": 1e-05, "loss": 0.5058, "step": 5028 }, { "epoch": 1.391614197744413, "grad_norm": 0.16428416967391968, "learning_rate": 1e-05, "loss": 0.4952, "step": 5029 }, { "epoch": 1.3918909568947624, "grad_norm": 0.16895031929016113, "learning_rate": 1e-05, "loss": 0.5421, "step": 5030 }, { "epoch": 1.3921677160451118, "grad_norm": 0.17294941842556, "learning_rate": 1e-05, "loss": 0.5177, "step": 5031 }, { "epoch": 1.3924444751954612, "grad_norm": 0.16466346383094788, "learning_rate": 1e-05, "loss": 0.5, "step": 5032 }, { "epoch": 1.3927212343458106, "grad_norm": 0.17364181578159332, "learning_rate": 1e-05, "loss": 0.5, "step": 5033 }, { "epoch": 1.39299799349616, "grad_norm": 0.1685313731431961, "learning_rate": 1e-05, "loss": 0.5181, "step": 5034 }, { "epoch": 1.3932747526465095, "grad_norm": 0.17664267122745514, "learning_rate": 1e-05, "loss": 0.5107, "step": 5035 }, { "epoch": 1.3935515117968587, "grad_norm": 0.17337805032730103, "learning_rate": 1e-05, "loss": 0.4987, "step": 5036 }, { "epoch": 1.3938282709472083, "grad_norm": 0.17234288156032562, "learning_rate": 1e-05, "loss": 0.5158, "step": 5037 }, { "epoch": 1.3941050300975575, "grad_norm": 0.17066547274589539, "learning_rate": 1e-05, "loss": 0.531, "step": 5038 }, { "epoch": 1.3943817892479071, "grad_norm": 0.17628628015518188, "learning_rate": 1e-05, "loss": 0.5179, "step": 5039 }, { "epoch": 1.3946585483982563, "grad_norm": 0.18437279760837555, "learning_rate": 1e-05, "loss": 0.5237, "step": 5040 }, { "epoch": 1.394935307548606, "grad_norm": 0.16861389577388763, "learning_rate": 1e-05, "loss": 0.5119, "step": 5041 }, { "epoch": 1.3952120666989551, "grad_norm": 0.1730859875679016, "learning_rate": 1e-05, "loss": 0.5063, "step": 5042 }, { "epoch": 1.3954888258493046, "grad_norm": 0.18236693739891052, "learning_rate": 1e-05, "loss": 0.5188, "step": 5043 }, { "epoch": 1.395765584999654, "grad_norm": 0.17288541793823242, "learning_rate": 1e-05, "loss": 0.525, "step": 5044 }, { "epoch": 1.3960423441500034, "grad_norm": 0.17914634943008423, "learning_rate": 1e-05, "loss": 0.5066, "step": 5045 }, { "epoch": 1.3963191033003528, "grad_norm": 0.17016077041625977, "learning_rate": 1e-05, "loss": 0.5113, "step": 5046 }, { "epoch": 1.3965958624507022, "grad_norm": 0.1710539609193802, "learning_rate": 1e-05, "loss": 0.509, "step": 5047 }, { "epoch": 1.3968726216010516, "grad_norm": 0.17247572541236877, "learning_rate": 1e-05, "loss": 0.4978, "step": 5048 }, { "epoch": 1.397149380751401, "grad_norm": 0.179972842335701, "learning_rate": 1e-05, "loss": 0.5257, "step": 5049 }, { "epoch": 1.3974261399017505, "grad_norm": 0.17266683280467987, "learning_rate": 1e-05, "loss": 0.4937, "step": 5050 }, { "epoch": 1.3977028990520999, "grad_norm": 0.17122399806976318, "learning_rate": 1e-05, "loss": 0.4836, "step": 5051 }, { "epoch": 1.3979796582024493, "grad_norm": 0.1795734167098999, "learning_rate": 1e-05, "loss": 0.5062, "step": 5052 }, { "epoch": 1.3982564173527987, "grad_norm": 0.17656981945037842, "learning_rate": 1e-05, "loss": 0.5069, "step": 5053 }, { "epoch": 1.3985331765031481, "grad_norm": 0.164387509226799, "learning_rate": 1e-05, "loss": 0.512, "step": 5054 }, { "epoch": 1.3988099356534975, "grad_norm": 0.1729288250207901, "learning_rate": 1e-05, "loss": 0.5219, "step": 5055 }, { "epoch": 1.399086694803847, "grad_norm": 0.1706847995519638, "learning_rate": 1e-05, "loss": 0.4973, "step": 5056 }, { "epoch": 1.3993634539541964, "grad_norm": 0.16694962978363037, "learning_rate": 1e-05, "loss": 0.536, "step": 5057 }, { "epoch": 1.3996402131045458, "grad_norm": 0.1688452661037445, "learning_rate": 1e-05, "loss": 0.5031, "step": 5058 }, { "epoch": 1.3999169722548952, "grad_norm": 0.17485547065734863, "learning_rate": 1e-05, "loss": 0.5061, "step": 5059 }, { "epoch": 1.4001937314052446, "grad_norm": 0.1751842200756073, "learning_rate": 1e-05, "loss": 0.5008, "step": 5060 }, { "epoch": 1.400470490555594, "grad_norm": 0.16989479959011078, "learning_rate": 1e-05, "loss": 0.528, "step": 5061 }, { "epoch": 1.4007472497059434, "grad_norm": 0.1666814088821411, "learning_rate": 1e-05, "loss": 0.5307, "step": 5062 }, { "epoch": 1.4010240088562929, "grad_norm": 0.16763392090797424, "learning_rate": 1e-05, "loss": 0.5167, "step": 5063 }, { "epoch": 1.4013007680066423, "grad_norm": 0.17006537318229675, "learning_rate": 1e-05, "loss": 0.5237, "step": 5064 }, { "epoch": 1.4015775271569917, "grad_norm": 0.17813026905059814, "learning_rate": 1e-05, "loss": 0.5088, "step": 5065 }, { "epoch": 1.401854286307341, "grad_norm": 0.17858856916427612, "learning_rate": 1e-05, "loss": 0.5013, "step": 5066 }, { "epoch": 1.4021310454576905, "grad_norm": 0.1745169311761856, "learning_rate": 1e-05, "loss": 0.4952, "step": 5067 }, { "epoch": 1.40240780460804, "grad_norm": 0.16994695365428925, "learning_rate": 1e-05, "loss": 0.5317, "step": 5068 }, { "epoch": 1.4026845637583891, "grad_norm": 0.17568525671958923, "learning_rate": 1e-05, "loss": 0.5124, "step": 5069 }, { "epoch": 1.4029613229087388, "grad_norm": 0.1735432744026184, "learning_rate": 1e-05, "loss": 0.5415, "step": 5070 }, { "epoch": 1.403238082059088, "grad_norm": 0.18026743829250336, "learning_rate": 1e-05, "loss": 0.5027, "step": 5071 }, { "epoch": 1.4035148412094376, "grad_norm": 0.16683229804039001, "learning_rate": 1e-05, "loss": 0.5315, "step": 5072 }, { "epoch": 1.4037916003597868, "grad_norm": 0.16880030930042267, "learning_rate": 1e-05, "loss": 0.4978, "step": 5073 }, { "epoch": 1.4040683595101364, "grad_norm": 0.1674952358007431, "learning_rate": 1e-05, "loss": 0.526, "step": 5074 }, { "epoch": 1.4043451186604856, "grad_norm": 0.16549818217754364, "learning_rate": 1e-05, "loss": 0.5087, "step": 5075 }, { "epoch": 1.4046218778108353, "grad_norm": 0.16980810463428497, "learning_rate": 1e-05, "loss": 0.4915, "step": 5076 }, { "epoch": 1.4048986369611844, "grad_norm": 0.16654232144355774, "learning_rate": 1e-05, "loss": 0.531, "step": 5077 }, { "epoch": 1.4051753961115339, "grad_norm": 0.1726519763469696, "learning_rate": 1e-05, "loss": 0.5011, "step": 5078 }, { "epoch": 1.4054521552618833, "grad_norm": 0.1830408275127411, "learning_rate": 1e-05, "loss": 0.5113, "step": 5079 }, { "epoch": 1.4057289144122327, "grad_norm": 0.17404648661613464, "learning_rate": 1e-05, "loss": 0.4916, "step": 5080 }, { "epoch": 1.406005673562582, "grad_norm": 0.1759020835161209, "learning_rate": 1e-05, "loss": 0.5231, "step": 5081 }, { "epoch": 1.4062824327129315, "grad_norm": 0.1673096865415573, "learning_rate": 1e-05, "loss": 0.5057, "step": 5082 }, { "epoch": 1.406559191863281, "grad_norm": 0.1677807718515396, "learning_rate": 1e-05, "loss": 0.5137, "step": 5083 }, { "epoch": 1.4068359510136303, "grad_norm": 0.1720089465379715, "learning_rate": 1e-05, "loss": 0.4891, "step": 5084 }, { "epoch": 1.4071127101639798, "grad_norm": 0.168177530169487, "learning_rate": 1e-05, "loss": 0.4848, "step": 5085 }, { "epoch": 1.4073894693143292, "grad_norm": 0.1691683828830719, "learning_rate": 1e-05, "loss": 0.5108, "step": 5086 }, { "epoch": 1.4076662284646786, "grad_norm": 0.1689532995223999, "learning_rate": 1e-05, "loss": 0.5024, "step": 5087 }, { "epoch": 1.407942987615028, "grad_norm": 0.17100124061107635, "learning_rate": 1e-05, "loss": 0.502, "step": 5088 }, { "epoch": 1.4082197467653774, "grad_norm": 0.17464938759803772, "learning_rate": 1e-05, "loss": 0.5117, "step": 5089 }, { "epoch": 1.4084965059157268, "grad_norm": 0.17114543914794922, "learning_rate": 1e-05, "loss": 0.4994, "step": 5090 }, { "epoch": 1.4087732650660763, "grad_norm": 0.16608648002147675, "learning_rate": 1e-05, "loss": 0.5325, "step": 5091 }, { "epoch": 1.4090500242164257, "grad_norm": 0.16960369050502777, "learning_rate": 1e-05, "loss": 0.507, "step": 5092 }, { "epoch": 1.409326783366775, "grad_norm": 0.16681793332099915, "learning_rate": 1e-05, "loss": 0.497, "step": 5093 }, { "epoch": 1.4096035425171245, "grad_norm": 0.1708766222000122, "learning_rate": 1e-05, "loss": 0.5205, "step": 5094 }, { "epoch": 1.409880301667474, "grad_norm": 0.16952116787433624, "learning_rate": 1e-05, "loss": 0.4884, "step": 5095 }, { "epoch": 1.4101570608178233, "grad_norm": 0.17383864521980286, "learning_rate": 1e-05, "loss": 0.523, "step": 5096 }, { "epoch": 1.4104338199681727, "grad_norm": 0.1677718311548233, "learning_rate": 1e-05, "loss": 0.4928, "step": 5097 }, { "epoch": 1.4107105791185222, "grad_norm": 0.16543254256248474, "learning_rate": 1e-05, "loss": 0.4955, "step": 5098 }, { "epoch": 1.4109873382688716, "grad_norm": 0.17124801874160767, "learning_rate": 1e-05, "loss": 0.5211, "step": 5099 }, { "epoch": 1.411264097419221, "grad_norm": 0.17645268142223358, "learning_rate": 1e-05, "loss": 0.5111, "step": 5100 }, { "epoch": 1.4115408565695704, "grad_norm": 0.17213594913482666, "learning_rate": 1e-05, "loss": 0.4865, "step": 5101 }, { "epoch": 1.4118176157199198, "grad_norm": 0.17476052045822144, "learning_rate": 1e-05, "loss": 0.5096, "step": 5102 }, { "epoch": 1.4120943748702692, "grad_norm": 0.1724434196949005, "learning_rate": 1e-05, "loss": 0.5178, "step": 5103 }, { "epoch": 1.4123711340206184, "grad_norm": 0.18233825266361237, "learning_rate": 1e-05, "loss": 0.5145, "step": 5104 }, { "epoch": 1.412647893170968, "grad_norm": 0.17254376411437988, "learning_rate": 1e-05, "loss": 0.4864, "step": 5105 }, { "epoch": 1.4129246523213173, "grad_norm": 0.17022769153118134, "learning_rate": 1e-05, "loss": 0.5104, "step": 5106 }, { "epoch": 1.413201411471667, "grad_norm": 0.1642652451992035, "learning_rate": 1e-05, "loss": 0.5144, "step": 5107 }, { "epoch": 1.413478170622016, "grad_norm": 0.17520059645175934, "learning_rate": 1e-05, "loss": 0.5, "step": 5108 }, { "epoch": 1.4137549297723657, "grad_norm": 0.16643063724040985, "learning_rate": 1e-05, "loss": 0.5103, "step": 5109 }, { "epoch": 1.414031688922715, "grad_norm": 0.17584076523780823, "learning_rate": 1e-05, "loss": 0.5215, "step": 5110 }, { "epoch": 1.4143084480730646, "grad_norm": 0.18059715628623962, "learning_rate": 1e-05, "loss": 0.5287, "step": 5111 }, { "epoch": 1.4145852072234137, "grad_norm": 0.16798701882362366, "learning_rate": 1e-05, "loss": 0.502, "step": 5112 }, { "epoch": 1.4148619663737632, "grad_norm": 0.16749374568462372, "learning_rate": 1e-05, "loss": 0.5376, "step": 5113 }, { "epoch": 1.4151387255241126, "grad_norm": 0.1767723113298416, "learning_rate": 1e-05, "loss": 0.4875, "step": 5114 }, { "epoch": 1.415415484674462, "grad_norm": 0.17279787361621857, "learning_rate": 1e-05, "loss": 0.4769, "step": 5115 }, { "epoch": 1.4156922438248114, "grad_norm": 0.17280015349388123, "learning_rate": 1e-05, "loss": 0.536, "step": 5116 }, { "epoch": 1.4159690029751608, "grad_norm": 0.1669088453054428, "learning_rate": 1e-05, "loss": 0.5002, "step": 5117 }, { "epoch": 1.4162457621255102, "grad_norm": 0.16881194710731506, "learning_rate": 1e-05, "loss": 0.5081, "step": 5118 }, { "epoch": 1.4165225212758596, "grad_norm": 0.16570840775966644, "learning_rate": 1e-05, "loss": 0.5041, "step": 5119 }, { "epoch": 1.416799280426209, "grad_norm": 0.1689225435256958, "learning_rate": 1e-05, "loss": 0.4752, "step": 5120 }, { "epoch": 1.4170760395765585, "grad_norm": 0.17076602578163147, "learning_rate": 1e-05, "loss": 0.5121, "step": 5121 }, { "epoch": 1.417352798726908, "grad_norm": 0.17375150322914124, "learning_rate": 1e-05, "loss": 0.5376, "step": 5122 }, { "epoch": 1.4176295578772573, "grad_norm": 0.1688227653503418, "learning_rate": 1e-05, "loss": 0.482, "step": 5123 }, { "epoch": 1.4179063170276067, "grad_norm": 0.16973716020584106, "learning_rate": 1e-05, "loss": 0.4596, "step": 5124 }, { "epoch": 1.4181830761779561, "grad_norm": 0.1697857826948166, "learning_rate": 1e-05, "loss": 0.5076, "step": 5125 }, { "epoch": 1.4184598353283056, "grad_norm": 0.17718715965747833, "learning_rate": 1e-05, "loss": 0.5237, "step": 5126 }, { "epoch": 1.418736594478655, "grad_norm": 0.17417655885219574, "learning_rate": 1e-05, "loss": 0.4922, "step": 5127 }, { "epoch": 1.4190133536290044, "grad_norm": 0.16786900162696838, "learning_rate": 1e-05, "loss": 0.504, "step": 5128 }, { "epoch": 1.4192901127793538, "grad_norm": 0.16762250661849976, "learning_rate": 1e-05, "loss": 0.4831, "step": 5129 }, { "epoch": 1.4195668719297032, "grad_norm": 0.1680264174938202, "learning_rate": 1e-05, "loss": 0.5132, "step": 5130 }, { "epoch": 1.4198436310800526, "grad_norm": 0.1722099930047989, "learning_rate": 1e-05, "loss": 0.5156, "step": 5131 }, { "epoch": 1.420120390230402, "grad_norm": 0.17062869668006897, "learning_rate": 1e-05, "loss": 0.512, "step": 5132 }, { "epoch": 1.4203971493807515, "grad_norm": 0.16773198544979095, "learning_rate": 1e-05, "loss": 0.4985, "step": 5133 }, { "epoch": 1.4206739085311009, "grad_norm": 0.16791489720344543, "learning_rate": 1e-05, "loss": 0.4987, "step": 5134 }, { "epoch": 1.4209506676814503, "grad_norm": 0.1737375557422638, "learning_rate": 1e-05, "loss": 0.5224, "step": 5135 }, { "epoch": 1.4212274268317997, "grad_norm": 0.17255309224128723, "learning_rate": 1e-05, "loss": 0.5182, "step": 5136 }, { "epoch": 1.4215041859821491, "grad_norm": 0.16922029852867126, "learning_rate": 1e-05, "loss": 0.5022, "step": 5137 }, { "epoch": 1.4217809451324985, "grad_norm": 0.17019017040729523, "learning_rate": 1e-05, "loss": 0.5146, "step": 5138 }, { "epoch": 1.4220577042828477, "grad_norm": 0.17627979815006256, "learning_rate": 1e-05, "loss": 0.4839, "step": 5139 }, { "epoch": 1.4223344634331974, "grad_norm": 0.1731594353914261, "learning_rate": 1e-05, "loss": 0.5218, "step": 5140 }, { "epoch": 1.4226112225835466, "grad_norm": 0.17664024233818054, "learning_rate": 1e-05, "loss": 0.5051, "step": 5141 }, { "epoch": 1.4228879817338962, "grad_norm": 0.16639649868011475, "learning_rate": 1e-05, "loss": 0.5098, "step": 5142 }, { "epoch": 1.4231647408842454, "grad_norm": 0.18534547090530396, "learning_rate": 1e-05, "loss": 0.4947, "step": 5143 }, { "epoch": 1.423441500034595, "grad_norm": 0.16485215723514557, "learning_rate": 1e-05, "loss": 0.5017, "step": 5144 }, { "epoch": 1.4237182591849442, "grad_norm": 0.1836279183626175, "learning_rate": 1e-05, "loss": 0.503, "step": 5145 }, { "epoch": 1.4239950183352936, "grad_norm": 0.17465580999851227, "learning_rate": 1e-05, "loss": 0.4879, "step": 5146 }, { "epoch": 1.424271777485643, "grad_norm": 0.173024982213974, "learning_rate": 1e-05, "loss": 0.5097, "step": 5147 }, { "epoch": 1.4245485366359925, "grad_norm": 0.16777129471302032, "learning_rate": 1e-05, "loss": 0.5052, "step": 5148 }, { "epoch": 1.4248252957863419, "grad_norm": 0.16954590380191803, "learning_rate": 1e-05, "loss": 0.5233, "step": 5149 }, { "epoch": 1.4251020549366913, "grad_norm": 0.1774609386920929, "learning_rate": 1e-05, "loss": 0.5084, "step": 5150 }, { "epoch": 1.4253788140870407, "grad_norm": 0.17064176499843597, "learning_rate": 1e-05, "loss": 0.4983, "step": 5151 }, { "epoch": 1.4256555732373901, "grad_norm": 0.1690301150083542, "learning_rate": 1e-05, "loss": 0.5218, "step": 5152 }, { "epoch": 1.4259323323877395, "grad_norm": 0.17366887629032135, "learning_rate": 1e-05, "loss": 0.5359, "step": 5153 }, { "epoch": 1.426209091538089, "grad_norm": 0.17475661635398865, "learning_rate": 1e-05, "loss": 0.4951, "step": 5154 }, { "epoch": 1.4264858506884384, "grad_norm": 0.16703951358795166, "learning_rate": 1e-05, "loss": 0.5072, "step": 5155 }, { "epoch": 1.4267626098387878, "grad_norm": 0.1873999536037445, "learning_rate": 1e-05, "loss": 0.512, "step": 5156 }, { "epoch": 1.4270393689891372, "grad_norm": 0.16968388855457306, "learning_rate": 1e-05, "loss": 0.51, "step": 5157 }, { "epoch": 1.4273161281394866, "grad_norm": 0.17141415178775787, "learning_rate": 1e-05, "loss": 0.5096, "step": 5158 }, { "epoch": 1.427592887289836, "grad_norm": 0.18389075994491577, "learning_rate": 1e-05, "loss": 0.5182, "step": 5159 }, { "epoch": 1.4278696464401854, "grad_norm": 0.17101429402828217, "learning_rate": 1e-05, "loss": 0.4949, "step": 5160 }, { "epoch": 1.4281464055905349, "grad_norm": 0.17339500784873962, "learning_rate": 1e-05, "loss": 0.5121, "step": 5161 }, { "epoch": 1.4284231647408843, "grad_norm": 0.17498981952667236, "learning_rate": 1e-05, "loss": 0.5306, "step": 5162 }, { "epoch": 1.4286999238912337, "grad_norm": 0.17259852588176727, "learning_rate": 1e-05, "loss": 0.539, "step": 5163 }, { "epoch": 1.428976683041583, "grad_norm": 0.1795351505279541, "learning_rate": 1e-05, "loss": 0.5163, "step": 5164 }, { "epoch": 1.4292534421919325, "grad_norm": 0.17422696948051453, "learning_rate": 1e-05, "loss": 0.5231, "step": 5165 }, { "epoch": 1.429530201342282, "grad_norm": 0.1649637371301651, "learning_rate": 1e-05, "loss": 0.5026, "step": 5166 }, { "epoch": 1.4298069604926313, "grad_norm": 0.18533110618591309, "learning_rate": 1e-05, "loss": 0.5121, "step": 5167 }, { "epoch": 1.4300837196429808, "grad_norm": 0.16692084074020386, "learning_rate": 1e-05, "loss": 0.4934, "step": 5168 }, { "epoch": 1.4303604787933302, "grad_norm": 0.16439779102802277, "learning_rate": 1e-05, "loss": 0.497, "step": 5169 }, { "epoch": 1.4306372379436796, "grad_norm": 0.17180785536766052, "learning_rate": 1e-05, "loss": 0.4839, "step": 5170 }, { "epoch": 1.430913997094029, "grad_norm": 0.1683887392282486, "learning_rate": 1e-05, "loss": 0.5115, "step": 5171 }, { "epoch": 1.4311907562443782, "grad_norm": 0.17027713358402252, "learning_rate": 1e-05, "loss": 0.5313, "step": 5172 }, { "epoch": 1.4314675153947278, "grad_norm": 0.18352185189723969, "learning_rate": 1e-05, "loss": 0.5273, "step": 5173 }, { "epoch": 1.431744274545077, "grad_norm": 0.17186294496059418, "learning_rate": 1e-05, "loss": 0.5063, "step": 5174 }, { "epoch": 1.4320210336954267, "grad_norm": 0.17705489695072174, "learning_rate": 1e-05, "loss": 0.5021, "step": 5175 }, { "epoch": 1.4322977928457759, "grad_norm": 0.16368462145328522, "learning_rate": 1e-05, "loss": 0.4926, "step": 5176 }, { "epoch": 1.4325745519961255, "grad_norm": 0.16939634084701538, "learning_rate": 1e-05, "loss": 0.5341, "step": 5177 }, { "epoch": 1.4328513111464747, "grad_norm": 0.17520947754383087, "learning_rate": 1e-05, "loss": 0.5057, "step": 5178 }, { "epoch": 1.4331280702968243, "grad_norm": 0.17300230264663696, "learning_rate": 1e-05, "loss": 0.4974, "step": 5179 }, { "epoch": 1.4334048294471735, "grad_norm": 0.16406495869159698, "learning_rate": 1e-05, "loss": 0.4802, "step": 5180 }, { "epoch": 1.433681588597523, "grad_norm": 0.1615128219127655, "learning_rate": 1e-05, "loss": 0.4882, "step": 5181 }, { "epoch": 1.4339583477478723, "grad_norm": 0.1733463704586029, "learning_rate": 1e-05, "loss": 0.5058, "step": 5182 }, { "epoch": 1.4342351068982218, "grad_norm": 0.16744254529476166, "learning_rate": 1e-05, "loss": 0.5102, "step": 5183 }, { "epoch": 1.4345118660485712, "grad_norm": 0.17743343114852905, "learning_rate": 1e-05, "loss": 0.4834, "step": 5184 }, { "epoch": 1.4347886251989206, "grad_norm": 0.1717759370803833, "learning_rate": 1e-05, "loss": 0.4857, "step": 5185 }, { "epoch": 1.43506538434927, "grad_norm": 0.18223369121551514, "learning_rate": 1e-05, "loss": 0.5071, "step": 5186 }, { "epoch": 1.4353421434996194, "grad_norm": 0.17252840101718903, "learning_rate": 1e-05, "loss": 0.5034, "step": 5187 }, { "epoch": 1.4356189026499688, "grad_norm": 0.17040760815143585, "learning_rate": 1e-05, "loss": 0.5053, "step": 5188 }, { "epoch": 1.4358956618003182, "grad_norm": 0.17292019724845886, "learning_rate": 1e-05, "loss": 0.5157, "step": 5189 }, { "epoch": 1.4361724209506677, "grad_norm": 0.1663621962070465, "learning_rate": 1e-05, "loss": 0.5064, "step": 5190 }, { "epoch": 1.436449180101017, "grad_norm": 0.17312970757484436, "learning_rate": 1e-05, "loss": 0.5138, "step": 5191 }, { "epoch": 1.4367259392513665, "grad_norm": 0.17648522555828094, "learning_rate": 1e-05, "loss": 0.5438, "step": 5192 }, { "epoch": 1.437002698401716, "grad_norm": 0.16814987361431122, "learning_rate": 1e-05, "loss": 0.5077, "step": 5193 }, { "epoch": 1.4372794575520653, "grad_norm": 0.16404730081558228, "learning_rate": 1e-05, "loss": 0.4859, "step": 5194 }, { "epoch": 1.4375562167024147, "grad_norm": 0.17172177135944366, "learning_rate": 1e-05, "loss": 0.5186, "step": 5195 }, { "epoch": 1.4378329758527642, "grad_norm": 0.16706208884716034, "learning_rate": 1e-05, "loss": 0.4961, "step": 5196 }, { "epoch": 1.4381097350031136, "grad_norm": 0.17198581993579865, "learning_rate": 1e-05, "loss": 0.5251, "step": 5197 }, { "epoch": 1.438386494153463, "grad_norm": 0.17353779077529907, "learning_rate": 1e-05, "loss": 0.5232, "step": 5198 }, { "epoch": 1.4386632533038124, "grad_norm": 0.1728837490081787, "learning_rate": 1e-05, "loss": 0.5131, "step": 5199 }, { "epoch": 1.4389400124541618, "grad_norm": 0.18332578241825104, "learning_rate": 1e-05, "loss": 0.4967, "step": 5200 }, { "epoch": 1.4392167716045112, "grad_norm": 0.16564153134822845, "learning_rate": 1e-05, "loss": 0.5288, "step": 5201 }, { "epoch": 1.4394935307548606, "grad_norm": 0.16662567853927612, "learning_rate": 1e-05, "loss": 0.4906, "step": 5202 }, { "epoch": 1.43977028990521, "grad_norm": 0.1742815375328064, "learning_rate": 1e-05, "loss": 0.4962, "step": 5203 }, { "epoch": 1.4400470490555595, "grad_norm": 0.1694512814283371, "learning_rate": 1e-05, "loss": 0.5223, "step": 5204 }, { "epoch": 1.4403238082059089, "grad_norm": 0.18007077276706696, "learning_rate": 1e-05, "loss": 0.515, "step": 5205 }, { "epoch": 1.4406005673562583, "grad_norm": 0.1627376228570938, "learning_rate": 1e-05, "loss": 0.5062, "step": 5206 }, { "epoch": 1.4408773265066075, "grad_norm": 0.1714673489332199, "learning_rate": 1e-05, "loss": 0.5074, "step": 5207 }, { "epoch": 1.4411540856569571, "grad_norm": 0.16863952577114105, "learning_rate": 1e-05, "loss": 0.4982, "step": 5208 }, { "epoch": 1.4414308448073063, "grad_norm": 0.17662715911865234, "learning_rate": 1e-05, "loss": 0.5008, "step": 5209 }, { "epoch": 1.441707603957656, "grad_norm": 0.17375683784484863, "learning_rate": 1e-05, "loss": 0.4906, "step": 5210 }, { "epoch": 1.4419843631080052, "grad_norm": 0.1774461269378662, "learning_rate": 1e-05, "loss": 0.5013, "step": 5211 }, { "epoch": 1.4422611222583548, "grad_norm": 0.17491842806339264, "learning_rate": 1e-05, "loss": 0.5166, "step": 5212 }, { "epoch": 1.442537881408704, "grad_norm": 0.1722240447998047, "learning_rate": 1e-05, "loss": 0.5091, "step": 5213 }, { "epoch": 1.4428146405590536, "grad_norm": 0.17759805917739868, "learning_rate": 1e-05, "loss": 0.4916, "step": 5214 }, { "epoch": 1.4430913997094028, "grad_norm": 0.17778919637203217, "learning_rate": 1e-05, "loss": 0.4897, "step": 5215 }, { "epoch": 1.4433681588597522, "grad_norm": 0.1766655445098877, "learning_rate": 1e-05, "loss": 0.5504, "step": 5216 }, { "epoch": 1.4436449180101016, "grad_norm": 0.17456930875778198, "learning_rate": 1e-05, "loss": 0.5103, "step": 5217 }, { "epoch": 1.443921677160451, "grad_norm": 0.16944743692874908, "learning_rate": 1e-05, "loss": 0.5109, "step": 5218 }, { "epoch": 1.4441984363108005, "grad_norm": 0.18366263806819916, "learning_rate": 1e-05, "loss": 0.5203, "step": 5219 }, { "epoch": 1.4444751954611499, "grad_norm": 0.17243745923042297, "learning_rate": 1e-05, "loss": 0.4993, "step": 5220 }, { "epoch": 1.4447519546114993, "grad_norm": 0.18775925040245056, "learning_rate": 1e-05, "loss": 0.5232, "step": 5221 }, { "epoch": 1.4450287137618487, "grad_norm": 0.1737116575241089, "learning_rate": 1e-05, "loss": 0.5164, "step": 5222 }, { "epoch": 1.4453054729121981, "grad_norm": 0.16858668625354767, "learning_rate": 1e-05, "loss": 0.5125, "step": 5223 }, { "epoch": 1.4455822320625475, "grad_norm": 0.17151902616024017, "learning_rate": 1e-05, "loss": 0.5326, "step": 5224 }, { "epoch": 1.445858991212897, "grad_norm": 0.18465259671211243, "learning_rate": 1e-05, "loss": 0.4768, "step": 5225 }, { "epoch": 1.4461357503632464, "grad_norm": 0.1648217737674713, "learning_rate": 1e-05, "loss": 0.5317, "step": 5226 }, { "epoch": 1.4464125095135958, "grad_norm": 0.1671583652496338, "learning_rate": 1e-05, "loss": 0.5026, "step": 5227 }, { "epoch": 1.4466892686639452, "grad_norm": 0.17544205486774445, "learning_rate": 1e-05, "loss": 0.5199, "step": 5228 }, { "epoch": 1.4469660278142946, "grad_norm": 0.17750097811222076, "learning_rate": 1e-05, "loss": 0.4836, "step": 5229 }, { "epoch": 1.447242786964644, "grad_norm": 0.16157318651676178, "learning_rate": 1e-05, "loss": 0.5099, "step": 5230 }, { "epoch": 1.4475195461149934, "grad_norm": 0.1720394641160965, "learning_rate": 1e-05, "loss": 0.5036, "step": 5231 }, { "epoch": 1.4477963052653429, "grad_norm": 0.17100967466831207, "learning_rate": 1e-05, "loss": 0.5252, "step": 5232 }, { "epoch": 1.4480730644156923, "grad_norm": 0.17421303689479828, "learning_rate": 1e-05, "loss": 0.5237, "step": 5233 }, { "epoch": 1.4483498235660417, "grad_norm": 0.17301176488399506, "learning_rate": 1e-05, "loss": 0.5102, "step": 5234 }, { "epoch": 1.448626582716391, "grad_norm": 0.1702166050672531, "learning_rate": 1e-05, "loss": 0.5276, "step": 5235 }, { "epoch": 1.4489033418667405, "grad_norm": 0.1768055111169815, "learning_rate": 1e-05, "loss": 0.5256, "step": 5236 }, { "epoch": 1.44918010101709, "grad_norm": 0.17773157358169556, "learning_rate": 1e-05, "loss": 0.5191, "step": 5237 }, { "epoch": 1.4494568601674394, "grad_norm": 0.1648378223180771, "learning_rate": 1e-05, "loss": 0.4995, "step": 5238 }, { "epoch": 1.4497336193177888, "grad_norm": 0.17756670713424683, "learning_rate": 1e-05, "loss": 0.5092, "step": 5239 }, { "epoch": 1.4500103784681382, "grad_norm": 0.17509086430072784, "learning_rate": 1e-05, "loss": 0.5138, "step": 5240 }, { "epoch": 1.4502871376184876, "grad_norm": 0.16828496754169464, "learning_rate": 1e-05, "loss": 0.5159, "step": 5241 }, { "epoch": 1.4505638967688368, "grad_norm": 0.17238080501556396, "learning_rate": 1e-05, "loss": 0.5093, "step": 5242 }, { "epoch": 1.4508406559191864, "grad_norm": 0.16622285544872284, "learning_rate": 1e-05, "loss": 0.5274, "step": 5243 }, { "epoch": 1.4511174150695356, "grad_norm": 0.1874370276927948, "learning_rate": 1e-05, "loss": 0.5217, "step": 5244 }, { "epoch": 1.4513941742198853, "grad_norm": 0.1753305345773697, "learning_rate": 1e-05, "loss": 0.5008, "step": 5245 }, { "epoch": 1.4516709333702345, "grad_norm": 0.16994652152061462, "learning_rate": 1e-05, "loss": 0.5321, "step": 5246 }, { "epoch": 1.451947692520584, "grad_norm": 0.17426103353500366, "learning_rate": 1e-05, "loss": 0.491, "step": 5247 }, { "epoch": 1.4522244516709333, "grad_norm": 0.1680522859096527, "learning_rate": 1e-05, "loss": 0.5257, "step": 5248 }, { "epoch": 1.4525012108212827, "grad_norm": 0.17569799721240997, "learning_rate": 1e-05, "loss": 0.5412, "step": 5249 }, { "epoch": 1.452777969971632, "grad_norm": 0.1722254604101181, "learning_rate": 1e-05, "loss": 0.5081, "step": 5250 }, { "epoch": 1.4530547291219815, "grad_norm": 0.17831335961818695, "learning_rate": 1e-05, "loss": 0.5043, "step": 5251 }, { "epoch": 1.453331488272331, "grad_norm": 0.16616623103618622, "learning_rate": 1e-05, "loss": 0.5258, "step": 5252 }, { "epoch": 1.4536082474226804, "grad_norm": 0.17273664474487305, "learning_rate": 1e-05, "loss": 0.5365, "step": 5253 }, { "epoch": 1.4538850065730298, "grad_norm": 0.1700437366962433, "learning_rate": 1e-05, "loss": 0.5044, "step": 5254 }, { "epoch": 1.4541617657233792, "grad_norm": 0.16838271915912628, "learning_rate": 1e-05, "loss": 0.5056, "step": 5255 }, { "epoch": 1.4544385248737286, "grad_norm": 0.16508698463439941, "learning_rate": 1e-05, "loss": 0.503, "step": 5256 }, { "epoch": 1.454715284024078, "grad_norm": 0.1679951697587967, "learning_rate": 1e-05, "loss": 0.4731, "step": 5257 }, { "epoch": 1.4549920431744274, "grad_norm": 0.1735604852437973, "learning_rate": 1e-05, "loss": 0.4821, "step": 5258 }, { "epoch": 1.4552688023247768, "grad_norm": 0.17546339333057404, "learning_rate": 1e-05, "loss": 0.5004, "step": 5259 }, { "epoch": 1.4555455614751263, "grad_norm": 0.16814076900482178, "learning_rate": 1e-05, "loss": 0.4746, "step": 5260 }, { "epoch": 1.4558223206254757, "grad_norm": 0.17487949132919312, "learning_rate": 1e-05, "loss": 0.5367, "step": 5261 }, { "epoch": 1.456099079775825, "grad_norm": 0.17817705869674683, "learning_rate": 1e-05, "loss": 0.4777, "step": 5262 }, { "epoch": 1.4563758389261745, "grad_norm": 0.16941474378108978, "learning_rate": 1e-05, "loss": 0.5091, "step": 5263 }, { "epoch": 1.456652598076524, "grad_norm": 0.16775158047676086, "learning_rate": 1e-05, "loss": 0.5401, "step": 5264 }, { "epoch": 1.4569293572268733, "grad_norm": 0.17655682563781738, "learning_rate": 1e-05, "loss": 0.5043, "step": 5265 }, { "epoch": 1.4572061163772227, "grad_norm": 0.17684480547904968, "learning_rate": 1e-05, "loss": 0.5178, "step": 5266 }, { "epoch": 1.4574828755275722, "grad_norm": 0.17693494260311127, "learning_rate": 1e-05, "loss": 0.4984, "step": 5267 }, { "epoch": 1.4577596346779216, "grad_norm": 0.16956326365470886, "learning_rate": 1e-05, "loss": 0.4941, "step": 5268 }, { "epoch": 1.458036393828271, "grad_norm": 0.17268750071525574, "learning_rate": 1e-05, "loss": 0.5239, "step": 5269 }, { "epoch": 1.4583131529786204, "grad_norm": 0.17383641004562378, "learning_rate": 1e-05, "loss": 0.5028, "step": 5270 }, { "epoch": 1.4585899121289698, "grad_norm": 0.17037013173103333, "learning_rate": 1e-05, "loss": 0.4877, "step": 5271 }, { "epoch": 1.4588666712793192, "grad_norm": 0.18643350899219513, "learning_rate": 1e-05, "loss": 0.4988, "step": 5272 }, { "epoch": 1.4591434304296687, "grad_norm": 0.1815769374370575, "learning_rate": 1e-05, "loss": 0.4756, "step": 5273 }, { "epoch": 1.459420189580018, "grad_norm": 0.17629636824131012, "learning_rate": 1e-05, "loss": 0.5533, "step": 5274 }, { "epoch": 1.4596969487303673, "grad_norm": 0.1694086790084839, "learning_rate": 1e-05, "loss": 0.4966, "step": 5275 }, { "epoch": 1.459973707880717, "grad_norm": 0.17145244777202606, "learning_rate": 1e-05, "loss": 0.4896, "step": 5276 }, { "epoch": 1.460250467031066, "grad_norm": 0.1758272647857666, "learning_rate": 1e-05, "loss": 0.5287, "step": 5277 }, { "epoch": 1.4605272261814157, "grad_norm": 0.17284508049488068, "learning_rate": 1e-05, "loss": 0.5223, "step": 5278 }, { "epoch": 1.460803985331765, "grad_norm": 0.1654663234949112, "learning_rate": 1e-05, "loss": 0.4889, "step": 5279 }, { "epoch": 1.4610807444821146, "grad_norm": 0.1874736100435257, "learning_rate": 1e-05, "loss": 0.4835, "step": 5280 }, { "epoch": 1.4613575036324638, "grad_norm": 0.16794002056121826, "learning_rate": 1e-05, "loss": 0.4947, "step": 5281 }, { "epoch": 1.4616342627828134, "grad_norm": 0.1713135689496994, "learning_rate": 1e-05, "loss": 0.5012, "step": 5282 }, { "epoch": 1.4619110219331626, "grad_norm": 0.170026957988739, "learning_rate": 1e-05, "loss": 0.5418, "step": 5283 }, { "epoch": 1.462187781083512, "grad_norm": 0.17122112214565277, "learning_rate": 1e-05, "loss": 0.5095, "step": 5284 }, { "epoch": 1.4624645402338614, "grad_norm": 0.1673600971698761, "learning_rate": 1e-05, "loss": 0.4964, "step": 5285 }, { "epoch": 1.4627412993842108, "grad_norm": 0.17769384384155273, "learning_rate": 1e-05, "loss": 0.5273, "step": 5286 }, { "epoch": 1.4630180585345602, "grad_norm": 0.1830078661441803, "learning_rate": 1e-05, "loss": 0.5068, "step": 5287 }, { "epoch": 1.4632948176849097, "grad_norm": 0.16792310774326324, "learning_rate": 1e-05, "loss": 0.4926, "step": 5288 }, { "epoch": 1.463571576835259, "grad_norm": 0.16883133351802826, "learning_rate": 1e-05, "loss": 0.5104, "step": 5289 }, { "epoch": 1.4638483359856085, "grad_norm": 0.17364487051963806, "learning_rate": 1e-05, "loss": 0.4975, "step": 5290 }, { "epoch": 1.464125095135958, "grad_norm": 0.16724710166454315, "learning_rate": 1e-05, "loss": 0.5073, "step": 5291 }, { "epoch": 1.4644018542863073, "grad_norm": 0.16973726451396942, "learning_rate": 1e-05, "loss": 0.5311, "step": 5292 }, { "epoch": 1.4646786134366567, "grad_norm": 0.17479866743087769, "learning_rate": 1e-05, "loss": 0.481, "step": 5293 }, { "epoch": 1.4649553725870061, "grad_norm": 0.17278064787387848, "learning_rate": 1e-05, "loss": 0.4927, "step": 5294 }, { "epoch": 1.4652321317373556, "grad_norm": 0.1766432374715805, "learning_rate": 1e-05, "loss": 0.508, "step": 5295 }, { "epoch": 1.465508890887705, "grad_norm": 0.16984792053699493, "learning_rate": 1e-05, "loss": 0.5105, "step": 5296 }, { "epoch": 1.4657856500380544, "grad_norm": 0.1714525669813156, "learning_rate": 1e-05, "loss": 0.514, "step": 5297 }, { "epoch": 1.4660624091884038, "grad_norm": 0.16828395426273346, "learning_rate": 1e-05, "loss": 0.5372, "step": 5298 }, { "epoch": 1.4663391683387532, "grad_norm": 0.17834872007369995, "learning_rate": 1e-05, "loss": 0.5077, "step": 5299 }, { "epoch": 1.4666159274891026, "grad_norm": 0.17337311804294586, "learning_rate": 1e-05, "loss": 0.5126, "step": 5300 }, { "epoch": 1.466892686639452, "grad_norm": 0.16427627205848694, "learning_rate": 1e-05, "loss": 0.4917, "step": 5301 }, { "epoch": 1.4671694457898015, "grad_norm": 0.1679351031780243, "learning_rate": 1e-05, "loss": 0.4927, "step": 5302 }, { "epoch": 1.4674462049401509, "grad_norm": 0.170682892203331, "learning_rate": 1e-05, "loss": 0.5133, "step": 5303 }, { "epoch": 1.4677229640905003, "grad_norm": 0.1784563958644867, "learning_rate": 1e-05, "loss": 0.5093, "step": 5304 }, { "epoch": 1.4679997232408497, "grad_norm": 0.17805121839046478, "learning_rate": 1e-05, "loss": 0.5118, "step": 5305 }, { "epoch": 1.4682764823911991, "grad_norm": 0.17046810686588287, "learning_rate": 1e-05, "loss": 0.4945, "step": 5306 }, { "epoch": 1.4685532415415485, "grad_norm": 0.16187791526317596, "learning_rate": 1e-05, "loss": 0.4889, "step": 5307 }, { "epoch": 1.468830000691898, "grad_norm": 0.1708817034959793, "learning_rate": 1e-05, "loss": 0.5221, "step": 5308 }, { "epoch": 1.4691067598422474, "grad_norm": 0.16986533999443054, "learning_rate": 1e-05, "loss": 0.5119, "step": 5309 }, { "epoch": 1.4693835189925966, "grad_norm": 0.17632107436656952, "learning_rate": 1e-05, "loss": 0.4788, "step": 5310 }, { "epoch": 1.4696602781429462, "grad_norm": 0.17547474801540375, "learning_rate": 1e-05, "loss": 0.5123, "step": 5311 }, { "epoch": 1.4699370372932954, "grad_norm": 0.17485401034355164, "learning_rate": 1e-05, "loss": 0.5076, "step": 5312 }, { "epoch": 1.470213796443645, "grad_norm": 0.1713476926088333, "learning_rate": 1e-05, "loss": 0.4926, "step": 5313 }, { "epoch": 1.4704905555939942, "grad_norm": 0.16881966590881348, "learning_rate": 1e-05, "loss": 0.5301, "step": 5314 }, { "epoch": 1.4707673147443439, "grad_norm": 0.1639707237482071, "learning_rate": 1e-05, "loss": 0.4934, "step": 5315 }, { "epoch": 1.471044073894693, "grad_norm": 0.17386889457702637, "learning_rate": 1e-05, "loss": 0.4897, "step": 5316 }, { "epoch": 1.4713208330450427, "grad_norm": 0.17469021677970886, "learning_rate": 1e-05, "loss": 0.5188, "step": 5317 }, { "epoch": 1.4715975921953919, "grad_norm": 0.1800084412097931, "learning_rate": 1e-05, "loss": 0.5013, "step": 5318 }, { "epoch": 1.4718743513457413, "grad_norm": 0.17519345879554749, "learning_rate": 1e-05, "loss": 0.5252, "step": 5319 }, { "epoch": 1.4721511104960907, "grad_norm": 0.1699022352695465, "learning_rate": 1e-05, "loss": 0.497, "step": 5320 }, { "epoch": 1.4724278696464401, "grad_norm": 0.16084955632686615, "learning_rate": 1e-05, "loss": 0.4949, "step": 5321 }, { "epoch": 1.4727046287967895, "grad_norm": 0.16595536470413208, "learning_rate": 1e-05, "loss": 0.503, "step": 5322 }, { "epoch": 1.472981387947139, "grad_norm": 0.16389267146587372, "learning_rate": 1e-05, "loss": 0.5038, "step": 5323 }, { "epoch": 1.4732581470974884, "grad_norm": 0.1764102578163147, "learning_rate": 1e-05, "loss": 0.5088, "step": 5324 }, { "epoch": 1.4735349062478378, "grad_norm": 0.17245540022850037, "learning_rate": 1e-05, "loss": 0.49, "step": 5325 }, { "epoch": 1.4738116653981872, "grad_norm": 0.1670553982257843, "learning_rate": 1e-05, "loss": 0.5016, "step": 5326 }, { "epoch": 1.4740884245485366, "grad_norm": 0.1697409451007843, "learning_rate": 1e-05, "loss": 0.5248, "step": 5327 }, { "epoch": 1.474365183698886, "grad_norm": 0.17599676549434662, "learning_rate": 1e-05, "loss": 0.5273, "step": 5328 }, { "epoch": 1.4746419428492354, "grad_norm": 0.16762296855449677, "learning_rate": 1e-05, "loss": 0.511, "step": 5329 }, { "epoch": 1.4749187019995849, "grad_norm": 0.17733760178089142, "learning_rate": 1e-05, "loss": 0.4838, "step": 5330 }, { "epoch": 1.4751954611499343, "grad_norm": 0.17744235694408417, "learning_rate": 1e-05, "loss": 0.4844, "step": 5331 }, { "epoch": 1.4754722203002837, "grad_norm": 0.173455148935318, "learning_rate": 1e-05, "loss": 0.5014, "step": 5332 }, { "epoch": 1.475748979450633, "grad_norm": 0.17031678557395935, "learning_rate": 1e-05, "loss": 0.4997, "step": 5333 }, { "epoch": 1.4760257386009825, "grad_norm": 0.16378067433834076, "learning_rate": 1e-05, "loss": 0.5013, "step": 5334 }, { "epoch": 1.476302497751332, "grad_norm": 0.17166389524936676, "learning_rate": 1e-05, "loss": 0.5235, "step": 5335 }, { "epoch": 1.4765792569016813, "grad_norm": 0.1763632893562317, "learning_rate": 1e-05, "loss": 0.5015, "step": 5336 }, { "epoch": 1.4768560160520308, "grad_norm": 0.16209906339645386, "learning_rate": 1e-05, "loss": 0.5189, "step": 5337 }, { "epoch": 1.4771327752023802, "grad_norm": 0.1721232384443283, "learning_rate": 1e-05, "loss": 0.4945, "step": 5338 }, { "epoch": 1.4774095343527296, "grad_norm": 0.1677258312702179, "learning_rate": 1e-05, "loss": 0.5138, "step": 5339 }, { "epoch": 1.477686293503079, "grad_norm": 0.1710934340953827, "learning_rate": 1e-05, "loss": 0.5066, "step": 5340 }, { "epoch": 1.4779630526534284, "grad_norm": 0.17339745163917542, "learning_rate": 1e-05, "loss": 0.5299, "step": 5341 }, { "epoch": 1.4782398118037778, "grad_norm": 0.17914171516895294, "learning_rate": 1e-05, "loss": 0.4996, "step": 5342 }, { "epoch": 1.4785165709541273, "grad_norm": 0.16835936903953552, "learning_rate": 1e-05, "loss": 0.5069, "step": 5343 }, { "epoch": 1.4787933301044767, "grad_norm": 0.17058637738227844, "learning_rate": 1e-05, "loss": 0.4926, "step": 5344 }, { "epoch": 1.4790700892548259, "grad_norm": 0.17646679282188416, "learning_rate": 1e-05, "loss": 0.5181, "step": 5345 }, { "epoch": 1.4793468484051755, "grad_norm": 0.17503491044044495, "learning_rate": 1e-05, "loss": 0.4839, "step": 5346 }, { "epoch": 1.4796236075555247, "grad_norm": 0.16511398553848267, "learning_rate": 1e-05, "loss": 0.5022, "step": 5347 }, { "epoch": 1.4799003667058743, "grad_norm": 0.16730165481567383, "learning_rate": 1e-05, "loss": 0.5103, "step": 5348 }, { "epoch": 1.4801771258562235, "grad_norm": 0.16186410188674927, "learning_rate": 1e-05, "loss": 0.5183, "step": 5349 }, { "epoch": 1.4804538850065732, "grad_norm": 0.1740187406539917, "learning_rate": 1e-05, "loss": 0.4796, "step": 5350 }, { "epoch": 1.4807306441569223, "grad_norm": 0.17143741250038147, "learning_rate": 1e-05, "loss": 0.4808, "step": 5351 }, { "epoch": 1.4810074033072718, "grad_norm": 0.16923147439956665, "learning_rate": 1e-05, "loss": 0.5095, "step": 5352 }, { "epoch": 1.4812841624576212, "grad_norm": 0.1685020476579666, "learning_rate": 1e-05, "loss": 0.5097, "step": 5353 }, { "epoch": 1.4815609216079706, "grad_norm": 0.1698112189769745, "learning_rate": 1e-05, "loss": 0.487, "step": 5354 }, { "epoch": 1.48183768075832, "grad_norm": 0.171820268034935, "learning_rate": 1e-05, "loss": 0.4982, "step": 5355 }, { "epoch": 1.4821144399086694, "grad_norm": 0.17417201399803162, "learning_rate": 1e-05, "loss": 0.509, "step": 5356 }, { "epoch": 1.4823911990590188, "grad_norm": 0.16407012939453125, "learning_rate": 1e-05, "loss": 0.5257, "step": 5357 }, { "epoch": 1.4826679582093683, "grad_norm": 0.16956138610839844, "learning_rate": 1e-05, "loss": 0.5136, "step": 5358 }, { "epoch": 1.4829447173597177, "grad_norm": 0.16825705766677856, "learning_rate": 1e-05, "loss": 0.5142, "step": 5359 }, { "epoch": 1.483221476510067, "grad_norm": 0.17392802238464355, "learning_rate": 1e-05, "loss": 0.5278, "step": 5360 }, { "epoch": 1.4834982356604165, "grad_norm": 0.17343001067638397, "learning_rate": 1e-05, "loss": 0.5324, "step": 5361 }, { "epoch": 1.483774994810766, "grad_norm": 0.1768881380558014, "learning_rate": 1e-05, "loss": 0.5159, "step": 5362 }, { "epoch": 1.4840517539611153, "grad_norm": 0.17169183492660522, "learning_rate": 1e-05, "loss": 0.5053, "step": 5363 }, { "epoch": 1.4843285131114647, "grad_norm": 0.16920135915279388, "learning_rate": 1e-05, "loss": 0.5088, "step": 5364 }, { "epoch": 1.4846052722618142, "grad_norm": 0.17095154523849487, "learning_rate": 1e-05, "loss": 0.4823, "step": 5365 }, { "epoch": 1.4848820314121636, "grad_norm": 0.16455940902233124, "learning_rate": 1e-05, "loss": 0.494, "step": 5366 }, { "epoch": 1.485158790562513, "grad_norm": 0.16708451509475708, "learning_rate": 1e-05, "loss": 0.5194, "step": 5367 }, { "epoch": 1.4854355497128624, "grad_norm": 0.17856332659721375, "learning_rate": 1e-05, "loss": 0.5152, "step": 5368 }, { "epoch": 1.4857123088632118, "grad_norm": 0.17244793474674225, "learning_rate": 1e-05, "loss": 0.5021, "step": 5369 }, { "epoch": 1.4859890680135612, "grad_norm": 0.17337754368782043, "learning_rate": 1e-05, "loss": 0.5024, "step": 5370 }, { "epoch": 1.4862658271639106, "grad_norm": 0.16846399009227753, "learning_rate": 1e-05, "loss": 0.4975, "step": 5371 }, { "epoch": 1.48654258631426, "grad_norm": 0.16750723123550415, "learning_rate": 1e-05, "loss": 0.5124, "step": 5372 }, { "epoch": 1.4868193454646095, "grad_norm": 0.17104047536849976, "learning_rate": 1e-05, "loss": 0.4998, "step": 5373 }, { "epoch": 1.487096104614959, "grad_norm": 0.16988708078861237, "learning_rate": 1e-05, "loss": 0.4992, "step": 5374 }, { "epoch": 1.4873728637653083, "grad_norm": 0.17011606693267822, "learning_rate": 1e-05, "loss": 0.5042, "step": 5375 }, { "epoch": 1.4876496229156577, "grad_norm": 0.17503830790519714, "learning_rate": 1e-05, "loss": 0.5402, "step": 5376 }, { "epoch": 1.4879263820660071, "grad_norm": 0.17155979573726654, "learning_rate": 1e-05, "loss": 0.4971, "step": 5377 }, { "epoch": 1.4882031412163563, "grad_norm": 0.16540732979774475, "learning_rate": 1e-05, "loss": 0.5049, "step": 5378 }, { "epoch": 1.488479900366706, "grad_norm": 0.16291078925132751, "learning_rate": 1e-05, "loss": 0.5079, "step": 5379 }, { "epoch": 1.4887566595170552, "grad_norm": 0.1650591343641281, "learning_rate": 1e-05, "loss": 0.4958, "step": 5380 }, { "epoch": 1.4890334186674048, "grad_norm": 0.1739019751548767, "learning_rate": 1e-05, "loss": 0.5025, "step": 5381 }, { "epoch": 1.489310177817754, "grad_norm": 0.17510345578193665, "learning_rate": 1e-05, "loss": 0.5218, "step": 5382 }, { "epoch": 1.4895869369681036, "grad_norm": 0.17352452874183655, "learning_rate": 1e-05, "loss": 0.498, "step": 5383 }, { "epoch": 1.4898636961184528, "grad_norm": 0.1736406683921814, "learning_rate": 1e-05, "loss": 0.4854, "step": 5384 }, { "epoch": 1.4901404552688025, "grad_norm": 0.171755850315094, "learning_rate": 1e-05, "loss": 0.5088, "step": 5385 }, { "epoch": 1.4904172144191516, "grad_norm": 0.17751939594745636, "learning_rate": 1e-05, "loss": 0.5112, "step": 5386 }, { "epoch": 1.490693973569501, "grad_norm": 0.172989621758461, "learning_rate": 1e-05, "loss": 0.523, "step": 5387 }, { "epoch": 1.4909707327198505, "grad_norm": 0.17351563274860382, "learning_rate": 1e-05, "loss": 0.529, "step": 5388 }, { "epoch": 1.4912474918702, "grad_norm": 0.17191676795482635, "learning_rate": 1e-05, "loss": 0.4895, "step": 5389 }, { "epoch": 1.4915242510205493, "grad_norm": 0.17158865928649902, "learning_rate": 1e-05, "loss": 0.4962, "step": 5390 }, { "epoch": 1.4918010101708987, "grad_norm": 0.18003207445144653, "learning_rate": 1e-05, "loss": 0.4782, "step": 5391 }, { "epoch": 1.4920777693212481, "grad_norm": 0.16620846092700958, "learning_rate": 1e-05, "loss": 0.5071, "step": 5392 }, { "epoch": 1.4923545284715976, "grad_norm": 0.17225798964500427, "learning_rate": 1e-05, "loss": 0.5328, "step": 5393 }, { "epoch": 1.492631287621947, "grad_norm": 0.16791416704654694, "learning_rate": 1e-05, "loss": 0.5364, "step": 5394 }, { "epoch": 1.4929080467722964, "grad_norm": 0.17833590507507324, "learning_rate": 1e-05, "loss": 0.4907, "step": 5395 }, { "epoch": 1.4931848059226458, "grad_norm": 0.17423072457313538, "learning_rate": 1e-05, "loss": 0.4951, "step": 5396 }, { "epoch": 1.4934615650729952, "grad_norm": 0.17256273329257965, "learning_rate": 1e-05, "loss": 0.537, "step": 5397 }, { "epoch": 1.4937383242233446, "grad_norm": 0.1719953864812851, "learning_rate": 1e-05, "loss": 0.5104, "step": 5398 }, { "epoch": 1.494015083373694, "grad_norm": 0.18173429369926453, "learning_rate": 1e-05, "loss": 0.5044, "step": 5399 }, { "epoch": 1.4942918425240435, "grad_norm": 0.16918930411338806, "learning_rate": 1e-05, "loss": 0.5024, "step": 5400 }, { "epoch": 1.4945686016743929, "grad_norm": 0.17675994336605072, "learning_rate": 1e-05, "loss": 0.5047, "step": 5401 }, { "epoch": 1.4948453608247423, "grad_norm": 0.1683277040719986, "learning_rate": 1e-05, "loss": 0.5032, "step": 5402 }, { "epoch": 1.4951221199750917, "grad_norm": 0.1689365953207016, "learning_rate": 1e-05, "loss": 0.5099, "step": 5403 }, { "epoch": 1.4953988791254411, "grad_norm": 0.17356950044631958, "learning_rate": 1e-05, "loss": 0.5163, "step": 5404 }, { "epoch": 1.4956756382757905, "grad_norm": 0.17443795502185822, "learning_rate": 1e-05, "loss": 0.5144, "step": 5405 }, { "epoch": 1.49595239742614, "grad_norm": 0.17160208523273468, "learning_rate": 1e-05, "loss": 0.501, "step": 5406 }, { "epoch": 1.4962291565764894, "grad_norm": 0.17266447842121124, "learning_rate": 1e-05, "loss": 0.5271, "step": 5407 }, { "epoch": 1.4965059157268388, "grad_norm": 0.17609286308288574, "learning_rate": 1e-05, "loss": 0.5582, "step": 5408 }, { "epoch": 1.4967826748771882, "grad_norm": 0.1714417040348053, "learning_rate": 1e-05, "loss": 0.5373, "step": 5409 }, { "epoch": 1.4970594340275376, "grad_norm": 0.17817817628383636, "learning_rate": 1e-05, "loss": 0.5222, "step": 5410 }, { "epoch": 1.497336193177887, "grad_norm": 0.17979592084884644, "learning_rate": 1e-05, "loss": 0.511, "step": 5411 }, { "epoch": 1.4976129523282364, "grad_norm": 0.17023611068725586, "learning_rate": 1e-05, "loss": 0.5327, "step": 5412 }, { "epoch": 1.4978897114785856, "grad_norm": 0.1715734302997589, "learning_rate": 1e-05, "loss": 0.5165, "step": 5413 }, { "epoch": 1.4981664706289353, "grad_norm": 0.17174065113067627, "learning_rate": 1e-05, "loss": 0.496, "step": 5414 }, { "epoch": 1.4984432297792845, "grad_norm": 0.16870270669460297, "learning_rate": 1e-05, "loss": 0.49, "step": 5415 }, { "epoch": 1.498719988929634, "grad_norm": 0.15529486536979675, "learning_rate": 1e-05, "loss": 0.5, "step": 5416 }, { "epoch": 1.4989967480799833, "grad_norm": 0.17186832427978516, "learning_rate": 1e-05, "loss": 0.5131, "step": 5417 }, { "epoch": 1.499273507230333, "grad_norm": 0.17340245842933655, "learning_rate": 1e-05, "loss": 0.5146, "step": 5418 }, { "epoch": 1.4995502663806821, "grad_norm": 0.17503678798675537, "learning_rate": 1e-05, "loss": 0.509, "step": 5419 }, { "epoch": 1.4998270255310318, "grad_norm": 0.17032167315483093, "learning_rate": 1e-05, "loss": 0.5254, "step": 5420 }, { "epoch": 1.500103784681381, "grad_norm": 0.1755131334066391, "learning_rate": 1e-05, "loss": 0.5299, "step": 5421 }, { "epoch": 1.500103784681381, "eval_loss": 0.5305919051170349, "eval_runtime": 3248.2098, "eval_samples_per_second": 75.16, "eval_steps_per_second": 2.349, "step": 5421 }, { "epoch": 1.5003805438317306, "grad_norm": 0.18080729246139526, "learning_rate": 1e-05, "loss": 0.5111, "step": 5422 }, { "epoch": 1.5006573029820798, "grad_norm": 0.16426193714141846, "learning_rate": 1e-05, "loss": 0.5352, "step": 5423 }, { "epoch": 1.5009340621324294, "grad_norm": 0.1791055053472519, "learning_rate": 1e-05, "loss": 0.5021, "step": 5424 }, { "epoch": 1.5012108212827786, "grad_norm": 0.16503474116325378, "learning_rate": 1e-05, "loss": 0.4856, "step": 5425 }, { "epoch": 1.501487580433128, "grad_norm": 0.17163462936878204, "learning_rate": 1e-05, "loss": 0.5279, "step": 5426 }, { "epoch": 1.5017643395834774, "grad_norm": 0.1719263792037964, "learning_rate": 1e-05, "loss": 0.4892, "step": 5427 }, { "epoch": 1.5020410987338269, "grad_norm": 0.17845459282398224, "learning_rate": 1e-05, "loss": 0.493, "step": 5428 }, { "epoch": 1.5023178578841763, "grad_norm": 0.17286652326583862, "learning_rate": 1e-05, "loss": 0.4989, "step": 5429 }, { "epoch": 1.5025946170345257, "grad_norm": 0.17333030700683594, "learning_rate": 1e-05, "loss": 0.5096, "step": 5430 }, { "epoch": 1.502871376184875, "grad_norm": 0.1722920835018158, "learning_rate": 1e-05, "loss": 0.4945, "step": 5431 }, { "epoch": 1.5031481353352245, "grad_norm": 0.17372329533100128, "learning_rate": 1e-05, "loss": 0.5117, "step": 5432 }, { "epoch": 1.503424894485574, "grad_norm": 0.17839859426021576, "learning_rate": 1e-05, "loss": 0.5235, "step": 5433 }, { "epoch": 1.5037016536359233, "grad_norm": 0.17238666117191315, "learning_rate": 1e-05, "loss": 0.4977, "step": 5434 }, { "epoch": 1.5039784127862728, "grad_norm": 0.17881520092487335, "learning_rate": 1e-05, "loss": 0.5211, "step": 5435 }, { "epoch": 1.5042551719366222, "grad_norm": 0.16173753142356873, "learning_rate": 1e-05, "loss": 0.5113, "step": 5436 }, { "epoch": 1.5045319310869716, "grad_norm": 0.17096105217933655, "learning_rate": 1e-05, "loss": 0.5108, "step": 5437 }, { "epoch": 1.504808690237321, "grad_norm": 0.16590793430805206, "learning_rate": 1e-05, "loss": 0.4733, "step": 5438 }, { "epoch": 1.5050854493876704, "grad_norm": 0.17425410449504852, "learning_rate": 1e-05, "loss": 0.5069, "step": 5439 }, { "epoch": 1.5053622085380198, "grad_norm": 0.17928682267665863, "learning_rate": 1e-05, "loss": 0.5318, "step": 5440 }, { "epoch": 1.5056389676883692, "grad_norm": 0.17094489932060242, "learning_rate": 1e-05, "loss": 0.506, "step": 5441 }, { "epoch": 1.5059157268387187, "grad_norm": 0.174981489777565, "learning_rate": 1e-05, "loss": 0.5027, "step": 5442 }, { "epoch": 1.506192485989068, "grad_norm": 0.17065833508968353, "learning_rate": 1e-05, "loss": 0.5083, "step": 5443 }, { "epoch": 1.5064692451394173, "grad_norm": 0.17612896859645844, "learning_rate": 1e-05, "loss": 0.5144, "step": 5444 }, { "epoch": 1.506746004289767, "grad_norm": 0.16976617276668549, "learning_rate": 1e-05, "loss": 0.4951, "step": 5445 }, { "epoch": 1.507022763440116, "grad_norm": 0.18168213963508606, "learning_rate": 1e-05, "loss": 0.5365, "step": 5446 }, { "epoch": 1.5072995225904657, "grad_norm": 0.1692124754190445, "learning_rate": 1e-05, "loss": 0.493, "step": 5447 }, { "epoch": 1.507576281740815, "grad_norm": 0.16673195362091064, "learning_rate": 1e-05, "loss": 0.4815, "step": 5448 }, { "epoch": 1.5078530408911646, "grad_norm": 0.1617603749036789, "learning_rate": 1e-05, "loss": 0.5374, "step": 5449 }, { "epoch": 1.5081298000415138, "grad_norm": 0.17092639207839966, "learning_rate": 1e-05, "loss": 0.5186, "step": 5450 }, { "epoch": 1.5084065591918634, "grad_norm": 0.17039155960083008, "learning_rate": 1e-05, "loss": 0.5044, "step": 5451 }, { "epoch": 1.5086833183422126, "grad_norm": 0.16653016209602356, "learning_rate": 1e-05, "loss": 0.4948, "step": 5452 }, { "epoch": 1.5089600774925622, "grad_norm": 0.17467133700847626, "learning_rate": 1e-05, "loss": 0.5277, "step": 5453 }, { "epoch": 1.5092368366429114, "grad_norm": 0.1819293200969696, "learning_rate": 1e-05, "loss": 0.5176, "step": 5454 }, { "epoch": 1.509513595793261, "grad_norm": 0.17068088054656982, "learning_rate": 1e-05, "loss": 0.5036, "step": 5455 }, { "epoch": 1.5097903549436102, "grad_norm": 0.17781589925289154, "learning_rate": 1e-05, "loss": 0.5208, "step": 5456 }, { "epoch": 1.5100671140939599, "grad_norm": 0.17310887575149536, "learning_rate": 1e-05, "loss": 0.52, "step": 5457 }, { "epoch": 1.510343873244309, "grad_norm": 0.18024203181266785, "learning_rate": 1e-05, "loss": 0.5165, "step": 5458 }, { "epoch": 1.5106206323946587, "grad_norm": 0.1765560805797577, "learning_rate": 1e-05, "loss": 0.5107, "step": 5459 }, { "epoch": 1.510897391545008, "grad_norm": 0.17089252173900604, "learning_rate": 1e-05, "loss": 0.5194, "step": 5460 }, { "epoch": 1.5111741506953573, "grad_norm": 0.17142806947231293, "learning_rate": 1e-05, "loss": 0.4863, "step": 5461 }, { "epoch": 1.5114509098457067, "grad_norm": 0.17246049642562866, "learning_rate": 1e-05, "loss": 0.5066, "step": 5462 }, { "epoch": 1.5117276689960562, "grad_norm": 0.1708831787109375, "learning_rate": 1e-05, "loss": 0.4818, "step": 5463 }, { "epoch": 1.5120044281464056, "grad_norm": 0.1720452755689621, "learning_rate": 1e-05, "loss": 0.5159, "step": 5464 }, { "epoch": 1.512281187296755, "grad_norm": 0.1667604297399521, "learning_rate": 1e-05, "loss": 0.4763, "step": 5465 }, { "epoch": 1.5125579464471044, "grad_norm": 0.17397163808345795, "learning_rate": 1e-05, "loss": 0.5049, "step": 5466 }, { "epoch": 1.5128347055974538, "grad_norm": 0.17550456523895264, "learning_rate": 1e-05, "loss": 0.5011, "step": 5467 }, { "epoch": 1.5131114647478032, "grad_norm": 0.17355579137802124, "learning_rate": 1e-05, "loss": 0.5313, "step": 5468 }, { "epoch": 1.5133882238981526, "grad_norm": 0.16291193664073944, "learning_rate": 1e-05, "loss": 0.5024, "step": 5469 }, { "epoch": 1.513664983048502, "grad_norm": 0.17619620263576508, "learning_rate": 1e-05, "loss": 0.4958, "step": 5470 }, { "epoch": 1.5139417421988515, "grad_norm": 0.17223647236824036, "learning_rate": 1e-05, "loss": 0.5279, "step": 5471 }, { "epoch": 1.5142185013492009, "grad_norm": 0.1667315810918808, "learning_rate": 1e-05, "loss": 0.4988, "step": 5472 }, { "epoch": 1.5144952604995503, "grad_norm": 0.17866730690002441, "learning_rate": 1e-05, "loss": 0.508, "step": 5473 }, { "epoch": 1.5147720196498997, "grad_norm": 0.1689392477273941, "learning_rate": 1e-05, "loss": 0.5395, "step": 5474 }, { "epoch": 1.5150487788002491, "grad_norm": 0.16394320130348206, "learning_rate": 1e-05, "loss": 0.4931, "step": 5475 }, { "epoch": 1.5153255379505985, "grad_norm": 0.17264901101589203, "learning_rate": 1e-05, "loss": 0.499, "step": 5476 }, { "epoch": 1.5156022971009477, "grad_norm": 0.16979144513607025, "learning_rate": 1e-05, "loss": 0.507, "step": 5477 }, { "epoch": 1.5158790562512974, "grad_norm": 0.17619727551937103, "learning_rate": 1e-05, "loss": 0.5179, "step": 5478 }, { "epoch": 1.5161558154016466, "grad_norm": 0.1686241626739502, "learning_rate": 1e-05, "loss": 0.5287, "step": 5479 }, { "epoch": 1.5164325745519962, "grad_norm": 0.17550215125083923, "learning_rate": 1e-05, "loss": 0.489, "step": 5480 }, { "epoch": 1.5167093337023454, "grad_norm": 0.17380614578723907, "learning_rate": 1e-05, "loss": 0.5198, "step": 5481 }, { "epoch": 1.516986092852695, "grad_norm": 0.1682775467634201, "learning_rate": 1e-05, "loss": 0.5154, "step": 5482 }, { "epoch": 1.5172628520030442, "grad_norm": 0.17398399114608765, "learning_rate": 1e-05, "loss": 0.5273, "step": 5483 }, { "epoch": 1.5175396111533939, "grad_norm": 0.17570024728775024, "learning_rate": 1e-05, "loss": 0.5296, "step": 5484 }, { "epoch": 1.517816370303743, "grad_norm": 0.1833251416683197, "learning_rate": 1e-05, "loss": 0.4985, "step": 5485 }, { "epoch": 1.5180931294540927, "grad_norm": 0.16657093167304993, "learning_rate": 1e-05, "loss": 0.5088, "step": 5486 }, { "epoch": 1.5183698886044419, "grad_norm": 0.17466062307357788, "learning_rate": 1e-05, "loss": 0.5134, "step": 5487 }, { "epoch": 1.5186466477547915, "grad_norm": 0.19275304675102234, "learning_rate": 1e-05, "loss": 0.5223, "step": 5488 }, { "epoch": 1.5189234069051407, "grad_norm": 0.1681249588727951, "learning_rate": 1e-05, "loss": 0.484, "step": 5489 }, { "epoch": 1.5192001660554904, "grad_norm": 0.17586922645568848, "learning_rate": 1e-05, "loss": 0.4908, "step": 5490 }, { "epoch": 1.5194769252058395, "grad_norm": 0.17000369727611542, "learning_rate": 1e-05, "loss": 0.5042, "step": 5491 }, { "epoch": 1.5197536843561892, "grad_norm": 0.17450009286403656, "learning_rate": 1e-05, "loss": 0.5064, "step": 5492 }, { "epoch": 1.5200304435065384, "grad_norm": 0.1721426099538803, "learning_rate": 1e-05, "loss": 0.4987, "step": 5493 }, { "epoch": 1.5203072026568878, "grad_norm": 0.16500209271907806, "learning_rate": 1e-05, "loss": 0.4999, "step": 5494 }, { "epoch": 1.5205839618072372, "grad_norm": 0.17101052403450012, "learning_rate": 1e-05, "loss": 0.5203, "step": 5495 }, { "epoch": 1.5208607209575866, "grad_norm": 0.17051492631435394, "learning_rate": 1e-05, "loss": 0.5193, "step": 5496 }, { "epoch": 1.521137480107936, "grad_norm": 0.1770852953195572, "learning_rate": 1e-05, "loss": 0.4938, "step": 5497 }, { "epoch": 1.5214142392582855, "grad_norm": 0.174879789352417, "learning_rate": 1e-05, "loss": 0.5077, "step": 5498 }, { "epoch": 1.5216909984086349, "grad_norm": 0.17215582728385925, "learning_rate": 1e-05, "loss": 0.5183, "step": 5499 }, { "epoch": 1.5219677575589843, "grad_norm": 0.17158153653144836, "learning_rate": 1e-05, "loss": 0.5401, "step": 5500 }, { "epoch": 1.5222445167093337, "grad_norm": 0.16462352871894836, "learning_rate": 1e-05, "loss": 0.5016, "step": 5501 }, { "epoch": 1.522521275859683, "grad_norm": 0.16916896402835846, "learning_rate": 1e-05, "loss": 0.5047, "step": 5502 }, { "epoch": 1.5227980350100325, "grad_norm": 0.16956599056720734, "learning_rate": 1e-05, "loss": 0.505, "step": 5503 }, { "epoch": 1.523074794160382, "grad_norm": 0.1659354567527771, "learning_rate": 1e-05, "loss": 0.4906, "step": 5504 }, { "epoch": 1.5233515533107314, "grad_norm": 0.1714855432510376, "learning_rate": 1e-05, "loss": 0.5239, "step": 5505 }, { "epoch": 1.5236283124610808, "grad_norm": 0.17099213600158691, "learning_rate": 1e-05, "loss": 0.5171, "step": 5506 }, { "epoch": 1.5239050716114302, "grad_norm": 0.17373959720134735, "learning_rate": 1e-05, "loss": 0.4894, "step": 5507 }, { "epoch": 1.5241818307617796, "grad_norm": 0.16660667955875397, "learning_rate": 1e-05, "loss": 0.5385, "step": 5508 }, { "epoch": 1.524458589912129, "grad_norm": 0.1683300882577896, "learning_rate": 1e-05, "loss": 0.511, "step": 5509 }, { "epoch": 1.5247353490624784, "grad_norm": 0.17867718636989594, "learning_rate": 1e-05, "loss": 0.5046, "step": 5510 }, { "epoch": 1.5250121082128278, "grad_norm": 0.17685388028621674, "learning_rate": 1e-05, "loss": 0.4998, "step": 5511 }, { "epoch": 1.525288867363177, "grad_norm": 0.16855835914611816, "learning_rate": 1e-05, "loss": 0.5165, "step": 5512 }, { "epoch": 1.5255656265135267, "grad_norm": 0.17251071333885193, "learning_rate": 1e-05, "loss": 0.4876, "step": 5513 }, { "epoch": 1.5258423856638759, "grad_norm": 0.16869157552719116, "learning_rate": 1e-05, "loss": 0.4912, "step": 5514 }, { "epoch": 1.5261191448142255, "grad_norm": 0.16754625737667084, "learning_rate": 1e-05, "loss": 0.5069, "step": 5515 }, { "epoch": 1.5263959039645747, "grad_norm": 0.1733274906873703, "learning_rate": 1e-05, "loss": 0.4792, "step": 5516 }, { "epoch": 1.5266726631149243, "grad_norm": 0.17318284511566162, "learning_rate": 1e-05, "loss": 0.5067, "step": 5517 }, { "epoch": 1.5269494222652735, "grad_norm": 0.1773216873407364, "learning_rate": 1e-05, "loss": 0.5052, "step": 5518 }, { "epoch": 1.5272261814156232, "grad_norm": 0.17114794254302979, "learning_rate": 1e-05, "loss": 0.5248, "step": 5519 }, { "epoch": 1.5275029405659724, "grad_norm": 0.1699824184179306, "learning_rate": 1e-05, "loss": 0.4907, "step": 5520 }, { "epoch": 1.527779699716322, "grad_norm": 0.16477738320827484, "learning_rate": 1e-05, "loss": 0.5104, "step": 5521 }, { "epoch": 1.5280564588666712, "grad_norm": 0.17544320225715637, "learning_rate": 1e-05, "loss": 0.5205, "step": 5522 }, { "epoch": 1.5283332180170208, "grad_norm": 0.17763029038906097, "learning_rate": 1e-05, "loss": 0.5128, "step": 5523 }, { "epoch": 1.52860997716737, "grad_norm": 0.16578443348407745, "learning_rate": 1e-05, "loss": 0.4873, "step": 5524 }, { "epoch": 1.5288867363177197, "grad_norm": 0.18242493271827698, "learning_rate": 1e-05, "loss": 0.5162, "step": 5525 }, { "epoch": 1.5291634954680688, "grad_norm": 0.18760471045970917, "learning_rate": 1e-05, "loss": 0.5213, "step": 5526 }, { "epoch": 1.5294402546184185, "grad_norm": 0.17180529236793518, "learning_rate": 1e-05, "loss": 0.5302, "step": 5527 }, { "epoch": 1.5297170137687677, "grad_norm": 0.17982308566570282, "learning_rate": 1e-05, "loss": 0.5121, "step": 5528 }, { "epoch": 1.529993772919117, "grad_norm": 0.17373868823051453, "learning_rate": 1e-05, "loss": 0.5228, "step": 5529 }, { "epoch": 1.5302705320694665, "grad_norm": 0.17284032702445984, "learning_rate": 1e-05, "loss": 0.4981, "step": 5530 }, { "epoch": 1.530547291219816, "grad_norm": 0.17527534067630768, "learning_rate": 1e-05, "loss": 0.5116, "step": 5531 }, { "epoch": 1.5308240503701653, "grad_norm": 0.18018178641796112, "learning_rate": 1e-05, "loss": 0.4945, "step": 5532 }, { "epoch": 1.5311008095205147, "grad_norm": 0.17294028401374817, "learning_rate": 1e-05, "loss": 0.5198, "step": 5533 }, { "epoch": 1.5313775686708642, "grad_norm": 0.17064040899276733, "learning_rate": 1e-05, "loss": 0.5241, "step": 5534 }, { "epoch": 1.5316543278212136, "grad_norm": 0.17617972195148468, "learning_rate": 1e-05, "loss": 0.5184, "step": 5535 }, { "epoch": 1.531931086971563, "grad_norm": 0.17074741423130035, "learning_rate": 1e-05, "loss": 0.5293, "step": 5536 }, { "epoch": 1.5322078461219124, "grad_norm": 0.17156054079532623, "learning_rate": 1e-05, "loss": 0.4972, "step": 5537 }, { "epoch": 1.5324846052722618, "grad_norm": 0.18191182613372803, "learning_rate": 1e-05, "loss": 0.5063, "step": 5538 }, { "epoch": 1.5327613644226112, "grad_norm": 0.1670464277267456, "learning_rate": 1e-05, "loss": 0.5018, "step": 5539 }, { "epoch": 1.5330381235729607, "grad_norm": 0.16591855883598328, "learning_rate": 1e-05, "loss": 0.5168, "step": 5540 }, { "epoch": 1.53331488272331, "grad_norm": 0.18273968994617462, "learning_rate": 1e-05, "loss": 0.5207, "step": 5541 }, { "epoch": 1.5335916418736595, "grad_norm": 0.17526015639305115, "learning_rate": 1e-05, "loss": 0.5103, "step": 5542 }, { "epoch": 1.533868401024009, "grad_norm": 0.16978634893894196, "learning_rate": 1e-05, "loss": 0.4956, "step": 5543 }, { "epoch": 1.5341451601743583, "grad_norm": 0.1787637323141098, "learning_rate": 1e-05, "loss": 0.4903, "step": 5544 }, { "epoch": 1.5344219193247077, "grad_norm": 0.1719396710395813, "learning_rate": 1e-05, "loss": 0.5092, "step": 5545 }, { "epoch": 1.5346986784750571, "grad_norm": 0.17120912671089172, "learning_rate": 1e-05, "loss": 0.4799, "step": 5546 }, { "epoch": 1.5349754376254063, "grad_norm": 0.17518280446529388, "learning_rate": 1e-05, "loss": 0.5289, "step": 5547 }, { "epoch": 1.535252196775756, "grad_norm": 0.17256920039653778, "learning_rate": 1e-05, "loss": 0.5014, "step": 5548 }, { "epoch": 1.5355289559261052, "grad_norm": 0.177601158618927, "learning_rate": 1e-05, "loss": 0.5274, "step": 5549 }, { "epoch": 1.5358057150764548, "grad_norm": 0.2387613207101822, "learning_rate": 1e-05, "loss": 0.5052, "step": 5550 }, { "epoch": 1.536082474226804, "grad_norm": 0.17032182216644287, "learning_rate": 1e-05, "loss": 0.5298, "step": 5551 }, { "epoch": 1.5363592333771536, "grad_norm": 0.17871439456939697, "learning_rate": 1e-05, "loss": 0.4976, "step": 5552 }, { "epoch": 1.5366359925275028, "grad_norm": 0.16195665299892426, "learning_rate": 1e-05, "loss": 0.5301, "step": 5553 }, { "epoch": 1.5369127516778525, "grad_norm": 0.1718081831932068, "learning_rate": 1e-05, "loss": 0.499, "step": 5554 }, { "epoch": 1.5371895108282017, "grad_norm": 0.1767008751630783, "learning_rate": 1e-05, "loss": 0.5101, "step": 5555 }, { "epoch": 1.5374662699785513, "grad_norm": 0.1739596724510193, "learning_rate": 1e-05, "loss": 0.5082, "step": 5556 }, { "epoch": 1.5377430291289005, "grad_norm": 0.18191483616828918, "learning_rate": 1e-05, "loss": 0.4843, "step": 5557 }, { "epoch": 1.5380197882792501, "grad_norm": 0.16535553336143494, "learning_rate": 1e-05, "loss": 0.5212, "step": 5558 }, { "epoch": 1.5382965474295993, "grad_norm": 0.17453889548778534, "learning_rate": 1e-05, "loss": 0.5242, "step": 5559 }, { "epoch": 1.538573306579949, "grad_norm": 0.16963940858840942, "learning_rate": 1e-05, "loss": 0.5018, "step": 5560 }, { "epoch": 1.5388500657302981, "grad_norm": 0.1690494269132614, "learning_rate": 1e-05, "loss": 0.4958, "step": 5561 }, { "epoch": 1.5391268248806478, "grad_norm": 0.16752950847148895, "learning_rate": 1e-05, "loss": 0.4963, "step": 5562 }, { "epoch": 1.539403584030997, "grad_norm": 0.1677865982055664, "learning_rate": 1e-05, "loss": 0.503, "step": 5563 }, { "epoch": 1.5396803431813464, "grad_norm": 0.1757618486881256, "learning_rate": 1e-05, "loss": 0.5171, "step": 5564 }, { "epoch": 1.5399571023316958, "grad_norm": 0.1700177639722824, "learning_rate": 1e-05, "loss": 0.5067, "step": 5565 }, { "epoch": 1.5402338614820452, "grad_norm": 0.1643906682729721, "learning_rate": 1e-05, "loss": 0.4921, "step": 5566 }, { "epoch": 1.5405106206323946, "grad_norm": 0.1705968827009201, "learning_rate": 1e-05, "loss": 0.4841, "step": 5567 }, { "epoch": 1.540787379782744, "grad_norm": 0.1851474940776825, "learning_rate": 1e-05, "loss": 0.539, "step": 5568 }, { "epoch": 1.5410641389330935, "grad_norm": 0.17286232113838196, "learning_rate": 1e-05, "loss": 0.5046, "step": 5569 }, { "epoch": 1.5413408980834429, "grad_norm": 0.17292995750904083, "learning_rate": 1e-05, "loss": 0.515, "step": 5570 }, { "epoch": 1.5416176572337923, "grad_norm": 0.17470748722553253, "learning_rate": 1e-05, "loss": 0.529, "step": 5571 }, { "epoch": 1.5418944163841417, "grad_norm": 0.16533440351486206, "learning_rate": 1e-05, "loss": 0.4858, "step": 5572 }, { "epoch": 1.5421711755344911, "grad_norm": 0.1769029051065445, "learning_rate": 1e-05, "loss": 0.489, "step": 5573 }, { "epoch": 1.5424479346848405, "grad_norm": 0.17287257313728333, "learning_rate": 1e-05, "loss": 0.5153, "step": 5574 }, { "epoch": 1.54272469383519, "grad_norm": 0.1706818789243698, "learning_rate": 1e-05, "loss": 0.5222, "step": 5575 }, { "epoch": 1.5430014529855394, "grad_norm": 0.17794375121593475, "learning_rate": 1e-05, "loss": 0.5007, "step": 5576 }, { "epoch": 1.5432782121358888, "grad_norm": 0.17651726305484772, "learning_rate": 1e-05, "loss": 0.5486, "step": 5577 }, { "epoch": 1.5435549712862382, "grad_norm": 0.17145420610904694, "learning_rate": 1e-05, "loss": 0.5178, "step": 5578 }, { "epoch": 1.5438317304365876, "grad_norm": 0.16230034828186035, "learning_rate": 1e-05, "loss": 0.5093, "step": 5579 }, { "epoch": 1.5441084895869368, "grad_norm": 0.17784324288368225, "learning_rate": 1e-05, "loss": 0.4972, "step": 5580 }, { "epoch": 1.5443852487372864, "grad_norm": 0.16570639610290527, "learning_rate": 1e-05, "loss": 0.5228, "step": 5581 }, { "epoch": 1.5446620078876356, "grad_norm": 0.16963781416416168, "learning_rate": 1e-05, "loss": 0.5117, "step": 5582 }, { "epoch": 1.5449387670379853, "grad_norm": 0.1836441606283188, "learning_rate": 1e-05, "loss": 0.4989, "step": 5583 }, { "epoch": 1.5452155261883345, "grad_norm": 0.16546493768692017, "learning_rate": 1e-05, "loss": 0.4961, "step": 5584 }, { "epoch": 1.545492285338684, "grad_norm": 0.17037761211395264, "learning_rate": 1e-05, "loss": 0.5024, "step": 5585 }, { "epoch": 1.5457690444890333, "grad_norm": 0.16376586258411407, "learning_rate": 1e-05, "loss": 0.4927, "step": 5586 }, { "epoch": 1.546045803639383, "grad_norm": 0.1780576854944229, "learning_rate": 1e-05, "loss": 0.5142, "step": 5587 }, { "epoch": 1.5463225627897321, "grad_norm": 0.16772696375846863, "learning_rate": 1e-05, "loss": 0.4959, "step": 5588 }, { "epoch": 1.5465993219400818, "grad_norm": 0.17148491740226746, "learning_rate": 1e-05, "loss": 0.52, "step": 5589 }, { "epoch": 1.546876081090431, "grad_norm": 0.1795651763677597, "learning_rate": 1e-05, "loss": 0.5202, "step": 5590 }, { "epoch": 1.5471528402407806, "grad_norm": 0.1716371774673462, "learning_rate": 1e-05, "loss": 0.5241, "step": 5591 }, { "epoch": 1.5474295993911298, "grad_norm": 0.16768237948417664, "learning_rate": 1e-05, "loss": 0.5181, "step": 5592 }, { "epoch": 1.5477063585414794, "grad_norm": 0.17581747472286224, "learning_rate": 1e-05, "loss": 0.4984, "step": 5593 }, { "epoch": 1.5479831176918286, "grad_norm": 0.1637684404850006, "learning_rate": 1e-05, "loss": 0.5043, "step": 5594 }, { "epoch": 1.5482598768421783, "grad_norm": 0.1724802851676941, "learning_rate": 1e-05, "loss": 0.4744, "step": 5595 }, { "epoch": 1.5485366359925274, "grad_norm": 0.1651802510023117, "learning_rate": 1e-05, "loss": 0.4897, "step": 5596 }, { "epoch": 1.5488133951428769, "grad_norm": 0.16847868263721466, "learning_rate": 1e-05, "loss": 0.5026, "step": 5597 }, { "epoch": 1.5490901542932263, "grad_norm": 0.18243341147899628, "learning_rate": 1e-05, "loss": 0.5288, "step": 5598 }, { "epoch": 1.5493669134435757, "grad_norm": 0.17969931662082672, "learning_rate": 1e-05, "loss": 0.5, "step": 5599 }, { "epoch": 1.549643672593925, "grad_norm": 0.1641029417514801, "learning_rate": 1e-05, "loss": 0.5283, "step": 5600 }, { "epoch": 1.5499204317442745, "grad_norm": 0.17507734894752502, "learning_rate": 1e-05, "loss": 0.5236, "step": 5601 }, { "epoch": 1.550197190894624, "grad_norm": 0.17275892198085785, "learning_rate": 1e-05, "loss": 0.5022, "step": 5602 }, { "epoch": 1.5504739500449733, "grad_norm": 0.17389962077140808, "learning_rate": 1e-05, "loss": 0.5233, "step": 5603 }, { "epoch": 1.5507507091953228, "grad_norm": 0.16537395119667053, "learning_rate": 1e-05, "loss": 0.5211, "step": 5604 }, { "epoch": 1.5510274683456722, "grad_norm": 0.17696596682071686, "learning_rate": 1e-05, "loss": 0.4987, "step": 5605 }, { "epoch": 1.5513042274960216, "grad_norm": 0.16746583580970764, "learning_rate": 1e-05, "loss": 0.4576, "step": 5606 }, { "epoch": 1.551580986646371, "grad_norm": 0.17682461440563202, "learning_rate": 1e-05, "loss": 0.4952, "step": 5607 }, { "epoch": 1.5518577457967204, "grad_norm": 0.17310534417629242, "learning_rate": 1e-05, "loss": 0.5263, "step": 5608 }, { "epoch": 1.5521345049470698, "grad_norm": 0.1740623563528061, "learning_rate": 1e-05, "loss": 0.4847, "step": 5609 }, { "epoch": 1.5524112640974193, "grad_norm": 0.17232057452201843, "learning_rate": 1e-05, "loss": 0.5145, "step": 5610 }, { "epoch": 1.5526880232477687, "grad_norm": 0.17455489933490753, "learning_rate": 1e-05, "loss": 0.5066, "step": 5611 }, { "epoch": 1.552964782398118, "grad_norm": 0.17282025516033173, "learning_rate": 1e-05, "loss": 0.5078, "step": 5612 }, { "epoch": 1.5532415415484675, "grad_norm": 0.1731119304895401, "learning_rate": 1e-05, "loss": 0.5261, "step": 5613 }, { "epoch": 1.553518300698817, "grad_norm": 0.1712118238210678, "learning_rate": 1e-05, "loss": 0.5082, "step": 5614 }, { "epoch": 1.553795059849166, "grad_norm": 0.1708184778690338, "learning_rate": 1e-05, "loss": 0.4977, "step": 5615 }, { "epoch": 1.5540718189995157, "grad_norm": 0.1803012639284134, "learning_rate": 1e-05, "loss": 0.5261, "step": 5616 }, { "epoch": 1.554348578149865, "grad_norm": 0.1672341376543045, "learning_rate": 1e-05, "loss": 0.4905, "step": 5617 }, { "epoch": 1.5546253373002146, "grad_norm": 0.17729267477989197, "learning_rate": 1e-05, "loss": 0.5415, "step": 5618 }, { "epoch": 1.5549020964505638, "grad_norm": 0.16053657233715057, "learning_rate": 1e-05, "loss": 0.4959, "step": 5619 }, { "epoch": 1.5551788556009134, "grad_norm": 0.1740211844444275, "learning_rate": 1e-05, "loss": 0.5114, "step": 5620 }, { "epoch": 1.5554556147512626, "grad_norm": 0.17874187231063843, "learning_rate": 1e-05, "loss": 0.5404, "step": 5621 }, { "epoch": 1.5557323739016122, "grad_norm": 0.17429037392139435, "learning_rate": 1e-05, "loss": 0.5423, "step": 5622 }, { "epoch": 1.5560091330519614, "grad_norm": 0.17587517201900482, "learning_rate": 1e-05, "loss": 0.5084, "step": 5623 }, { "epoch": 1.556285892202311, "grad_norm": 0.17288078367710114, "learning_rate": 1e-05, "loss": 0.5149, "step": 5624 }, { "epoch": 1.5565626513526603, "grad_norm": 0.17831695079803467, "learning_rate": 1e-05, "loss": 0.5054, "step": 5625 }, { "epoch": 1.55683941050301, "grad_norm": 0.17287221550941467, "learning_rate": 1e-05, "loss": 0.5163, "step": 5626 }, { "epoch": 1.557116169653359, "grad_norm": 0.16309259831905365, "learning_rate": 1e-05, "loss": 0.4793, "step": 5627 }, { "epoch": 1.5573929288037087, "grad_norm": 0.1731535941362381, "learning_rate": 1e-05, "loss": 0.5251, "step": 5628 }, { "epoch": 1.557669687954058, "grad_norm": 0.1668880432844162, "learning_rate": 1e-05, "loss": 0.516, "step": 5629 }, { "epoch": 1.5579464471044076, "grad_norm": 0.17174455523490906, "learning_rate": 1e-05, "loss": 0.5023, "step": 5630 }, { "epoch": 1.5582232062547567, "grad_norm": 0.1661345511674881, "learning_rate": 1e-05, "loss": 0.5126, "step": 5631 }, { "epoch": 1.5584999654051062, "grad_norm": 0.17274552583694458, "learning_rate": 1e-05, "loss": 0.5094, "step": 5632 }, { "epoch": 1.5587767245554556, "grad_norm": 0.16545812785625458, "learning_rate": 1e-05, "loss": 0.4907, "step": 5633 }, { "epoch": 1.559053483705805, "grad_norm": 0.17161326110363007, "learning_rate": 1e-05, "loss": 0.496, "step": 5634 }, { "epoch": 1.5593302428561544, "grad_norm": 0.17941541969776154, "learning_rate": 1e-05, "loss": 0.5122, "step": 5635 }, { "epoch": 1.5596070020065038, "grad_norm": 0.17165735363960266, "learning_rate": 1e-05, "loss": 0.5157, "step": 5636 }, { "epoch": 1.5598837611568532, "grad_norm": 0.1655459851026535, "learning_rate": 1e-05, "loss": 0.519, "step": 5637 }, { "epoch": 1.5601605203072026, "grad_norm": 0.17070922255516052, "learning_rate": 1e-05, "loss": 0.5028, "step": 5638 }, { "epoch": 1.560437279457552, "grad_norm": 0.17112073302268982, "learning_rate": 1e-05, "loss": 0.5175, "step": 5639 }, { "epoch": 1.5607140386079015, "grad_norm": 0.1671195775270462, "learning_rate": 1e-05, "loss": 0.5308, "step": 5640 }, { "epoch": 1.560990797758251, "grad_norm": 0.17106610536575317, "learning_rate": 1e-05, "loss": 0.5461, "step": 5641 }, { "epoch": 1.5612675569086003, "grad_norm": 0.17259426414966583, "learning_rate": 1e-05, "loss": 0.5219, "step": 5642 }, { "epoch": 1.5615443160589497, "grad_norm": 0.17392605543136597, "learning_rate": 1e-05, "loss": 0.5253, "step": 5643 }, { "epoch": 1.5618210752092991, "grad_norm": 0.17072153091430664, "learning_rate": 1e-05, "loss": 0.4922, "step": 5644 }, { "epoch": 1.5620978343596486, "grad_norm": 0.16933976113796234, "learning_rate": 1e-05, "loss": 0.509, "step": 5645 }, { "epoch": 1.562374593509998, "grad_norm": 0.1742142289876938, "learning_rate": 1e-05, "loss": 0.5129, "step": 5646 }, { "epoch": 1.5626513526603474, "grad_norm": 0.16656316816806793, "learning_rate": 1e-05, "loss": 0.5023, "step": 5647 }, { "epoch": 1.5629281118106968, "grad_norm": 0.1713055670261383, "learning_rate": 1e-05, "loss": 0.5204, "step": 5648 }, { "epoch": 1.5632048709610462, "grad_norm": 0.1755751073360443, "learning_rate": 1e-05, "loss": 0.4958, "step": 5649 }, { "epoch": 1.5634816301113954, "grad_norm": 0.175009086728096, "learning_rate": 1e-05, "loss": 0.5025, "step": 5650 }, { "epoch": 1.563758389261745, "grad_norm": 0.1777065098285675, "learning_rate": 1e-05, "loss": 0.5006, "step": 5651 }, { "epoch": 1.5640351484120942, "grad_norm": 0.1740075647830963, "learning_rate": 1e-05, "loss": 0.5035, "step": 5652 }, { "epoch": 1.5643119075624439, "grad_norm": 0.1738838255405426, "learning_rate": 1e-05, "loss": 0.4894, "step": 5653 }, { "epoch": 1.564588666712793, "grad_norm": 0.17790064215660095, "learning_rate": 1e-05, "loss": 0.5314, "step": 5654 }, { "epoch": 1.5648654258631427, "grad_norm": 0.16342276334762573, "learning_rate": 1e-05, "loss": 0.4878, "step": 5655 }, { "epoch": 1.565142185013492, "grad_norm": 0.16411541402339935, "learning_rate": 1e-05, "loss": 0.5057, "step": 5656 }, { "epoch": 1.5654189441638415, "grad_norm": 0.17054811120033264, "learning_rate": 1e-05, "loss": 0.5008, "step": 5657 }, { "epoch": 1.5656957033141907, "grad_norm": 0.17036744952201843, "learning_rate": 1e-05, "loss": 0.502, "step": 5658 }, { "epoch": 1.5659724624645404, "grad_norm": 0.17090938985347748, "learning_rate": 1e-05, "loss": 0.4712, "step": 5659 }, { "epoch": 1.5662492216148896, "grad_norm": 0.16640573740005493, "learning_rate": 1e-05, "loss": 0.4985, "step": 5660 }, { "epoch": 1.5665259807652392, "grad_norm": 0.1697583943605423, "learning_rate": 1e-05, "loss": 0.4801, "step": 5661 }, { "epoch": 1.5668027399155884, "grad_norm": 0.16962522268295288, "learning_rate": 1e-05, "loss": 0.4965, "step": 5662 }, { "epoch": 1.567079499065938, "grad_norm": 0.17529359459877014, "learning_rate": 1e-05, "loss": 0.4969, "step": 5663 }, { "epoch": 1.5673562582162872, "grad_norm": 0.16958259046077728, "learning_rate": 1e-05, "loss": 0.5125, "step": 5664 }, { "epoch": 1.5676330173666368, "grad_norm": 0.16812457144260406, "learning_rate": 1e-05, "loss": 0.5118, "step": 5665 }, { "epoch": 1.567909776516986, "grad_norm": 0.17255106568336487, "learning_rate": 1e-05, "loss": 0.4945, "step": 5666 }, { "epoch": 1.5681865356673355, "grad_norm": 0.17178675532341003, "learning_rate": 1e-05, "loss": 0.5, "step": 5667 }, { "epoch": 1.5684632948176849, "grad_norm": 0.17581430077552795, "learning_rate": 1e-05, "loss": 0.5339, "step": 5668 }, { "epoch": 1.5687400539680343, "grad_norm": 0.17300571501255035, "learning_rate": 1e-05, "loss": 0.5119, "step": 5669 }, { "epoch": 1.5690168131183837, "grad_norm": 0.1717594861984253, "learning_rate": 1e-05, "loss": 0.4857, "step": 5670 }, { "epoch": 1.5692935722687331, "grad_norm": 0.16397596895694733, "learning_rate": 1e-05, "loss": 0.5122, "step": 5671 }, { "epoch": 1.5695703314190825, "grad_norm": 0.16672024130821228, "learning_rate": 1e-05, "loss": 0.5341, "step": 5672 }, { "epoch": 1.569847090569432, "grad_norm": 0.18337036669254303, "learning_rate": 1e-05, "loss": 0.5238, "step": 5673 }, { "epoch": 1.5701238497197814, "grad_norm": 0.17633889615535736, "learning_rate": 1e-05, "loss": 0.5, "step": 5674 }, { "epoch": 1.5704006088701308, "grad_norm": 0.16392289102077484, "learning_rate": 1e-05, "loss": 0.4878, "step": 5675 }, { "epoch": 1.5706773680204802, "grad_norm": 0.16510912775993347, "learning_rate": 1e-05, "loss": 0.4785, "step": 5676 }, { "epoch": 1.5709541271708296, "grad_norm": 0.16807737946510315, "learning_rate": 1e-05, "loss": 0.5215, "step": 5677 }, { "epoch": 1.571230886321179, "grad_norm": 0.1628909558057785, "learning_rate": 1e-05, "loss": 0.4965, "step": 5678 }, { "epoch": 1.5715076454715284, "grad_norm": 0.17603273689746857, "learning_rate": 1e-05, "loss": 0.506, "step": 5679 }, { "epoch": 1.5717844046218779, "grad_norm": 0.17621955275535583, "learning_rate": 1e-05, "loss": 0.5098, "step": 5680 }, { "epoch": 1.5720611637722273, "grad_norm": 0.17257875204086304, "learning_rate": 1e-05, "loss": 0.5287, "step": 5681 }, { "epoch": 1.5723379229225767, "grad_norm": 0.16752870380878448, "learning_rate": 1e-05, "loss": 0.4855, "step": 5682 }, { "epoch": 1.5726146820729259, "grad_norm": 0.16528524458408356, "learning_rate": 1e-05, "loss": 0.4981, "step": 5683 }, { "epoch": 1.5728914412232755, "grad_norm": 0.17386917769908905, "learning_rate": 1e-05, "loss": 0.5229, "step": 5684 }, { "epoch": 1.5731682003736247, "grad_norm": 0.1763981729745865, "learning_rate": 1e-05, "loss": 0.5287, "step": 5685 }, { "epoch": 1.5734449595239743, "grad_norm": 0.17385125160217285, "learning_rate": 1e-05, "loss": 0.4924, "step": 5686 }, { "epoch": 1.5737217186743235, "grad_norm": 0.1797492504119873, "learning_rate": 1e-05, "loss": 0.5088, "step": 5687 }, { "epoch": 1.5739984778246732, "grad_norm": 0.175181582570076, "learning_rate": 1e-05, "loss": 0.5196, "step": 5688 }, { "epoch": 1.5742752369750224, "grad_norm": 0.16993966698646545, "learning_rate": 1e-05, "loss": 0.5026, "step": 5689 }, { "epoch": 1.574551996125372, "grad_norm": 0.16818590462207794, "learning_rate": 1e-05, "loss": 0.5164, "step": 5690 }, { "epoch": 1.5748287552757212, "grad_norm": 0.17485186457633972, "learning_rate": 1e-05, "loss": 0.5163, "step": 5691 }, { "epoch": 1.5751055144260708, "grad_norm": 0.17355135083198547, "learning_rate": 1e-05, "loss": 0.4858, "step": 5692 }, { "epoch": 1.57538227357642, "grad_norm": 0.1675323247909546, "learning_rate": 1e-05, "loss": 0.4943, "step": 5693 }, { "epoch": 1.5756590327267697, "grad_norm": 0.168426051735878, "learning_rate": 1e-05, "loss": 0.5079, "step": 5694 }, { "epoch": 1.5759357918771189, "grad_norm": 0.1622667759656906, "learning_rate": 1e-05, "loss": 0.4899, "step": 5695 }, { "epoch": 1.5762125510274685, "grad_norm": 0.16612057387828827, "learning_rate": 1e-05, "loss": 0.4876, "step": 5696 }, { "epoch": 1.5764893101778177, "grad_norm": 0.16965220868587494, "learning_rate": 1e-05, "loss": 0.4905, "step": 5697 }, { "epoch": 1.5767660693281673, "grad_norm": 0.16889189183712006, "learning_rate": 1e-05, "loss": 0.5079, "step": 5698 }, { "epoch": 1.5770428284785165, "grad_norm": 0.17795978486537933, "learning_rate": 1e-05, "loss": 0.5056, "step": 5699 }, { "epoch": 1.577319587628866, "grad_norm": 0.1831134408712387, "learning_rate": 1e-05, "loss": 0.4809, "step": 5700 }, { "epoch": 1.5775963467792153, "grad_norm": 0.1694590300321579, "learning_rate": 1e-05, "loss": 0.4882, "step": 5701 }, { "epoch": 1.5778731059295648, "grad_norm": 0.18013592064380646, "learning_rate": 1e-05, "loss": 0.5076, "step": 5702 }, { "epoch": 1.5781498650799142, "grad_norm": 0.17952880263328552, "learning_rate": 1e-05, "loss": 0.4788, "step": 5703 }, { "epoch": 1.5784266242302636, "grad_norm": 0.1640058159828186, "learning_rate": 1e-05, "loss": 0.5088, "step": 5704 }, { "epoch": 1.578703383380613, "grad_norm": 0.1649142950773239, "learning_rate": 1e-05, "loss": 0.5212, "step": 5705 }, { "epoch": 1.5789801425309624, "grad_norm": 0.18179593980312347, "learning_rate": 1e-05, "loss": 0.503, "step": 5706 }, { "epoch": 1.5792569016813118, "grad_norm": 0.16977499425411224, "learning_rate": 1e-05, "loss": 0.5117, "step": 5707 }, { "epoch": 1.5795336608316612, "grad_norm": 0.17162209749221802, "learning_rate": 1e-05, "loss": 0.5262, "step": 5708 }, { "epoch": 1.5798104199820107, "grad_norm": 0.17586950957775116, "learning_rate": 1e-05, "loss": 0.52, "step": 5709 }, { "epoch": 1.58008717913236, "grad_norm": 0.16644896566867828, "learning_rate": 1e-05, "loss": 0.4967, "step": 5710 }, { "epoch": 1.5803639382827095, "grad_norm": 0.1671600341796875, "learning_rate": 1e-05, "loss": 0.4962, "step": 5711 }, { "epoch": 1.580640697433059, "grad_norm": 0.17468048632144928, "learning_rate": 1e-05, "loss": 0.5175, "step": 5712 }, { "epoch": 1.5809174565834083, "grad_norm": 0.1760796308517456, "learning_rate": 1e-05, "loss": 0.5338, "step": 5713 }, { "epoch": 1.5811942157337577, "grad_norm": 0.17441561818122864, "learning_rate": 1e-05, "loss": 0.5153, "step": 5714 }, { "epoch": 1.5814709748841072, "grad_norm": 0.16908729076385498, "learning_rate": 1e-05, "loss": 0.5078, "step": 5715 }, { "epoch": 1.5817477340344566, "grad_norm": 0.1695844680070877, "learning_rate": 1e-05, "loss": 0.4963, "step": 5716 }, { "epoch": 1.582024493184806, "grad_norm": 0.1710767149925232, "learning_rate": 1e-05, "loss": 0.4809, "step": 5717 }, { "epoch": 1.5823012523351552, "grad_norm": 0.16401228308677673, "learning_rate": 1e-05, "loss": 0.4948, "step": 5718 }, { "epoch": 1.5825780114855048, "grad_norm": 0.18401041626930237, "learning_rate": 1e-05, "loss": 0.5335, "step": 5719 }, { "epoch": 1.582854770635854, "grad_norm": 0.17032040655612946, "learning_rate": 1e-05, "loss": 0.5205, "step": 5720 }, { "epoch": 1.5831315297862036, "grad_norm": 0.17121270298957825, "learning_rate": 1e-05, "loss": 0.5184, "step": 5721 }, { "epoch": 1.5834082889365528, "grad_norm": 0.18738654255867004, "learning_rate": 1e-05, "loss": 0.5173, "step": 5722 }, { "epoch": 1.5836850480869025, "grad_norm": 0.16921672224998474, "learning_rate": 1e-05, "loss": 0.4852, "step": 5723 }, { "epoch": 1.5839618072372517, "grad_norm": 0.16714975237846375, "learning_rate": 1e-05, "loss": 0.521, "step": 5724 }, { "epoch": 1.5842385663876013, "grad_norm": 0.17190971970558167, "learning_rate": 1e-05, "loss": 0.5465, "step": 5725 }, { "epoch": 1.5845153255379505, "grad_norm": 0.18024985492229462, "learning_rate": 1e-05, "loss": 0.5201, "step": 5726 }, { "epoch": 1.5847920846883001, "grad_norm": 0.17099560797214508, "learning_rate": 1e-05, "loss": 0.4947, "step": 5727 }, { "epoch": 1.5850688438386493, "grad_norm": 0.17442205548286438, "learning_rate": 1e-05, "loss": 0.4818, "step": 5728 }, { "epoch": 1.585345602988999, "grad_norm": 0.17915278673171997, "learning_rate": 1e-05, "loss": 0.512, "step": 5729 }, { "epoch": 1.5856223621393482, "grad_norm": 0.16482464969158173, "learning_rate": 1e-05, "loss": 0.4955, "step": 5730 }, { "epoch": 1.5858991212896978, "grad_norm": 0.17201274633407593, "learning_rate": 1e-05, "loss": 0.4839, "step": 5731 }, { "epoch": 1.586175880440047, "grad_norm": 0.16392379999160767, "learning_rate": 1e-05, "loss": 0.4901, "step": 5732 }, { "epoch": 1.5864526395903966, "grad_norm": 0.17439429461956024, "learning_rate": 1e-05, "loss": 0.5084, "step": 5733 }, { "epoch": 1.5867293987407458, "grad_norm": 0.16551806032657623, "learning_rate": 1e-05, "loss": 0.5029, "step": 5734 }, { "epoch": 1.5870061578910952, "grad_norm": 0.1684618592262268, "learning_rate": 1e-05, "loss": 0.4987, "step": 5735 }, { "epoch": 1.5872829170414446, "grad_norm": 0.16836358606815338, "learning_rate": 1e-05, "loss": 0.5174, "step": 5736 }, { "epoch": 1.587559676191794, "grad_norm": 0.17014817893505096, "learning_rate": 1e-05, "loss": 0.515, "step": 5737 }, { "epoch": 1.5878364353421435, "grad_norm": 0.17169125378131866, "learning_rate": 1e-05, "loss": 0.5153, "step": 5738 }, { "epoch": 1.5881131944924929, "grad_norm": 0.1843564510345459, "learning_rate": 1e-05, "loss": 0.525, "step": 5739 }, { "epoch": 1.5883899536428423, "grad_norm": 0.17947663366794586, "learning_rate": 1e-05, "loss": 0.4901, "step": 5740 }, { "epoch": 1.5886667127931917, "grad_norm": 0.16851016879081726, "learning_rate": 1e-05, "loss": 0.513, "step": 5741 }, { "epoch": 1.5889434719435411, "grad_norm": 0.17058831453323364, "learning_rate": 1e-05, "loss": 0.4926, "step": 5742 }, { "epoch": 1.5892202310938905, "grad_norm": 0.17551659047603607, "learning_rate": 1e-05, "loss": 0.5218, "step": 5743 }, { "epoch": 1.58949699024424, "grad_norm": 0.1746964305639267, "learning_rate": 1e-05, "loss": 0.5032, "step": 5744 }, { "epoch": 1.5897737493945894, "grad_norm": 0.18663282692432404, "learning_rate": 1e-05, "loss": 0.5385, "step": 5745 }, { "epoch": 1.5900505085449388, "grad_norm": 0.17211610078811646, "learning_rate": 1e-05, "loss": 0.5253, "step": 5746 }, { "epoch": 1.5903272676952882, "grad_norm": 0.16983285546302795, "learning_rate": 1e-05, "loss": 0.5174, "step": 5747 }, { "epoch": 1.5906040268456376, "grad_norm": 0.17693859338760376, "learning_rate": 1e-05, "loss": 0.4966, "step": 5748 }, { "epoch": 1.590880785995987, "grad_norm": 0.17965437471866608, "learning_rate": 1e-05, "loss": 0.5012, "step": 5749 }, { "epoch": 1.5911575451463364, "grad_norm": 0.16691359877586365, "learning_rate": 1e-05, "loss": 0.503, "step": 5750 }, { "epoch": 1.5914343042966859, "grad_norm": 0.17010925710201263, "learning_rate": 1e-05, "loss": 0.4973, "step": 5751 }, { "epoch": 1.5917110634470353, "grad_norm": 0.17054122686386108, "learning_rate": 1e-05, "loss": 0.5087, "step": 5752 }, { "epoch": 1.5919878225973845, "grad_norm": 0.17961172759532928, "learning_rate": 1e-05, "loss": 0.5215, "step": 5753 }, { "epoch": 1.592264581747734, "grad_norm": 0.17318986356258392, "learning_rate": 1e-05, "loss": 0.4874, "step": 5754 }, { "epoch": 1.5925413408980833, "grad_norm": 0.16421698033809662, "learning_rate": 1e-05, "loss": 0.5093, "step": 5755 }, { "epoch": 1.592818100048433, "grad_norm": 0.17792829871177673, "learning_rate": 1e-05, "loss": 0.4997, "step": 5756 }, { "epoch": 1.5930948591987821, "grad_norm": 0.1728699654340744, "learning_rate": 1e-05, "loss": 0.5263, "step": 5757 }, { "epoch": 1.5933716183491318, "grad_norm": 0.17665359377861023, "learning_rate": 1e-05, "loss": 0.5164, "step": 5758 }, { "epoch": 1.593648377499481, "grad_norm": 0.18638744950294495, "learning_rate": 1e-05, "loss": 0.5221, "step": 5759 }, { "epoch": 1.5939251366498306, "grad_norm": 0.16749340295791626, "learning_rate": 1e-05, "loss": 0.5133, "step": 5760 }, { "epoch": 1.5942018958001798, "grad_norm": 0.1684061884880066, "learning_rate": 1e-05, "loss": 0.528, "step": 5761 }, { "epoch": 1.5944786549505294, "grad_norm": 0.1724083572626114, "learning_rate": 1e-05, "loss": 0.4877, "step": 5762 }, { "epoch": 1.5947554141008786, "grad_norm": 0.17439088225364685, "learning_rate": 1e-05, "loss": 0.4963, "step": 5763 }, { "epoch": 1.5950321732512283, "grad_norm": 0.19092658162117004, "learning_rate": 1e-05, "loss": 0.4945, "step": 5764 }, { "epoch": 1.5953089324015775, "grad_norm": 0.1660376638174057, "learning_rate": 1e-05, "loss": 0.5237, "step": 5765 }, { "epoch": 1.595585691551927, "grad_norm": 0.16748066246509552, "learning_rate": 1e-05, "loss": 0.5197, "step": 5766 }, { "epoch": 1.5958624507022763, "grad_norm": 0.16689451038837433, "learning_rate": 1e-05, "loss": 0.5168, "step": 5767 }, { "epoch": 1.596139209852626, "grad_norm": 0.1686849296092987, "learning_rate": 1e-05, "loss": 0.4972, "step": 5768 }, { "epoch": 1.596415969002975, "grad_norm": 0.17765061557292938, "learning_rate": 1e-05, "loss": 0.5322, "step": 5769 }, { "epoch": 1.5966927281533245, "grad_norm": 0.17062082886695862, "learning_rate": 1e-05, "loss": 0.4987, "step": 5770 }, { "epoch": 1.596969487303674, "grad_norm": 0.17056040465831757, "learning_rate": 1e-05, "loss": 0.4889, "step": 5771 }, { "epoch": 1.5972462464540234, "grad_norm": 0.16539424657821655, "learning_rate": 1e-05, "loss": 0.505, "step": 5772 }, { "epoch": 1.5975230056043728, "grad_norm": 0.16099949181079865, "learning_rate": 1e-05, "loss": 0.4959, "step": 5773 }, { "epoch": 1.5977997647547222, "grad_norm": 0.16027092933654785, "learning_rate": 1e-05, "loss": 0.4978, "step": 5774 }, { "epoch": 1.5980765239050716, "grad_norm": 0.17607682943344116, "learning_rate": 1e-05, "loss": 0.5077, "step": 5775 }, { "epoch": 1.598353283055421, "grad_norm": 0.1728534698486328, "learning_rate": 1e-05, "loss": 0.512, "step": 5776 }, { "epoch": 1.5986300422057704, "grad_norm": 0.18134160339832306, "learning_rate": 1e-05, "loss": 0.5144, "step": 5777 }, { "epoch": 1.5989068013561198, "grad_norm": 0.1681768000125885, "learning_rate": 1e-05, "loss": 0.5252, "step": 5778 }, { "epoch": 1.5991835605064693, "grad_norm": 0.17211002111434937, "learning_rate": 1e-05, "loss": 0.4944, "step": 5779 }, { "epoch": 1.5994603196568187, "grad_norm": 0.17236308753490448, "learning_rate": 1e-05, "loss": 0.5093, "step": 5780 }, { "epoch": 1.599737078807168, "grad_norm": 0.17409519851207733, "learning_rate": 1e-05, "loss": 0.4885, "step": 5781 }, { "epoch": 1.6000138379575175, "grad_norm": 0.1762915402650833, "learning_rate": 1e-05, "loss": 0.4834, "step": 5782 }, { "epoch": 1.600290597107867, "grad_norm": 0.16670182347297668, "learning_rate": 1e-05, "loss": 0.503, "step": 5783 }, { "epoch": 1.6005673562582163, "grad_norm": 0.16653065383434296, "learning_rate": 1e-05, "loss": 0.5198, "step": 5784 }, { "epoch": 1.6008441154085657, "grad_norm": 0.16849038004875183, "learning_rate": 1e-05, "loss": 0.5104, "step": 5785 }, { "epoch": 1.601120874558915, "grad_norm": 0.17561964690685272, "learning_rate": 1e-05, "loss": 0.5465, "step": 5786 }, { "epoch": 1.6013976337092646, "grad_norm": 0.1799599826335907, "learning_rate": 1e-05, "loss": 0.5247, "step": 5787 }, { "epoch": 1.6016743928596138, "grad_norm": 0.17260953783988953, "learning_rate": 1e-05, "loss": 0.4983, "step": 5788 }, { "epoch": 1.6019511520099634, "grad_norm": 0.1718526929616928, "learning_rate": 1e-05, "loss": 0.4973, "step": 5789 }, { "epoch": 1.6022279111603126, "grad_norm": 0.16982538998126984, "learning_rate": 1e-05, "loss": 0.497, "step": 5790 }, { "epoch": 1.6025046703106622, "grad_norm": 0.16763953864574432, "learning_rate": 1e-05, "loss": 0.4944, "step": 5791 }, { "epoch": 1.6027814294610114, "grad_norm": 0.16556593775749207, "learning_rate": 1e-05, "loss": 0.4967, "step": 5792 }, { "epoch": 1.603058188611361, "grad_norm": 0.176809161901474, "learning_rate": 1e-05, "loss": 0.4888, "step": 5793 }, { "epoch": 1.6033349477617103, "grad_norm": 0.16931764781475067, "learning_rate": 1e-05, "loss": 0.5034, "step": 5794 }, { "epoch": 1.60361170691206, "grad_norm": 0.1682468205690384, "learning_rate": 1e-05, "loss": 0.4983, "step": 5795 }, { "epoch": 1.603888466062409, "grad_norm": 0.17097486555576324, "learning_rate": 1e-05, "loss": 0.493, "step": 5796 }, { "epoch": 1.6041652252127587, "grad_norm": 0.17418880760669708, "learning_rate": 1e-05, "loss": 0.5343, "step": 5797 }, { "epoch": 1.604441984363108, "grad_norm": 0.17516599595546722, "learning_rate": 1e-05, "loss": 0.55, "step": 5798 }, { "epoch": 1.6047187435134576, "grad_norm": 0.1754528284072876, "learning_rate": 1e-05, "loss": 0.5154, "step": 5799 }, { "epoch": 1.6049955026638068, "grad_norm": 0.16541647911071777, "learning_rate": 1e-05, "loss": 0.5209, "step": 5800 }, { "epoch": 1.6052722618141564, "grad_norm": 0.1758469194173813, "learning_rate": 1e-05, "loss": 0.5122, "step": 5801 }, { "epoch": 1.6055490209645056, "grad_norm": 0.17850667238235474, "learning_rate": 1e-05, "loss": 0.488, "step": 5802 }, { "epoch": 1.605825780114855, "grad_norm": 0.16920027136802673, "learning_rate": 1e-05, "loss": 0.4989, "step": 5803 }, { "epoch": 1.6061025392652044, "grad_norm": 0.16550137102603912, "learning_rate": 1e-05, "loss": 0.5119, "step": 5804 }, { "epoch": 1.6063792984155538, "grad_norm": 0.17699706554412842, "learning_rate": 1e-05, "loss": 0.5217, "step": 5805 }, { "epoch": 1.6066560575659032, "grad_norm": 0.16978296637535095, "learning_rate": 1e-05, "loss": 0.518, "step": 5806 }, { "epoch": 1.6069328167162527, "grad_norm": 0.17308253049850464, "learning_rate": 1e-05, "loss": 0.5223, "step": 5807 }, { "epoch": 1.607209575866602, "grad_norm": 0.17000582814216614, "learning_rate": 1e-05, "loss": 0.5268, "step": 5808 }, { "epoch": 1.6074863350169515, "grad_norm": 0.17458511888980865, "learning_rate": 1e-05, "loss": 0.5105, "step": 5809 }, { "epoch": 1.607763094167301, "grad_norm": 0.16577064990997314, "learning_rate": 1e-05, "loss": 0.5134, "step": 5810 }, { "epoch": 1.6080398533176503, "grad_norm": 0.1717449575662613, "learning_rate": 1e-05, "loss": 0.5456, "step": 5811 }, { "epoch": 1.6083166124679997, "grad_norm": 0.16888763010501862, "learning_rate": 1e-05, "loss": 0.4911, "step": 5812 }, { "epoch": 1.6085933716183491, "grad_norm": 0.17645910382270813, "learning_rate": 1e-05, "loss": 0.5272, "step": 5813 }, { "epoch": 1.6088701307686986, "grad_norm": 0.17247799038887024, "learning_rate": 1e-05, "loss": 0.5379, "step": 5814 }, { "epoch": 1.609146889919048, "grad_norm": 0.17288580536842346, "learning_rate": 1e-05, "loss": 0.5192, "step": 5815 }, { "epoch": 1.6094236490693974, "grad_norm": 0.17059844732284546, "learning_rate": 1e-05, "loss": 0.4948, "step": 5816 }, { "epoch": 1.6097004082197468, "grad_norm": 0.17078298330307007, "learning_rate": 1e-05, "loss": 0.4817, "step": 5817 }, { "epoch": 1.6099771673700962, "grad_norm": 0.17406292259693146, "learning_rate": 1e-05, "loss": 0.5033, "step": 5818 }, { "epoch": 1.6102539265204456, "grad_norm": 0.16960349678993225, "learning_rate": 1e-05, "loss": 0.5225, "step": 5819 }, { "epoch": 1.610530685670795, "grad_norm": 0.17600028216838837, "learning_rate": 1e-05, "loss": 0.5356, "step": 5820 }, { "epoch": 1.6108074448211442, "grad_norm": 0.1791086345911026, "learning_rate": 1e-05, "loss": 0.5151, "step": 5821 }, { "epoch": 1.6110842039714939, "grad_norm": 0.16514602303504944, "learning_rate": 1e-05, "loss": 0.5078, "step": 5822 }, { "epoch": 1.611360963121843, "grad_norm": 0.18644843995571136, "learning_rate": 1e-05, "loss": 0.521, "step": 5823 }, { "epoch": 1.6116377222721927, "grad_norm": 0.19340302050113678, "learning_rate": 1e-05, "loss": 0.5148, "step": 5824 }, { "epoch": 1.611914481422542, "grad_norm": 0.16712981462478638, "learning_rate": 1e-05, "loss": 0.5216, "step": 5825 }, { "epoch": 1.6121912405728915, "grad_norm": 0.1711372435092926, "learning_rate": 1e-05, "loss": 0.519, "step": 5826 }, { "epoch": 1.6124679997232407, "grad_norm": 0.17897017300128937, "learning_rate": 1e-05, "loss": 0.4739, "step": 5827 }, { "epoch": 1.6127447588735904, "grad_norm": 0.1698460429906845, "learning_rate": 1e-05, "loss": 0.5077, "step": 5828 }, { "epoch": 1.6130215180239396, "grad_norm": 0.17198219895362854, "learning_rate": 1e-05, "loss": 0.497, "step": 5829 }, { "epoch": 1.6132982771742892, "grad_norm": 0.16914963722229004, "learning_rate": 1e-05, "loss": 0.4965, "step": 5830 }, { "epoch": 1.6135750363246384, "grad_norm": 0.1726287603378296, "learning_rate": 1e-05, "loss": 0.5097, "step": 5831 }, { "epoch": 1.613851795474988, "grad_norm": 0.17174668610095978, "learning_rate": 1e-05, "loss": 0.5111, "step": 5832 }, { "epoch": 1.6141285546253372, "grad_norm": 0.17087653279304504, "learning_rate": 1e-05, "loss": 0.4924, "step": 5833 }, { "epoch": 1.6144053137756869, "grad_norm": 0.16866803169250488, "learning_rate": 1e-05, "loss": 0.5062, "step": 5834 }, { "epoch": 1.614682072926036, "grad_norm": 0.1672082543373108, "learning_rate": 1e-05, "loss": 0.4843, "step": 5835 }, { "epoch": 1.6149588320763857, "grad_norm": 0.1777346432209015, "learning_rate": 1e-05, "loss": 0.4818, "step": 5836 }, { "epoch": 1.6152355912267349, "grad_norm": 0.1639283448457718, "learning_rate": 1e-05, "loss": 0.4948, "step": 5837 }, { "epoch": 1.6155123503770843, "grad_norm": 0.16705986857414246, "learning_rate": 1e-05, "loss": 0.5084, "step": 5838 }, { "epoch": 1.6157891095274337, "grad_norm": 0.17508046329021454, "learning_rate": 1e-05, "loss": 0.5038, "step": 5839 }, { "epoch": 1.6160658686777831, "grad_norm": 0.16575029492378235, "learning_rate": 1e-05, "loss": 0.4747, "step": 5840 }, { "epoch": 1.6163426278281325, "grad_norm": 0.17151056230068207, "learning_rate": 1e-05, "loss": 0.4982, "step": 5841 }, { "epoch": 1.616619386978482, "grad_norm": 0.1639341562986374, "learning_rate": 1e-05, "loss": 0.5241, "step": 5842 }, { "epoch": 1.6168961461288314, "grad_norm": 0.17360158264636993, "learning_rate": 1e-05, "loss": 0.5133, "step": 5843 }, { "epoch": 1.6171729052791808, "grad_norm": 0.17571304738521576, "learning_rate": 1e-05, "loss": 0.5003, "step": 5844 }, { "epoch": 1.6174496644295302, "grad_norm": 0.1751132309436798, "learning_rate": 1e-05, "loss": 0.509, "step": 5845 }, { "epoch": 1.6177264235798796, "grad_norm": 0.176322802901268, "learning_rate": 1e-05, "loss": 0.5067, "step": 5846 }, { "epoch": 1.618003182730229, "grad_norm": 0.17466476559638977, "learning_rate": 1e-05, "loss": 0.5107, "step": 5847 }, { "epoch": 1.6182799418805784, "grad_norm": 0.17535869777202606, "learning_rate": 1e-05, "loss": 0.528, "step": 5848 }, { "epoch": 1.6185567010309279, "grad_norm": 0.17048294842243195, "learning_rate": 1e-05, "loss": 0.52, "step": 5849 }, { "epoch": 1.6188334601812773, "grad_norm": 0.17114096879959106, "learning_rate": 1e-05, "loss": 0.5119, "step": 5850 }, { "epoch": 1.6191102193316267, "grad_norm": 0.16809187829494476, "learning_rate": 1e-05, "loss": 0.4912, "step": 5851 }, { "epoch": 1.619386978481976, "grad_norm": 0.1803630292415619, "learning_rate": 1e-05, "loss": 0.5193, "step": 5852 }, { "epoch": 1.6196637376323255, "grad_norm": 0.178384929895401, "learning_rate": 1e-05, "loss": 0.5363, "step": 5853 }, { "epoch": 1.619940496782675, "grad_norm": 0.17947761714458466, "learning_rate": 1e-05, "loss": 0.523, "step": 5854 }, { "epoch": 1.6202172559330243, "grad_norm": 0.1837102770805359, "learning_rate": 1e-05, "loss": 0.5158, "step": 5855 }, { "epoch": 1.6204940150833735, "grad_norm": 0.16690486669540405, "learning_rate": 1e-05, "loss": 0.4974, "step": 5856 }, { "epoch": 1.6207707742337232, "grad_norm": 0.16938598453998566, "learning_rate": 1e-05, "loss": 0.5037, "step": 5857 }, { "epoch": 1.6210475333840724, "grad_norm": 0.16970528662204742, "learning_rate": 1e-05, "loss": 0.498, "step": 5858 }, { "epoch": 1.621324292534422, "grad_norm": 0.1702519655227661, "learning_rate": 1e-05, "loss": 0.5224, "step": 5859 }, { "epoch": 1.6216010516847712, "grad_norm": 0.16875839233398438, "learning_rate": 1e-05, "loss": 0.5091, "step": 5860 }, { "epoch": 1.6218778108351208, "grad_norm": 0.16643162071704865, "learning_rate": 1e-05, "loss": 0.4936, "step": 5861 }, { "epoch": 1.62215456998547, "grad_norm": 0.1735793501138687, "learning_rate": 1e-05, "loss": 0.5058, "step": 5862 }, { "epoch": 1.6224313291358197, "grad_norm": 0.1672544926404953, "learning_rate": 1e-05, "loss": 0.5022, "step": 5863 }, { "epoch": 1.6227080882861689, "grad_norm": 0.17054034769535065, "learning_rate": 1e-05, "loss": 0.4985, "step": 5864 }, { "epoch": 1.6229848474365185, "grad_norm": 0.17510154843330383, "learning_rate": 1e-05, "loss": 0.5039, "step": 5865 }, { "epoch": 1.6232616065868677, "grad_norm": 0.16670522093772888, "learning_rate": 1e-05, "loss": 0.4975, "step": 5866 }, { "epoch": 1.6235383657372173, "grad_norm": 0.17308615148067474, "learning_rate": 1e-05, "loss": 0.5082, "step": 5867 }, { "epoch": 1.6238151248875665, "grad_norm": 0.17029649019241333, "learning_rate": 1e-05, "loss": 0.5026, "step": 5868 }, { "epoch": 1.6240918840379162, "grad_norm": 0.17294275760650635, "learning_rate": 1e-05, "loss": 0.494, "step": 5869 }, { "epoch": 1.6243686431882653, "grad_norm": 0.16363981366157532, "learning_rate": 1e-05, "loss": 0.5296, "step": 5870 }, { "epoch": 1.624645402338615, "grad_norm": 0.16731871664524078, "learning_rate": 1e-05, "loss": 0.5044, "step": 5871 }, { "epoch": 1.6249221614889642, "grad_norm": 0.1678381711244583, "learning_rate": 1e-05, "loss": 0.5047, "step": 5872 }, { "epoch": 1.6251989206393136, "grad_norm": 0.17910265922546387, "learning_rate": 1e-05, "loss": 0.547, "step": 5873 }, { "epoch": 1.625475679789663, "grad_norm": 0.18183207511901855, "learning_rate": 1e-05, "loss": 0.5217, "step": 5874 }, { "epoch": 1.6257524389400124, "grad_norm": 0.17215242981910706, "learning_rate": 1e-05, "loss": 0.4854, "step": 5875 }, { "epoch": 1.6260291980903618, "grad_norm": 0.16376492381095886, "learning_rate": 1e-05, "loss": 0.5275, "step": 5876 }, { "epoch": 1.6263059572407113, "grad_norm": 0.1747700273990631, "learning_rate": 1e-05, "loss": 0.521, "step": 5877 }, { "epoch": 1.6265827163910607, "grad_norm": 0.16453061997890472, "learning_rate": 1e-05, "loss": 0.5192, "step": 5878 }, { "epoch": 1.62685947554141, "grad_norm": 0.18050642311573029, "learning_rate": 1e-05, "loss": 0.5008, "step": 5879 }, { "epoch": 1.6271362346917595, "grad_norm": 0.1700354665517807, "learning_rate": 1e-05, "loss": 0.4926, "step": 5880 }, { "epoch": 1.627412993842109, "grad_norm": 0.17028385400772095, "learning_rate": 1e-05, "loss": 0.5432, "step": 5881 }, { "epoch": 1.6276897529924583, "grad_norm": 0.17262165248394012, "learning_rate": 1e-05, "loss": 0.4877, "step": 5882 }, { "epoch": 1.6279665121428077, "grad_norm": 0.17105869948863983, "learning_rate": 1e-05, "loss": 0.5233, "step": 5883 }, { "epoch": 1.6282432712931572, "grad_norm": 0.1687643676996231, "learning_rate": 1e-05, "loss": 0.4859, "step": 5884 }, { "epoch": 1.6285200304435066, "grad_norm": 0.16654205322265625, "learning_rate": 1e-05, "loss": 0.4875, "step": 5885 }, { "epoch": 1.628796789593856, "grad_norm": 0.17464777827262878, "learning_rate": 1e-05, "loss": 0.5327, "step": 5886 }, { "epoch": 1.6290735487442054, "grad_norm": 0.16786545515060425, "learning_rate": 1e-05, "loss": 0.5267, "step": 5887 }, { "epoch": 1.6293503078945548, "grad_norm": 0.1768689602613449, "learning_rate": 1e-05, "loss": 0.5051, "step": 5888 }, { "epoch": 1.629627067044904, "grad_norm": 0.17403097450733185, "learning_rate": 1e-05, "loss": 0.5207, "step": 5889 }, { "epoch": 1.6299038261952536, "grad_norm": 0.16533853113651276, "learning_rate": 1e-05, "loss": 0.5014, "step": 5890 }, { "epoch": 1.6301805853456028, "grad_norm": 0.16965003311634064, "learning_rate": 1e-05, "loss": 0.5051, "step": 5891 }, { "epoch": 1.6304573444959525, "grad_norm": 0.16661228239536285, "learning_rate": 1e-05, "loss": 0.4896, "step": 5892 }, { "epoch": 1.6307341036463017, "grad_norm": 0.1712050884962082, "learning_rate": 1e-05, "loss": 0.5077, "step": 5893 }, { "epoch": 1.6310108627966513, "grad_norm": 0.17278699576854706, "learning_rate": 1e-05, "loss": 0.4801, "step": 5894 }, { "epoch": 1.6312876219470005, "grad_norm": 0.1680762767791748, "learning_rate": 1e-05, "loss": 0.5085, "step": 5895 }, { "epoch": 1.6315643810973501, "grad_norm": 0.16475170850753784, "learning_rate": 1e-05, "loss": 0.5129, "step": 5896 }, { "epoch": 1.6318411402476993, "grad_norm": 0.16110259294509888, "learning_rate": 1e-05, "loss": 0.5046, "step": 5897 }, { "epoch": 1.632117899398049, "grad_norm": 0.1724584549665451, "learning_rate": 1e-05, "loss": 0.5071, "step": 5898 }, { "epoch": 1.6323946585483982, "grad_norm": 0.17023037374019623, "learning_rate": 1e-05, "loss": 0.482, "step": 5899 }, { "epoch": 1.6326714176987478, "grad_norm": 0.17401473224163055, "learning_rate": 1e-05, "loss": 0.5179, "step": 5900 }, { "epoch": 1.632948176849097, "grad_norm": 0.17424140870571136, "learning_rate": 1e-05, "loss": 0.5456, "step": 5901 }, { "epoch": 1.6332249359994466, "grad_norm": 0.1917986422777176, "learning_rate": 1e-05, "loss": 0.5383, "step": 5902 }, { "epoch": 1.6335016951497958, "grad_norm": 0.17898571491241455, "learning_rate": 1e-05, "loss": 0.5023, "step": 5903 }, { "epoch": 1.6337784543001455, "grad_norm": 0.1848554015159607, "learning_rate": 1e-05, "loss": 0.5004, "step": 5904 }, { "epoch": 1.6340552134504946, "grad_norm": 0.17927227914333344, "learning_rate": 1e-05, "loss": 0.5164, "step": 5905 }, { "epoch": 1.634331972600844, "grad_norm": 0.1607827991247177, "learning_rate": 1e-05, "loss": 0.5016, "step": 5906 }, { "epoch": 1.6346087317511935, "grad_norm": 0.16832353174686432, "learning_rate": 1e-05, "loss": 0.509, "step": 5907 }, { "epoch": 1.634885490901543, "grad_norm": 0.17121542990207672, "learning_rate": 1e-05, "loss": 0.5095, "step": 5908 }, { "epoch": 1.6351622500518923, "grad_norm": 0.17247889935970306, "learning_rate": 1e-05, "loss": 0.5161, "step": 5909 }, { "epoch": 1.6354390092022417, "grad_norm": 0.17618857324123383, "learning_rate": 1e-05, "loss": 0.5175, "step": 5910 }, { "epoch": 1.6357157683525911, "grad_norm": 0.17318306863307953, "learning_rate": 1e-05, "loss": 0.5189, "step": 5911 }, { "epoch": 1.6359925275029406, "grad_norm": 0.17279736697673798, "learning_rate": 1e-05, "loss": 0.5015, "step": 5912 }, { "epoch": 1.63626928665329, "grad_norm": 0.1828632801771164, "learning_rate": 1e-05, "loss": 0.4987, "step": 5913 }, { "epoch": 1.6365460458036394, "grad_norm": 0.17889811098575592, "learning_rate": 1e-05, "loss": 0.5253, "step": 5914 }, { "epoch": 1.6368228049539888, "grad_norm": 0.16689561307430267, "learning_rate": 1e-05, "loss": 0.4667, "step": 5915 }, { "epoch": 1.6370995641043382, "grad_norm": 0.15822042524814606, "learning_rate": 1e-05, "loss": 0.5152, "step": 5916 }, { "epoch": 1.6373763232546876, "grad_norm": 0.17635463178157806, "learning_rate": 1e-05, "loss": 0.5307, "step": 5917 }, { "epoch": 1.637653082405037, "grad_norm": 0.16877219080924988, "learning_rate": 1e-05, "loss": 0.525, "step": 5918 }, { "epoch": 1.6379298415553865, "grad_norm": 0.16768629848957062, "learning_rate": 1e-05, "loss": 0.513, "step": 5919 }, { "epoch": 1.6382066007057359, "grad_norm": 0.16763196885585785, "learning_rate": 1e-05, "loss": 0.4992, "step": 5920 }, { "epoch": 1.6384833598560853, "grad_norm": 0.16414153575897217, "learning_rate": 1e-05, "loss": 0.4705, "step": 5921 }, { "epoch": 1.6387601190064347, "grad_norm": 0.1685405969619751, "learning_rate": 1e-05, "loss": 0.4859, "step": 5922 }, { "epoch": 1.6390368781567841, "grad_norm": 0.16369196772575378, "learning_rate": 1e-05, "loss": 0.4806, "step": 5923 }, { "epoch": 1.6393136373071333, "grad_norm": 0.16857345402240753, "learning_rate": 1e-05, "loss": 0.5094, "step": 5924 }, { "epoch": 1.639590396457483, "grad_norm": 0.17250597476959229, "learning_rate": 1e-05, "loss": 0.4942, "step": 5925 }, { "epoch": 1.6398671556078321, "grad_norm": 0.16942137479782104, "learning_rate": 1e-05, "loss": 0.5131, "step": 5926 }, { "epoch": 1.6401439147581818, "grad_norm": 0.17095524072647095, "learning_rate": 1e-05, "loss": 0.4981, "step": 5927 }, { "epoch": 1.640420673908531, "grad_norm": 0.16972477734088898, "learning_rate": 1e-05, "loss": 0.5146, "step": 5928 }, { "epoch": 1.6406974330588806, "grad_norm": 0.16466180980205536, "learning_rate": 1e-05, "loss": 0.4918, "step": 5929 }, { "epoch": 1.6409741922092298, "grad_norm": 0.17069730162620544, "learning_rate": 1e-05, "loss": 0.4983, "step": 5930 }, { "epoch": 1.6412509513595794, "grad_norm": 0.16870243847370148, "learning_rate": 1e-05, "loss": 0.4926, "step": 5931 }, { "epoch": 1.6415277105099286, "grad_norm": 0.16711248457431793, "learning_rate": 1e-05, "loss": 0.4831, "step": 5932 }, { "epoch": 1.6418044696602783, "grad_norm": 0.17150075733661652, "learning_rate": 1e-05, "loss": 0.4839, "step": 5933 }, { "epoch": 1.6420812288106275, "grad_norm": 0.1681033968925476, "learning_rate": 1e-05, "loss": 0.4933, "step": 5934 }, { "epoch": 1.642357987960977, "grad_norm": 0.15976189076900482, "learning_rate": 1e-05, "loss": 0.4855, "step": 5935 }, { "epoch": 1.6426347471113263, "grad_norm": 0.16684547066688538, "learning_rate": 1e-05, "loss": 0.4994, "step": 5936 }, { "epoch": 1.642911506261676, "grad_norm": 0.1726096272468567, "learning_rate": 1e-05, "loss": 0.4929, "step": 5937 }, { "epoch": 1.6431882654120251, "grad_norm": 0.15509149432182312, "learning_rate": 1e-05, "loss": 0.5275, "step": 5938 }, { "epoch": 1.6434650245623748, "grad_norm": 0.16822798550128937, "learning_rate": 1e-05, "loss": 0.4885, "step": 5939 }, { "epoch": 1.643741783712724, "grad_norm": 0.1623859405517578, "learning_rate": 1e-05, "loss": 0.485, "step": 5940 }, { "epoch": 1.6440185428630734, "grad_norm": 0.1757320761680603, "learning_rate": 1e-05, "loss": 0.5186, "step": 5941 }, { "epoch": 1.6442953020134228, "grad_norm": 0.1960490196943283, "learning_rate": 1e-05, "loss": 0.5014, "step": 5942 }, { "epoch": 1.6445720611637722, "grad_norm": 0.1642327904701233, "learning_rate": 1e-05, "loss": 0.5036, "step": 5943 }, { "epoch": 1.6448488203141216, "grad_norm": 0.16672056913375854, "learning_rate": 1e-05, "loss": 0.483, "step": 5944 }, { "epoch": 1.645125579464471, "grad_norm": 0.1720082014799118, "learning_rate": 1e-05, "loss": 0.5302, "step": 5945 }, { "epoch": 1.6454023386148204, "grad_norm": 0.16673395037651062, "learning_rate": 1e-05, "loss": 0.4848, "step": 5946 }, { "epoch": 1.6456790977651699, "grad_norm": 0.16308003664016724, "learning_rate": 1e-05, "loss": 0.4671, "step": 5947 }, { "epoch": 1.6459558569155193, "grad_norm": 0.16924576461315155, "learning_rate": 1e-05, "loss": 0.4985, "step": 5948 }, { "epoch": 1.6462326160658687, "grad_norm": 0.18305538594722748, "learning_rate": 1e-05, "loss": 0.5428, "step": 5949 }, { "epoch": 1.646509375216218, "grad_norm": 0.1728527694940567, "learning_rate": 1e-05, "loss": 0.5182, "step": 5950 }, { "epoch": 1.6467861343665675, "grad_norm": 0.16963794827461243, "learning_rate": 1e-05, "loss": 0.5257, "step": 5951 }, { "epoch": 1.647062893516917, "grad_norm": 0.17481032013893127, "learning_rate": 1e-05, "loss": 0.5645, "step": 5952 }, { "epoch": 1.6473396526672663, "grad_norm": 0.17529229819774628, "learning_rate": 1e-05, "loss": 0.4819, "step": 5953 }, { "epoch": 1.6476164118176158, "grad_norm": 0.1711590588092804, "learning_rate": 1e-05, "loss": 0.4868, "step": 5954 }, { "epoch": 1.6478931709679652, "grad_norm": 0.1799977421760559, "learning_rate": 1e-05, "loss": 0.5027, "step": 5955 }, { "epoch": 1.6481699301183146, "grad_norm": 0.16798193752765656, "learning_rate": 1e-05, "loss": 0.4982, "step": 5956 }, { "epoch": 1.648446689268664, "grad_norm": 0.1645553708076477, "learning_rate": 1e-05, "loss": 0.494, "step": 5957 }, { "epoch": 1.6487234484190134, "grad_norm": 0.17830786108970642, "learning_rate": 1e-05, "loss": 0.5175, "step": 5958 }, { "epoch": 1.6490002075693626, "grad_norm": 0.16775688529014587, "learning_rate": 1e-05, "loss": 0.5525, "step": 5959 }, { "epoch": 1.6492769667197122, "grad_norm": 0.1832910180091858, "learning_rate": 1e-05, "loss": 0.4818, "step": 5960 }, { "epoch": 1.6495537258700614, "grad_norm": 0.17245620489120483, "learning_rate": 1e-05, "loss": 0.5182, "step": 5961 }, { "epoch": 1.649830485020411, "grad_norm": 0.17169158160686493, "learning_rate": 1e-05, "loss": 0.495, "step": 5962 }, { "epoch": 1.6501072441707603, "grad_norm": 0.16730763018131256, "learning_rate": 1e-05, "loss": 0.508, "step": 5963 }, { "epoch": 1.65038400332111, "grad_norm": 0.17465759813785553, "learning_rate": 1e-05, "loss": 0.5228, "step": 5964 }, { "epoch": 1.650660762471459, "grad_norm": 0.17438913881778717, "learning_rate": 1e-05, "loss": 0.528, "step": 5965 }, { "epoch": 1.6509375216218087, "grad_norm": 0.17578595876693726, "learning_rate": 1e-05, "loss": 0.5198, "step": 5966 }, { "epoch": 1.651214280772158, "grad_norm": 0.17366866767406464, "learning_rate": 1e-05, "loss": 0.4956, "step": 5967 }, { "epoch": 1.6514910399225076, "grad_norm": 0.16791042685508728, "learning_rate": 1e-05, "loss": 0.4702, "step": 5968 }, { "epoch": 1.6517677990728568, "grad_norm": 0.16560044884681702, "learning_rate": 1e-05, "loss": 0.4997, "step": 5969 }, { "epoch": 1.6520445582232064, "grad_norm": 0.17519859969615936, "learning_rate": 1e-05, "loss": 0.4861, "step": 5970 }, { "epoch": 1.6523213173735556, "grad_norm": 0.17330344021320343, "learning_rate": 1e-05, "loss": 0.4954, "step": 5971 }, { "epoch": 1.6525980765239052, "grad_norm": 0.17263644933700562, "learning_rate": 1e-05, "loss": 0.5106, "step": 5972 }, { "epoch": 1.6528748356742544, "grad_norm": 0.17510490119457245, "learning_rate": 1e-05, "loss": 0.5358, "step": 5973 }, { "epoch": 1.653151594824604, "grad_norm": 0.18033453822135925, "learning_rate": 1e-05, "loss": 0.5111, "step": 5974 }, { "epoch": 1.6534283539749532, "grad_norm": 0.17649924755096436, "learning_rate": 1e-05, "loss": 0.5127, "step": 5975 }, { "epoch": 1.6537051131253027, "grad_norm": 0.17631840705871582, "learning_rate": 1e-05, "loss": 0.5117, "step": 5976 }, { "epoch": 1.653981872275652, "grad_norm": 0.17171531915664673, "learning_rate": 1e-05, "loss": 0.4932, "step": 5977 }, { "epoch": 1.6542586314260015, "grad_norm": 0.18006464838981628, "learning_rate": 1e-05, "loss": 0.5313, "step": 5978 }, { "epoch": 1.654535390576351, "grad_norm": 0.17563305795192719, "learning_rate": 1e-05, "loss": 0.5331, "step": 5979 }, { "epoch": 1.6548121497267003, "grad_norm": 0.1596967577934265, "learning_rate": 1e-05, "loss": 0.5016, "step": 5980 }, { "epoch": 1.6550889088770497, "grad_norm": 0.17224957048892975, "learning_rate": 1e-05, "loss": 0.498, "step": 5981 }, { "epoch": 1.6553656680273992, "grad_norm": 0.17322970926761627, "learning_rate": 1e-05, "loss": 0.5073, "step": 5982 }, { "epoch": 1.6556424271777486, "grad_norm": 0.17039039731025696, "learning_rate": 1e-05, "loss": 0.4743, "step": 5983 }, { "epoch": 1.655919186328098, "grad_norm": 0.17098954319953918, "learning_rate": 1e-05, "loss": 0.5523, "step": 5984 }, { "epoch": 1.6561959454784474, "grad_norm": 0.187143012881279, "learning_rate": 1e-05, "loss": 0.5395, "step": 5985 }, { "epoch": 1.6564727046287968, "grad_norm": 0.17536428570747375, "learning_rate": 1e-05, "loss": 0.4981, "step": 5986 }, { "epoch": 1.6567494637791462, "grad_norm": 0.16986899077892303, "learning_rate": 1e-05, "loss": 0.5062, "step": 5987 }, { "epoch": 1.6570262229294956, "grad_norm": 0.1758688986301422, "learning_rate": 1e-05, "loss": 0.5466, "step": 5988 }, { "epoch": 1.657302982079845, "grad_norm": 0.17298631370067596, "learning_rate": 1e-05, "loss": 0.4979, "step": 5989 }, { "epoch": 1.6575797412301945, "grad_norm": 0.1697281301021576, "learning_rate": 1e-05, "loss": 0.5114, "step": 5990 }, { "epoch": 1.6578565003805439, "grad_norm": 0.16578039526939392, "learning_rate": 1e-05, "loss": 0.5001, "step": 5991 }, { "epoch": 1.658133259530893, "grad_norm": 0.16770094633102417, "learning_rate": 1e-05, "loss": 0.5042, "step": 5992 }, { "epoch": 1.6584100186812427, "grad_norm": 0.1747014969587326, "learning_rate": 1e-05, "loss": 0.5006, "step": 5993 }, { "epoch": 1.658686777831592, "grad_norm": 0.1731770932674408, "learning_rate": 1e-05, "loss": 0.4868, "step": 5994 }, { "epoch": 1.6589635369819415, "grad_norm": 0.16656634211540222, "learning_rate": 1e-05, "loss": 0.4965, "step": 5995 }, { "epoch": 1.6592402961322907, "grad_norm": 0.17083978652954102, "learning_rate": 1e-05, "loss": 0.5228, "step": 5996 }, { "epoch": 1.6595170552826404, "grad_norm": 0.17880558967590332, "learning_rate": 1e-05, "loss": 0.4965, "step": 5997 }, { "epoch": 1.6597938144329896, "grad_norm": 0.1691446602344513, "learning_rate": 1e-05, "loss": 0.5249, "step": 5998 }, { "epoch": 1.6600705735833392, "grad_norm": 0.17596489191055298, "learning_rate": 1e-05, "loss": 0.4806, "step": 5999 }, { "epoch": 1.6603473327336884, "grad_norm": 0.17522284388542175, "learning_rate": 1e-05, "loss": 0.5161, "step": 6000 }, { "epoch": 1.660624091884038, "grad_norm": 0.16780805587768555, "learning_rate": 1e-05, "loss": 0.4956, "step": 6001 }, { "epoch": 1.6609008510343872, "grad_norm": 0.1696794033050537, "learning_rate": 1e-05, "loss": 0.4912, "step": 6002 }, { "epoch": 1.6611776101847369, "grad_norm": 0.17183609306812286, "learning_rate": 1e-05, "loss": 0.4929, "step": 6003 }, { "epoch": 1.661454369335086, "grad_norm": 0.17101384699344635, "learning_rate": 1e-05, "loss": 0.5381, "step": 6004 }, { "epoch": 1.6617311284854357, "grad_norm": 0.17229634523391724, "learning_rate": 1e-05, "loss": 0.5191, "step": 6005 }, { "epoch": 1.6620078876357849, "grad_norm": 0.17397332191467285, "learning_rate": 1e-05, "loss": 0.5159, "step": 6006 }, { "epoch": 1.6622846467861345, "grad_norm": 0.1706422120332718, "learning_rate": 1e-05, "loss": 0.5028, "step": 6007 }, { "epoch": 1.6625614059364837, "grad_norm": 0.18035049736499786, "learning_rate": 1e-05, "loss": 0.4763, "step": 6008 }, { "epoch": 1.6628381650868331, "grad_norm": 0.16750891506671906, "learning_rate": 1e-05, "loss": 0.5022, "step": 6009 }, { "epoch": 1.6631149242371825, "grad_norm": 0.17115001380443573, "learning_rate": 1e-05, "loss": 0.5087, "step": 6010 }, { "epoch": 1.663391683387532, "grad_norm": 0.16918790340423584, "learning_rate": 1e-05, "loss": 0.5425, "step": 6011 }, { "epoch": 1.6636684425378814, "grad_norm": 0.16997317969799042, "learning_rate": 1e-05, "loss": 0.4942, "step": 6012 }, { "epoch": 1.6639452016882308, "grad_norm": 0.17116880416870117, "learning_rate": 1e-05, "loss": 0.5164, "step": 6013 }, { "epoch": 1.6642219608385802, "grad_norm": 0.17357806861400604, "learning_rate": 1e-05, "loss": 0.5146, "step": 6014 }, { "epoch": 1.6644987199889296, "grad_norm": 0.1649750918149948, "learning_rate": 1e-05, "loss": 0.5148, "step": 6015 }, { "epoch": 1.664775479139279, "grad_norm": 0.1658712923526764, "learning_rate": 1e-05, "loss": 0.4825, "step": 6016 }, { "epoch": 1.6650522382896285, "grad_norm": 0.17076587677001953, "learning_rate": 1e-05, "loss": 0.5111, "step": 6017 }, { "epoch": 1.6653289974399779, "grad_norm": 0.17314599454402924, "learning_rate": 1e-05, "loss": 0.5126, "step": 6018 }, { "epoch": 1.6656057565903273, "grad_norm": 0.1698824167251587, "learning_rate": 1e-05, "loss": 0.5085, "step": 6019 }, { "epoch": 1.6658825157406767, "grad_norm": 0.1706267148256302, "learning_rate": 1e-05, "loss": 0.4963, "step": 6020 }, { "epoch": 1.666159274891026, "grad_norm": 0.17703966796398163, "learning_rate": 1e-05, "loss": 0.5246, "step": 6021 }, { "epoch": 1.6664360340413755, "grad_norm": 0.17186583578586578, "learning_rate": 1e-05, "loss": 0.5086, "step": 6022 }, { "epoch": 1.666712793191725, "grad_norm": 0.17226974666118622, "learning_rate": 1e-05, "loss": 0.4901, "step": 6023 }, { "epoch": 1.6669895523420744, "grad_norm": 0.16961252689361572, "learning_rate": 1e-05, "loss": 0.5023, "step": 6024 }, { "epoch": 1.6672663114924238, "grad_norm": 0.1637255996465683, "learning_rate": 1e-05, "loss": 0.5199, "step": 6025 }, { "epoch": 1.6675430706427732, "grad_norm": 0.1695469319820404, "learning_rate": 1e-05, "loss": 0.5239, "step": 6026 }, { "epoch": 1.6678198297931224, "grad_norm": 0.1711612045764923, "learning_rate": 1e-05, "loss": 0.5106, "step": 6027 }, { "epoch": 1.668096588943472, "grad_norm": 0.17749269306659698, "learning_rate": 1e-05, "loss": 0.4973, "step": 6028 }, { "epoch": 1.6683733480938212, "grad_norm": 0.1700342446565628, "learning_rate": 1e-05, "loss": 0.4834, "step": 6029 }, { "epoch": 1.6686501072441708, "grad_norm": 0.17596827447414398, "learning_rate": 1e-05, "loss": 0.5124, "step": 6030 }, { "epoch": 1.66892686639452, "grad_norm": 0.17447051405906677, "learning_rate": 1e-05, "loss": 0.5243, "step": 6031 }, { "epoch": 1.6692036255448697, "grad_norm": 0.16389720141887665, "learning_rate": 1e-05, "loss": 0.4763, "step": 6032 }, { "epoch": 1.6694803846952189, "grad_norm": 0.16393239796161652, "learning_rate": 1e-05, "loss": 0.481, "step": 6033 }, { "epoch": 1.6697571438455685, "grad_norm": 0.17331865429878235, "learning_rate": 1e-05, "loss": 0.503, "step": 6034 }, { "epoch": 1.6700339029959177, "grad_norm": 0.17275124788284302, "learning_rate": 1e-05, "loss": 0.4861, "step": 6035 }, { "epoch": 1.6703106621462673, "grad_norm": 0.17557856440544128, "learning_rate": 1e-05, "loss": 0.5025, "step": 6036 }, { "epoch": 1.6705874212966165, "grad_norm": 0.18304193019866943, "learning_rate": 1e-05, "loss": 0.5154, "step": 6037 }, { "epoch": 1.6708641804469662, "grad_norm": 0.17719031870365143, "learning_rate": 1e-05, "loss": 0.5374, "step": 6038 }, { "epoch": 1.6711409395973154, "grad_norm": 0.17010410130023956, "learning_rate": 1e-05, "loss": 0.4912, "step": 6039 }, { "epoch": 1.671417698747665, "grad_norm": 0.15756548941135406, "learning_rate": 1e-05, "loss": 0.4989, "step": 6040 }, { "epoch": 1.6716944578980142, "grad_norm": 0.17345672845840454, "learning_rate": 1e-05, "loss": 0.481, "step": 6041 }, { "epoch": 1.6719712170483638, "grad_norm": 0.18310664594173431, "learning_rate": 1e-05, "loss": 0.5428, "step": 6042 }, { "epoch": 1.672247976198713, "grad_norm": 0.17230361700057983, "learning_rate": 1e-05, "loss": 0.5156, "step": 6043 }, { "epoch": 1.6725247353490624, "grad_norm": 0.1703483760356903, "learning_rate": 1e-05, "loss": 0.5165, "step": 6044 }, { "epoch": 1.6728014944994118, "grad_norm": 0.17310738563537598, "learning_rate": 1e-05, "loss": 0.4764, "step": 6045 }, { "epoch": 1.6730782536497613, "grad_norm": 0.16492490470409393, "learning_rate": 1e-05, "loss": 0.4703, "step": 6046 }, { "epoch": 1.6733550128001107, "grad_norm": 0.16557759046554565, "learning_rate": 1e-05, "loss": 0.4936, "step": 6047 }, { "epoch": 1.67363177195046, "grad_norm": 0.1775401085615158, "learning_rate": 1e-05, "loss": 0.526, "step": 6048 }, { "epoch": 1.6739085311008095, "grad_norm": 0.17524828016757965, "learning_rate": 1e-05, "loss": 0.5078, "step": 6049 }, { "epoch": 1.674185290251159, "grad_norm": 0.16986791789531708, "learning_rate": 1e-05, "loss": 0.5065, "step": 6050 }, { "epoch": 1.6744620494015083, "grad_norm": 0.17183645069599152, "learning_rate": 1e-05, "loss": 0.5386, "step": 6051 }, { "epoch": 1.6747388085518577, "grad_norm": 0.1694423407316208, "learning_rate": 1e-05, "loss": 0.4985, "step": 6052 }, { "epoch": 1.6750155677022072, "grad_norm": 0.1806463599205017, "learning_rate": 1e-05, "loss": 0.504, "step": 6053 }, { "epoch": 1.6752923268525566, "grad_norm": 0.17052604258060455, "learning_rate": 1e-05, "loss": 0.5254, "step": 6054 }, { "epoch": 1.675569086002906, "grad_norm": 0.17101146280765533, "learning_rate": 1e-05, "loss": 0.494, "step": 6055 }, { "epoch": 1.6758458451532554, "grad_norm": 0.16680417954921722, "learning_rate": 1e-05, "loss": 0.5305, "step": 6056 }, { "epoch": 1.6761226043036048, "grad_norm": 0.17263497412204742, "learning_rate": 1e-05, "loss": 0.4903, "step": 6057 }, { "epoch": 1.6763993634539542, "grad_norm": 0.169057235121727, "learning_rate": 1e-05, "loss": 0.5175, "step": 6058 }, { "epoch": 1.6766761226043037, "grad_norm": 0.17625099420547485, "learning_rate": 1e-05, "loss": 0.501, "step": 6059 }, { "epoch": 1.676952881754653, "grad_norm": 0.16741123795509338, "learning_rate": 1e-05, "loss": 0.5027, "step": 6060 }, { "epoch": 1.6772296409050025, "grad_norm": 0.17021137475967407, "learning_rate": 1e-05, "loss": 0.4911, "step": 6061 }, { "epoch": 1.6775064000553517, "grad_norm": 0.16385819017887115, "learning_rate": 1e-05, "loss": 0.5172, "step": 6062 }, { "epoch": 1.6777831592057013, "grad_norm": 0.17395374178886414, "learning_rate": 1e-05, "loss": 0.4865, "step": 6063 }, { "epoch": 1.6780599183560505, "grad_norm": 0.17289642989635468, "learning_rate": 1e-05, "loss": 0.4955, "step": 6064 }, { "epoch": 1.6783366775064001, "grad_norm": 0.164507195353508, "learning_rate": 1e-05, "loss": 0.5009, "step": 6065 }, { "epoch": 1.6786134366567493, "grad_norm": 0.1684229075908661, "learning_rate": 1e-05, "loss": 0.5025, "step": 6066 }, { "epoch": 1.678890195807099, "grad_norm": 0.17157740890979767, "learning_rate": 1e-05, "loss": 0.511, "step": 6067 }, { "epoch": 1.6791669549574482, "grad_norm": 0.17135997116565704, "learning_rate": 1e-05, "loss": 0.5092, "step": 6068 }, { "epoch": 1.6794437141077978, "grad_norm": 0.16743524372577667, "learning_rate": 1e-05, "loss": 0.5605, "step": 6069 }, { "epoch": 1.679720473258147, "grad_norm": 0.1598529815673828, "learning_rate": 1e-05, "loss": 0.4744, "step": 6070 }, { "epoch": 1.6799972324084966, "grad_norm": 0.16393804550170898, "learning_rate": 1e-05, "loss": 0.5146, "step": 6071 }, { "epoch": 1.6802739915588458, "grad_norm": 0.17524583637714386, "learning_rate": 1e-05, "loss": 0.5248, "step": 6072 }, { "epoch": 1.6805507507091955, "grad_norm": 0.16305853426456451, "learning_rate": 1e-05, "loss": 0.4974, "step": 6073 }, { "epoch": 1.6808275098595447, "grad_norm": 0.18356828391551971, "learning_rate": 1e-05, "loss": 0.5222, "step": 6074 }, { "epoch": 1.6811042690098943, "grad_norm": 0.17855672538280487, "learning_rate": 1e-05, "loss": 0.5041, "step": 6075 }, { "epoch": 1.6813810281602435, "grad_norm": 0.18053679168224335, "learning_rate": 1e-05, "loss": 0.5002, "step": 6076 }, { "epoch": 1.6816577873105931, "grad_norm": 0.17741647362709045, "learning_rate": 1e-05, "loss": 0.5106, "step": 6077 }, { "epoch": 1.6819345464609423, "grad_norm": 0.17524871230125427, "learning_rate": 1e-05, "loss": 0.4922, "step": 6078 }, { "epoch": 1.6822113056112917, "grad_norm": 0.17012080550193787, "learning_rate": 1e-05, "loss": 0.5005, "step": 6079 }, { "epoch": 1.6824880647616411, "grad_norm": 0.16819578409194946, "learning_rate": 1e-05, "loss": 0.4863, "step": 6080 }, { "epoch": 1.6827648239119906, "grad_norm": 0.18056397140026093, "learning_rate": 1e-05, "loss": 0.5088, "step": 6081 }, { "epoch": 1.68304158306234, "grad_norm": 0.17073100805282593, "learning_rate": 1e-05, "loss": 0.5035, "step": 6082 }, { "epoch": 1.6833183422126894, "grad_norm": 0.16606828570365906, "learning_rate": 1e-05, "loss": 0.494, "step": 6083 }, { "epoch": 1.6835951013630388, "grad_norm": 0.16630002856254578, "learning_rate": 1e-05, "loss": 0.5249, "step": 6084 }, { "epoch": 1.6838718605133882, "grad_norm": 0.1748114377260208, "learning_rate": 1e-05, "loss": 0.5492, "step": 6085 }, { "epoch": 1.6841486196637376, "grad_norm": 0.17726615071296692, "learning_rate": 1e-05, "loss": 0.5162, "step": 6086 }, { "epoch": 1.684425378814087, "grad_norm": 0.17369696497917175, "learning_rate": 1e-05, "loss": 0.4943, "step": 6087 }, { "epoch": 1.6847021379644365, "grad_norm": 0.16863469779491425, "learning_rate": 1e-05, "loss": 0.5231, "step": 6088 }, { "epoch": 1.6849788971147859, "grad_norm": 0.16950362920761108, "learning_rate": 1e-05, "loss": 0.5299, "step": 6089 }, { "epoch": 1.6852556562651353, "grad_norm": 0.17142881453037262, "learning_rate": 1e-05, "loss": 0.4818, "step": 6090 }, { "epoch": 1.6855324154154847, "grad_norm": 0.1724182516336441, "learning_rate": 1e-05, "loss": 0.5045, "step": 6091 }, { "epoch": 1.6858091745658341, "grad_norm": 0.16641242802143097, "learning_rate": 1e-05, "loss": 0.4889, "step": 6092 }, { "epoch": 1.6860859337161835, "grad_norm": 0.17150986194610596, "learning_rate": 1e-05, "loss": 0.493, "step": 6093 }, { "epoch": 1.686362692866533, "grad_norm": 0.16679160296916962, "learning_rate": 1e-05, "loss": 0.4722, "step": 6094 }, { "epoch": 1.6866394520168821, "grad_norm": 0.1645934134721756, "learning_rate": 1e-05, "loss": 0.4857, "step": 6095 }, { "epoch": 1.6869162111672318, "grad_norm": 0.1621428281068802, "learning_rate": 1e-05, "loss": 0.5039, "step": 6096 }, { "epoch": 1.687192970317581, "grad_norm": 0.16856038570404053, "learning_rate": 1e-05, "loss": 0.5092, "step": 6097 }, { "epoch": 1.6874697294679306, "grad_norm": 0.17888841032981873, "learning_rate": 1e-05, "loss": 0.5027, "step": 6098 }, { "epoch": 1.6877464886182798, "grad_norm": 0.15987257659435272, "learning_rate": 1e-05, "loss": 0.4856, "step": 6099 }, { "epoch": 1.6880232477686294, "grad_norm": 0.16568401455879211, "learning_rate": 1e-05, "loss": 0.5002, "step": 6100 }, { "epoch": 1.6883000069189786, "grad_norm": 0.17814312875270844, "learning_rate": 1e-05, "loss": 0.4948, "step": 6101 }, { "epoch": 1.6885767660693283, "grad_norm": 0.16907650232315063, "learning_rate": 1e-05, "loss": 0.5037, "step": 6102 }, { "epoch": 1.6888535252196775, "grad_norm": 0.1771148145198822, "learning_rate": 1e-05, "loss": 0.5014, "step": 6103 }, { "epoch": 1.689130284370027, "grad_norm": 0.17324265837669373, "learning_rate": 1e-05, "loss": 0.5201, "step": 6104 }, { "epoch": 1.6894070435203763, "grad_norm": 0.17987672984600067, "learning_rate": 1e-05, "loss": 0.5007, "step": 6105 }, { "epoch": 1.689683802670726, "grad_norm": 0.17274557054042816, "learning_rate": 1e-05, "loss": 0.5161, "step": 6106 }, { "epoch": 1.6899605618210751, "grad_norm": 0.17134585976600647, "learning_rate": 1e-05, "loss": 0.4852, "step": 6107 }, { "epoch": 1.6902373209714248, "grad_norm": 0.1622028797864914, "learning_rate": 1e-05, "loss": 0.5018, "step": 6108 }, { "epoch": 1.690514080121774, "grad_norm": 0.16545066237449646, "learning_rate": 1e-05, "loss": 0.4781, "step": 6109 }, { "epoch": 1.6907908392721236, "grad_norm": 0.183363139629364, "learning_rate": 1e-05, "loss": 0.5132, "step": 6110 }, { "epoch": 1.6910675984224728, "grad_norm": 0.17094071209430695, "learning_rate": 1e-05, "loss": 0.5041, "step": 6111 }, { "epoch": 1.6913443575728222, "grad_norm": 0.17951750755310059, "learning_rate": 1e-05, "loss": 0.4917, "step": 6112 }, { "epoch": 1.6916211167231716, "grad_norm": 0.1705288141965866, "learning_rate": 1e-05, "loss": 0.4971, "step": 6113 }, { "epoch": 1.691897875873521, "grad_norm": 0.17399586737155914, "learning_rate": 1e-05, "loss": 0.5261, "step": 6114 }, { "epoch": 1.6921746350238704, "grad_norm": 0.17331181466579437, "learning_rate": 1e-05, "loss": 0.5259, "step": 6115 }, { "epoch": 1.6924513941742199, "grad_norm": 0.16663707792758942, "learning_rate": 1e-05, "loss": 0.4829, "step": 6116 }, { "epoch": 1.6927281533245693, "grad_norm": 0.17427030205726624, "learning_rate": 1e-05, "loss": 0.5275, "step": 6117 }, { "epoch": 1.6930049124749187, "grad_norm": 0.17409612238407135, "learning_rate": 1e-05, "loss": 0.502, "step": 6118 }, { "epoch": 1.693281671625268, "grad_norm": 0.1709567755460739, "learning_rate": 1e-05, "loss": 0.4897, "step": 6119 }, { "epoch": 1.6935584307756175, "grad_norm": 0.16762343049049377, "learning_rate": 1e-05, "loss": 0.5246, "step": 6120 }, { "epoch": 1.693835189925967, "grad_norm": 0.17587189376354218, "learning_rate": 1e-05, "loss": 0.4757, "step": 6121 }, { "epoch": 1.6941119490763163, "grad_norm": 0.16767701506614685, "learning_rate": 1e-05, "loss": 0.5202, "step": 6122 }, { "epoch": 1.6943887082266658, "grad_norm": 0.17493027448654175, "learning_rate": 1e-05, "loss": 0.4819, "step": 6123 }, { "epoch": 1.6946654673770152, "grad_norm": 0.18262332677841187, "learning_rate": 1e-05, "loss": 0.5241, "step": 6124 }, { "epoch": 1.6949422265273646, "grad_norm": 0.17306409776210785, "learning_rate": 1e-05, "loss": 0.5035, "step": 6125 }, { "epoch": 1.695218985677714, "grad_norm": 0.1672939509153366, "learning_rate": 1e-05, "loss": 0.4612, "step": 6126 }, { "epoch": 1.6954957448280634, "grad_norm": 0.16521218419075012, "learning_rate": 1e-05, "loss": 0.5317, "step": 6127 }, { "epoch": 1.6957725039784128, "grad_norm": 0.1745358258485794, "learning_rate": 1e-05, "loss": 0.5297, "step": 6128 }, { "epoch": 1.6960492631287623, "grad_norm": 0.17298352718353271, "learning_rate": 1e-05, "loss": 0.5621, "step": 6129 }, { "epoch": 1.6963260222791114, "grad_norm": 0.1680069863796234, "learning_rate": 1e-05, "loss": 0.5051, "step": 6130 }, { "epoch": 1.696602781429461, "grad_norm": 0.16465018689632416, "learning_rate": 1e-05, "loss": 0.498, "step": 6131 }, { "epoch": 1.6968795405798103, "grad_norm": 0.1779024302959442, "learning_rate": 1e-05, "loss": 0.5219, "step": 6132 }, { "epoch": 1.69715629973016, "grad_norm": 0.1777128428220749, "learning_rate": 1e-05, "loss": 0.5176, "step": 6133 }, { "epoch": 1.697433058880509, "grad_norm": 0.17201995849609375, "learning_rate": 1e-05, "loss": 0.5032, "step": 6134 }, { "epoch": 1.6977098180308587, "grad_norm": 0.1732710599899292, "learning_rate": 1e-05, "loss": 0.5068, "step": 6135 }, { "epoch": 1.697986577181208, "grad_norm": 0.16691042482852936, "learning_rate": 1e-05, "loss": 0.508, "step": 6136 }, { "epoch": 1.6982633363315576, "grad_norm": 0.16828106343746185, "learning_rate": 1e-05, "loss": 0.4987, "step": 6137 }, { "epoch": 1.6985400954819068, "grad_norm": 0.17462797462940216, "learning_rate": 1e-05, "loss": 0.5213, "step": 6138 }, { "epoch": 1.6988168546322564, "grad_norm": 0.17905882000923157, "learning_rate": 1e-05, "loss": 0.5175, "step": 6139 }, { "epoch": 1.6990936137826056, "grad_norm": 0.1687583476305008, "learning_rate": 1e-05, "loss": 0.5036, "step": 6140 }, { "epoch": 1.6993703729329552, "grad_norm": 0.17601138353347778, "learning_rate": 1e-05, "loss": 0.491, "step": 6141 }, { "epoch": 1.6996471320833044, "grad_norm": 0.17077378928661346, "learning_rate": 1e-05, "loss": 0.5181, "step": 6142 }, { "epoch": 1.699923891233654, "grad_norm": 0.1741980016231537, "learning_rate": 1e-05, "loss": 0.5276, "step": 6143 }, { "epoch": 1.7002006503840033, "grad_norm": 0.17289435863494873, "learning_rate": 1e-05, "loss": 0.5083, "step": 6144 }, { "epoch": 1.700477409534353, "grad_norm": 0.17866678535938263, "learning_rate": 1e-05, "loss": 0.525, "step": 6145 }, { "epoch": 1.700754168684702, "grad_norm": 0.1810467690229416, "learning_rate": 1e-05, "loss": 0.4923, "step": 6146 }, { "epoch": 1.7010309278350515, "grad_norm": 0.16814211010932922, "learning_rate": 1e-05, "loss": 0.5235, "step": 6147 }, { "epoch": 1.701307686985401, "grad_norm": 0.1749701350927353, "learning_rate": 1e-05, "loss": 0.5022, "step": 6148 }, { "epoch": 1.7015844461357503, "grad_norm": 0.18219463527202606, "learning_rate": 1e-05, "loss": 0.498, "step": 6149 }, { "epoch": 1.7018612052860997, "grad_norm": 0.15689566731452942, "learning_rate": 1e-05, "loss": 0.5051, "step": 6150 }, { "epoch": 1.7021379644364492, "grad_norm": 0.18574625253677368, "learning_rate": 1e-05, "loss": 0.5084, "step": 6151 }, { "epoch": 1.7024147235867986, "grad_norm": 0.17968066036701202, "learning_rate": 1e-05, "loss": 0.5187, "step": 6152 }, { "epoch": 1.702691482737148, "grad_norm": 0.17270895838737488, "learning_rate": 1e-05, "loss": 0.5171, "step": 6153 }, { "epoch": 1.7029682418874974, "grad_norm": 0.17033430933952332, "learning_rate": 1e-05, "loss": 0.5139, "step": 6154 }, { "epoch": 1.7032450010378468, "grad_norm": 0.18498189747333527, "learning_rate": 1e-05, "loss": 0.5052, "step": 6155 }, { "epoch": 1.7035217601881962, "grad_norm": 0.17496004700660706, "learning_rate": 1e-05, "loss": 0.5519, "step": 6156 }, { "epoch": 1.7037985193385456, "grad_norm": 0.17960497736930847, "learning_rate": 1e-05, "loss": 0.515, "step": 6157 }, { "epoch": 1.704075278488895, "grad_norm": 0.1662442535161972, "learning_rate": 1e-05, "loss": 0.4547, "step": 6158 }, { "epoch": 1.7043520376392445, "grad_norm": 0.1724688708782196, "learning_rate": 1e-05, "loss": 0.4843, "step": 6159 }, { "epoch": 1.704628796789594, "grad_norm": 0.17179079353809357, "learning_rate": 1e-05, "loss": 0.5275, "step": 6160 }, { "epoch": 1.7049055559399433, "grad_norm": 0.16953271627426147, "learning_rate": 1e-05, "loss": 0.4879, "step": 6161 }, { "epoch": 1.7051823150902927, "grad_norm": 0.17916034162044525, "learning_rate": 1e-05, "loss": 0.5213, "step": 6162 }, { "epoch": 1.7054590742406421, "grad_norm": 0.17174023389816284, "learning_rate": 1e-05, "loss": 0.5122, "step": 6163 }, { "epoch": 1.7057358333909916, "grad_norm": 0.17040127515792847, "learning_rate": 1e-05, "loss": 0.5031, "step": 6164 }, { "epoch": 1.7060125925413407, "grad_norm": 0.1801949292421341, "learning_rate": 1e-05, "loss": 0.5192, "step": 6165 }, { "epoch": 1.7062893516916904, "grad_norm": 0.16834747791290283, "learning_rate": 1e-05, "loss": 0.5327, "step": 6166 }, { "epoch": 1.7065661108420396, "grad_norm": 0.17900803685188293, "learning_rate": 1e-05, "loss": 0.5124, "step": 6167 }, { "epoch": 1.7068428699923892, "grad_norm": 0.16474580764770508, "learning_rate": 1e-05, "loss": 0.5132, "step": 6168 }, { "epoch": 1.7071196291427384, "grad_norm": 0.18288972973823547, "learning_rate": 1e-05, "loss": 0.5149, "step": 6169 }, { "epoch": 1.707396388293088, "grad_norm": 0.1706988364458084, "learning_rate": 1e-05, "loss": 0.5177, "step": 6170 }, { "epoch": 1.7076731474434372, "grad_norm": 0.1754566878080368, "learning_rate": 1e-05, "loss": 0.4951, "step": 6171 }, { "epoch": 1.7079499065937869, "grad_norm": 0.17385923862457275, "learning_rate": 1e-05, "loss": 0.5268, "step": 6172 }, { "epoch": 1.708226665744136, "grad_norm": 0.17029263079166412, "learning_rate": 1e-05, "loss": 0.4935, "step": 6173 }, { "epoch": 1.7085034248944857, "grad_norm": 0.1672675758600235, "learning_rate": 1e-05, "loss": 0.513, "step": 6174 }, { "epoch": 1.708780184044835, "grad_norm": 0.1683223694562912, "learning_rate": 1e-05, "loss": 0.4904, "step": 6175 }, { "epoch": 1.7090569431951845, "grad_norm": 0.1720958650112152, "learning_rate": 1e-05, "loss": 0.5071, "step": 6176 }, { "epoch": 1.7093337023455337, "grad_norm": 0.17255452275276184, "learning_rate": 1e-05, "loss": 0.5215, "step": 6177 }, { "epoch": 1.7096104614958834, "grad_norm": 0.1772596538066864, "learning_rate": 1e-05, "loss": 0.493, "step": 6178 }, { "epoch": 1.7098872206462326, "grad_norm": 0.16349492967128754, "learning_rate": 1e-05, "loss": 0.4971, "step": 6179 }, { "epoch": 1.7101639797965822, "grad_norm": 0.1625099778175354, "learning_rate": 1e-05, "loss": 0.5154, "step": 6180 }, { "epoch": 1.7104407389469314, "grad_norm": 0.16998356580734253, "learning_rate": 1e-05, "loss": 0.5081, "step": 6181 }, { "epoch": 1.7107174980972808, "grad_norm": 0.17731580138206482, "learning_rate": 1e-05, "loss": 0.5347, "step": 6182 }, { "epoch": 1.7109942572476302, "grad_norm": 0.16463445127010345, "learning_rate": 1e-05, "loss": 0.5345, "step": 6183 }, { "epoch": 1.7112710163979796, "grad_norm": 0.16872170567512512, "learning_rate": 1e-05, "loss": 0.5079, "step": 6184 }, { "epoch": 1.711547775548329, "grad_norm": 0.17515242099761963, "learning_rate": 1e-05, "loss": 0.5007, "step": 6185 }, { "epoch": 1.7118245346986785, "grad_norm": 0.17123715579509735, "learning_rate": 1e-05, "loss": 0.5007, "step": 6186 }, { "epoch": 1.7121012938490279, "grad_norm": 0.17240312695503235, "learning_rate": 1e-05, "loss": 0.4917, "step": 6187 }, { "epoch": 1.7123780529993773, "grad_norm": 0.17464882135391235, "learning_rate": 1e-05, "loss": 0.5095, "step": 6188 }, { "epoch": 1.7126548121497267, "grad_norm": 0.17767582833766937, "learning_rate": 1e-05, "loss": 0.5088, "step": 6189 }, { "epoch": 1.7129315713000761, "grad_norm": 0.17918747663497925, "learning_rate": 1e-05, "loss": 0.5014, "step": 6190 }, { "epoch": 1.7132083304504255, "grad_norm": 0.17381997406482697, "learning_rate": 1e-05, "loss": 0.5066, "step": 6191 }, { "epoch": 1.713485089600775, "grad_norm": 0.17231114208698273, "learning_rate": 1e-05, "loss": 0.4909, "step": 6192 }, { "epoch": 1.7137618487511244, "grad_norm": 0.1663469672203064, "learning_rate": 1e-05, "loss": 0.5023, "step": 6193 }, { "epoch": 1.7140386079014738, "grad_norm": 0.1764151006937027, "learning_rate": 1e-05, "loss": 0.5366, "step": 6194 }, { "epoch": 1.7143153670518232, "grad_norm": 0.1787796914577484, "learning_rate": 1e-05, "loss": 0.5031, "step": 6195 }, { "epoch": 1.7145921262021726, "grad_norm": 0.17199693620204926, "learning_rate": 1e-05, "loss": 0.5107, "step": 6196 }, { "epoch": 1.714868885352522, "grad_norm": 0.16978298127651215, "learning_rate": 1e-05, "loss": 0.4954, "step": 6197 }, { "epoch": 1.7151456445028712, "grad_norm": 0.17405787110328674, "learning_rate": 1e-05, "loss": 0.5277, "step": 6198 }, { "epoch": 1.7154224036532209, "grad_norm": 0.17438335716724396, "learning_rate": 1e-05, "loss": 0.5031, "step": 6199 }, { "epoch": 1.71569916280357, "grad_norm": 0.18204469978809357, "learning_rate": 1e-05, "loss": 0.474, "step": 6200 }, { "epoch": 1.7159759219539197, "grad_norm": 0.1707959920167923, "learning_rate": 1e-05, "loss": 0.537, "step": 6201 }, { "epoch": 1.7162526811042689, "grad_norm": 0.1666335165500641, "learning_rate": 1e-05, "loss": 0.5141, "step": 6202 }, { "epoch": 1.7165294402546185, "grad_norm": 0.16836893558502197, "learning_rate": 1e-05, "loss": 0.498, "step": 6203 }, { "epoch": 1.7168061994049677, "grad_norm": 0.181685209274292, "learning_rate": 1e-05, "loss": 0.5139, "step": 6204 }, { "epoch": 1.7170829585553173, "grad_norm": 0.17999571561813354, "learning_rate": 1e-05, "loss": 0.5083, "step": 6205 }, { "epoch": 1.7173597177056665, "grad_norm": 0.18171413242816925, "learning_rate": 1e-05, "loss": 0.502, "step": 6206 }, { "epoch": 1.7176364768560162, "grad_norm": 0.17690572142601013, "learning_rate": 1e-05, "loss": 0.5133, "step": 6207 }, { "epoch": 1.7179132360063654, "grad_norm": 0.17906029522418976, "learning_rate": 1e-05, "loss": 0.4982, "step": 6208 }, { "epoch": 1.718189995156715, "grad_norm": 0.18568386137485504, "learning_rate": 1e-05, "loss": 0.5108, "step": 6209 }, { "epoch": 1.7184667543070642, "grad_norm": 0.17758947610855103, "learning_rate": 1e-05, "loss": 0.4902, "step": 6210 }, { "epoch": 1.7187435134574138, "grad_norm": 0.1787649244070053, "learning_rate": 1e-05, "loss": 0.5121, "step": 6211 }, { "epoch": 1.719020272607763, "grad_norm": 0.17384208738803864, "learning_rate": 1e-05, "loss": 0.4966, "step": 6212 }, { "epoch": 1.7192970317581127, "grad_norm": 0.1708037257194519, "learning_rate": 1e-05, "loss": 0.4819, "step": 6213 }, { "epoch": 1.7195737909084619, "grad_norm": 0.17423538863658905, "learning_rate": 1e-05, "loss": 0.5345, "step": 6214 }, { "epoch": 1.7198505500588113, "grad_norm": 0.16846278309822083, "learning_rate": 1e-05, "loss": 0.497, "step": 6215 }, { "epoch": 1.7201273092091607, "grad_norm": 0.17266733944416046, "learning_rate": 1e-05, "loss": 0.5289, "step": 6216 }, { "epoch": 1.72040406835951, "grad_norm": 0.17001406848430634, "learning_rate": 1e-05, "loss": 0.5074, "step": 6217 }, { "epoch": 1.7206808275098595, "grad_norm": 0.16244007647037506, "learning_rate": 1e-05, "loss": 0.4705, "step": 6218 }, { "epoch": 1.720957586660209, "grad_norm": 0.16991448402404785, "learning_rate": 1e-05, "loss": 0.5092, "step": 6219 }, { "epoch": 1.7212343458105583, "grad_norm": 0.17542314529418945, "learning_rate": 1e-05, "loss": 0.5206, "step": 6220 }, { "epoch": 1.7215111049609078, "grad_norm": 0.17779284715652466, "learning_rate": 1e-05, "loss": 0.5007, "step": 6221 }, { "epoch": 1.7217878641112572, "grad_norm": 0.1734480857849121, "learning_rate": 1e-05, "loss": 0.516, "step": 6222 }, { "epoch": 1.7220646232616066, "grad_norm": 0.17314580082893372, "learning_rate": 1e-05, "loss": 0.4963, "step": 6223 }, { "epoch": 1.722341382411956, "grad_norm": 0.1661941558122635, "learning_rate": 1e-05, "loss": 0.4998, "step": 6224 }, { "epoch": 1.7226181415623054, "grad_norm": 0.17836633324623108, "learning_rate": 1e-05, "loss": 0.5201, "step": 6225 }, { "epoch": 1.7228949007126548, "grad_norm": 0.17625240981578827, "learning_rate": 1e-05, "loss": 0.5364, "step": 6226 }, { "epoch": 1.7231716598630042, "grad_norm": 0.17369462549686432, "learning_rate": 1e-05, "loss": 0.4688, "step": 6227 }, { "epoch": 1.7234484190133537, "grad_norm": 0.1666778028011322, "learning_rate": 1e-05, "loss": 0.5214, "step": 6228 }, { "epoch": 1.723725178163703, "grad_norm": 0.16698835790157318, "learning_rate": 1e-05, "loss": 0.5148, "step": 6229 }, { "epoch": 1.7240019373140525, "grad_norm": 0.1726747751235962, "learning_rate": 1e-05, "loss": 0.4989, "step": 6230 }, { "epoch": 1.724278696464402, "grad_norm": 0.16040341556072235, "learning_rate": 1e-05, "loss": 0.5023, "step": 6231 }, { "epoch": 1.7245554556147513, "grad_norm": 0.1725771725177765, "learning_rate": 1e-05, "loss": 0.5169, "step": 6232 }, { "epoch": 1.7248322147651005, "grad_norm": 0.16963718831539154, "learning_rate": 1e-05, "loss": 0.4773, "step": 6233 }, { "epoch": 1.7251089739154502, "grad_norm": 0.1666351556777954, "learning_rate": 1e-05, "loss": 0.5451, "step": 6234 }, { "epoch": 1.7253857330657993, "grad_norm": 0.18039393424987793, "learning_rate": 1e-05, "loss": 0.4814, "step": 6235 }, { "epoch": 1.725662492216149, "grad_norm": 0.15513423085212708, "learning_rate": 1e-05, "loss": 0.5002, "step": 6236 }, { "epoch": 1.7259392513664982, "grad_norm": 0.16791652143001556, "learning_rate": 1e-05, "loss": 0.5235, "step": 6237 }, { "epoch": 1.7262160105168478, "grad_norm": 0.16865339875221252, "learning_rate": 1e-05, "loss": 0.5134, "step": 6238 }, { "epoch": 1.726492769667197, "grad_norm": 0.1653689593076706, "learning_rate": 1e-05, "loss": 0.5443, "step": 6239 }, { "epoch": 1.7267695288175466, "grad_norm": 0.1728152185678482, "learning_rate": 1e-05, "loss": 0.4973, "step": 6240 }, { "epoch": 1.7270462879678958, "grad_norm": 0.18081779778003693, "learning_rate": 1e-05, "loss": 0.5273, "step": 6241 }, { "epoch": 1.7273230471182455, "grad_norm": 0.17513655126094818, "learning_rate": 1e-05, "loss": 0.5189, "step": 6242 }, { "epoch": 1.7275998062685947, "grad_norm": 0.17601056396961212, "learning_rate": 1e-05, "loss": 0.4754, "step": 6243 }, { "epoch": 1.7278765654189443, "grad_norm": 0.16784974932670593, "learning_rate": 1e-05, "loss": 0.4754, "step": 6244 }, { "epoch": 1.7281533245692935, "grad_norm": 0.1730472445487976, "learning_rate": 1e-05, "loss": 0.4825, "step": 6245 }, { "epoch": 1.7284300837196431, "grad_norm": 0.18788373470306396, "learning_rate": 1e-05, "loss": 0.534, "step": 6246 }, { "epoch": 1.7287068428699923, "grad_norm": 0.1770414263010025, "learning_rate": 1e-05, "loss": 0.5192, "step": 6247 }, { "epoch": 1.728983602020342, "grad_norm": 0.16571131348609924, "learning_rate": 1e-05, "loss": 0.5312, "step": 6248 }, { "epoch": 1.7292603611706912, "grad_norm": 0.17151981592178345, "learning_rate": 1e-05, "loss": 0.4955, "step": 6249 }, { "epoch": 1.7295371203210406, "grad_norm": 0.16342923045158386, "learning_rate": 1e-05, "loss": 0.4913, "step": 6250 }, { "epoch": 1.72981387947139, "grad_norm": 0.16522186994552612, "learning_rate": 1e-05, "loss": 0.4916, "step": 6251 }, { "epoch": 1.7300906386217394, "grad_norm": 0.1644464135169983, "learning_rate": 1e-05, "loss": 0.5092, "step": 6252 }, { "epoch": 1.7303673977720888, "grad_norm": 0.17979303002357483, "learning_rate": 1e-05, "loss": 0.5035, "step": 6253 }, { "epoch": 1.7306441569224382, "grad_norm": 0.17417006194591522, "learning_rate": 1e-05, "loss": 0.5144, "step": 6254 }, { "epoch": 1.7309209160727876, "grad_norm": 0.16740283370018005, "learning_rate": 1e-05, "loss": 0.502, "step": 6255 }, { "epoch": 1.731197675223137, "grad_norm": 0.1758408099412918, "learning_rate": 1e-05, "loss": 0.5389, "step": 6256 }, { "epoch": 1.7314744343734865, "grad_norm": 0.17556889355182648, "learning_rate": 1e-05, "loss": 0.5095, "step": 6257 }, { "epoch": 1.7317511935238359, "grad_norm": 0.1754349172115326, "learning_rate": 1e-05, "loss": 0.5173, "step": 6258 }, { "epoch": 1.7320279526741853, "grad_norm": 0.17332583665847778, "learning_rate": 1e-05, "loss": 0.512, "step": 6259 }, { "epoch": 1.7323047118245347, "grad_norm": 0.16607613861560822, "learning_rate": 1e-05, "loss": 0.4911, "step": 6260 }, { "epoch": 1.7325814709748841, "grad_norm": 0.1686306744813919, "learning_rate": 1e-05, "loss": 0.5231, "step": 6261 }, { "epoch": 1.7328582301252335, "grad_norm": 0.17662955820560455, "learning_rate": 1e-05, "loss": 0.4939, "step": 6262 }, { "epoch": 1.733134989275583, "grad_norm": 0.16780810058116913, "learning_rate": 1e-05, "loss": 0.5101, "step": 6263 }, { "epoch": 1.7334117484259324, "grad_norm": 0.1793198585510254, "learning_rate": 1e-05, "loss": 0.5155, "step": 6264 }, { "epoch": 1.7336885075762818, "grad_norm": 0.17772556841373444, "learning_rate": 1e-05, "loss": 0.5502, "step": 6265 }, { "epoch": 1.7339652667266312, "grad_norm": 0.17299878597259521, "learning_rate": 1e-05, "loss": 0.5109, "step": 6266 }, { "epoch": 1.7342420258769806, "grad_norm": 0.17037752270698547, "learning_rate": 1e-05, "loss": 0.4834, "step": 6267 }, { "epoch": 1.7345187850273298, "grad_norm": 0.17013777792453766, "learning_rate": 1e-05, "loss": 0.4936, "step": 6268 }, { "epoch": 1.7347955441776794, "grad_norm": 0.16729308664798737, "learning_rate": 1e-05, "loss": 0.5006, "step": 6269 }, { "epoch": 1.7350723033280286, "grad_norm": 0.1769695281982422, "learning_rate": 1e-05, "loss": 0.5275, "step": 6270 }, { "epoch": 1.7353490624783783, "grad_norm": 0.1667885184288025, "learning_rate": 1e-05, "loss": 0.5139, "step": 6271 }, { "epoch": 1.7356258216287275, "grad_norm": 0.16894899308681488, "learning_rate": 1e-05, "loss": 0.5052, "step": 6272 }, { "epoch": 1.735902580779077, "grad_norm": 0.17949970066547394, "learning_rate": 1e-05, "loss": 0.5172, "step": 6273 }, { "epoch": 1.7361793399294263, "grad_norm": 0.17554211616516113, "learning_rate": 1e-05, "loss": 0.4959, "step": 6274 }, { "epoch": 1.736456099079776, "grad_norm": 0.1744442731142044, "learning_rate": 1e-05, "loss": 0.4951, "step": 6275 }, { "epoch": 1.7367328582301251, "grad_norm": 0.16416677832603455, "learning_rate": 1e-05, "loss": 0.5115, "step": 6276 }, { "epoch": 1.7370096173804748, "grad_norm": 0.17581364512443542, "learning_rate": 1e-05, "loss": 0.515, "step": 6277 }, { "epoch": 1.737286376530824, "grad_norm": 0.16860972344875336, "learning_rate": 1e-05, "loss": 0.4827, "step": 6278 }, { "epoch": 1.7375631356811736, "grad_norm": 0.1600889265537262, "learning_rate": 1e-05, "loss": 0.5079, "step": 6279 }, { "epoch": 1.7378398948315228, "grad_norm": 0.17616349458694458, "learning_rate": 1e-05, "loss": 0.521, "step": 6280 }, { "epoch": 1.7381166539818724, "grad_norm": 0.17856769263744354, "learning_rate": 1e-05, "loss": 0.5085, "step": 6281 }, { "epoch": 1.7383934131322216, "grad_norm": 0.16265471279621124, "learning_rate": 1e-05, "loss": 0.481, "step": 6282 }, { "epoch": 1.7386701722825713, "grad_norm": 0.17483049631118774, "learning_rate": 1e-05, "loss": 0.492, "step": 6283 }, { "epoch": 1.7389469314329205, "grad_norm": 0.17618755996227264, "learning_rate": 1e-05, "loss": 0.5221, "step": 6284 }, { "epoch": 1.7392236905832699, "grad_norm": 0.18371254205703735, "learning_rate": 1e-05, "loss": 0.5313, "step": 6285 }, { "epoch": 1.7395004497336193, "grad_norm": 0.17306703329086304, "learning_rate": 1e-05, "loss": 0.4978, "step": 6286 }, { "epoch": 1.7397772088839687, "grad_norm": 0.16807155311107635, "learning_rate": 1e-05, "loss": 0.5076, "step": 6287 }, { "epoch": 1.740053968034318, "grad_norm": 0.16838720440864563, "learning_rate": 1e-05, "loss": 0.5159, "step": 6288 }, { "epoch": 1.7403307271846675, "grad_norm": 0.16400596499443054, "learning_rate": 1e-05, "loss": 0.4867, "step": 6289 }, { "epoch": 1.740607486335017, "grad_norm": 0.16988398134708405, "learning_rate": 1e-05, "loss": 0.5073, "step": 6290 }, { "epoch": 1.7408842454853664, "grad_norm": 0.1729060560464859, "learning_rate": 1e-05, "loss": 0.5095, "step": 6291 }, { "epoch": 1.7411610046357158, "grad_norm": 0.16945038735866547, "learning_rate": 1e-05, "loss": 0.5093, "step": 6292 }, { "epoch": 1.7414377637860652, "grad_norm": 0.16927556693553925, "learning_rate": 1e-05, "loss": 0.5064, "step": 6293 }, { "epoch": 1.7417145229364146, "grad_norm": 0.16975750029087067, "learning_rate": 1e-05, "loss": 0.4996, "step": 6294 }, { "epoch": 1.741991282086764, "grad_norm": 0.18077172338962555, "learning_rate": 1e-05, "loss": 0.5374, "step": 6295 }, { "epoch": 1.7422680412371134, "grad_norm": 0.17063409090042114, "learning_rate": 1e-05, "loss": 0.4879, "step": 6296 }, { "epoch": 1.7425448003874628, "grad_norm": 0.16908836364746094, "learning_rate": 1e-05, "loss": 0.4967, "step": 6297 }, { "epoch": 1.7428215595378123, "grad_norm": 0.1690189391374588, "learning_rate": 1e-05, "loss": 0.5095, "step": 6298 }, { "epoch": 1.7430983186881617, "grad_norm": 0.1691601276397705, "learning_rate": 1e-05, "loss": 0.4977, "step": 6299 }, { "epoch": 1.743375077838511, "grad_norm": 0.17523206770420074, "learning_rate": 1e-05, "loss": 0.5095, "step": 6300 }, { "epoch": 1.7436518369888603, "grad_norm": 0.16810952126979828, "learning_rate": 1e-05, "loss": 0.4861, "step": 6301 }, { "epoch": 1.74392859613921, "grad_norm": 0.1603246033191681, "learning_rate": 1e-05, "loss": 0.4682, "step": 6302 }, { "epoch": 1.7442053552895591, "grad_norm": 0.17247290909290314, "learning_rate": 1e-05, "loss": 0.482, "step": 6303 }, { "epoch": 1.7444821144399087, "grad_norm": 0.16460196673870087, "learning_rate": 1e-05, "loss": 0.507, "step": 6304 }, { "epoch": 1.744758873590258, "grad_norm": 0.16950054466724396, "learning_rate": 1e-05, "loss": 0.5063, "step": 6305 }, { "epoch": 1.7450356327406076, "grad_norm": 0.17302267253398895, "learning_rate": 1e-05, "loss": 0.4986, "step": 6306 }, { "epoch": 1.7453123918909568, "grad_norm": 0.17666155099868774, "learning_rate": 1e-05, "loss": 0.483, "step": 6307 }, { "epoch": 1.7455891510413064, "grad_norm": 0.17557118833065033, "learning_rate": 1e-05, "loss": 0.5025, "step": 6308 }, { "epoch": 1.7458659101916556, "grad_norm": 0.17016060650348663, "learning_rate": 1e-05, "loss": 0.5155, "step": 6309 }, { "epoch": 1.7461426693420052, "grad_norm": 0.1742841601371765, "learning_rate": 1e-05, "loss": 0.5037, "step": 6310 }, { "epoch": 1.7464194284923544, "grad_norm": 0.17816729843616486, "learning_rate": 1e-05, "loss": 0.4835, "step": 6311 }, { "epoch": 1.746696187642704, "grad_norm": 0.17483851313591003, "learning_rate": 1e-05, "loss": 0.5398, "step": 6312 }, { "epoch": 1.7469729467930533, "grad_norm": 0.17093980312347412, "learning_rate": 1e-05, "loss": 0.4973, "step": 6313 }, { "epoch": 1.747249705943403, "grad_norm": 0.16997383534908295, "learning_rate": 1e-05, "loss": 0.4917, "step": 6314 }, { "epoch": 1.747526465093752, "grad_norm": 0.17650656402111053, "learning_rate": 1e-05, "loss": 0.4809, "step": 6315 }, { "epoch": 1.7478032242441017, "grad_norm": 0.16653701663017273, "learning_rate": 1e-05, "loss": 0.5083, "step": 6316 }, { "epoch": 1.748079983394451, "grad_norm": 0.17040015757083893, "learning_rate": 1e-05, "loss": 0.5121, "step": 6317 }, { "epoch": 1.7483567425448003, "grad_norm": 0.17662206292152405, "learning_rate": 1e-05, "loss": 0.5471, "step": 6318 }, { "epoch": 1.7486335016951498, "grad_norm": 0.16880561411380768, "learning_rate": 1e-05, "loss": 0.4986, "step": 6319 }, { "epoch": 1.7489102608454992, "grad_norm": 0.16715960204601288, "learning_rate": 1e-05, "loss": 0.4938, "step": 6320 }, { "epoch": 1.7491870199958486, "grad_norm": 0.17708207666873932, "learning_rate": 1e-05, "loss": 0.5232, "step": 6321 }, { "epoch": 1.749463779146198, "grad_norm": 0.1732901781797409, "learning_rate": 1e-05, "loss": 0.4865, "step": 6322 }, { "epoch": 1.7497405382965474, "grad_norm": 0.16644442081451416, "learning_rate": 1e-05, "loss": 0.4964, "step": 6323 }, { "epoch": 1.7500172974468968, "grad_norm": 0.17616118490695953, "learning_rate": 1e-05, "loss": 0.4891, "step": 6324 }, { "epoch": 1.7502940565972462, "grad_norm": 0.16895776987075806, "learning_rate": 1e-05, "loss": 0.4905, "step": 6325 }, { "epoch": 1.7505708157475957, "grad_norm": 0.1643703281879425, "learning_rate": 1e-05, "loss": 0.4923, "step": 6326 }, { "epoch": 1.750847574897945, "grad_norm": 0.1697327345609665, "learning_rate": 1e-05, "loss": 0.5476, "step": 6327 }, { "epoch": 1.7511243340482945, "grad_norm": 0.172739177942276, "learning_rate": 1e-05, "loss": 0.5011, "step": 6328 }, { "epoch": 1.751401093198644, "grad_norm": 0.1734713613986969, "learning_rate": 1e-05, "loss": 0.5085, "step": 6329 }, { "epoch": 1.7516778523489933, "grad_norm": 0.16974346339702606, "learning_rate": 1e-05, "loss": 0.5135, "step": 6330 }, { "epoch": 1.7519546114993427, "grad_norm": 0.17997027933597565, "learning_rate": 1e-05, "loss": 0.5262, "step": 6331 }, { "epoch": 1.7522313706496921, "grad_norm": 0.16544966399669647, "learning_rate": 1e-05, "loss": 0.4782, "step": 6332 }, { "epoch": 1.7525081298000416, "grad_norm": 0.1707121580839157, "learning_rate": 1e-05, "loss": 0.5181, "step": 6333 }, { "epoch": 1.752784888950391, "grad_norm": 0.1759522557258606, "learning_rate": 1e-05, "loss": 0.4879, "step": 6334 }, { "epoch": 1.7530616481007404, "grad_norm": 0.15792830288410187, "learning_rate": 1e-05, "loss": 0.5152, "step": 6335 }, { "epoch": 1.7533384072510896, "grad_norm": 0.17292965948581696, "learning_rate": 1e-05, "loss": 0.5455, "step": 6336 }, { "epoch": 1.7536151664014392, "grad_norm": 0.17321409285068512, "learning_rate": 1e-05, "loss": 0.535, "step": 6337 }, { "epoch": 1.7538919255517884, "grad_norm": 0.17304536700248718, "learning_rate": 1e-05, "loss": 0.5017, "step": 6338 }, { "epoch": 1.754168684702138, "grad_norm": 0.16746515035629272, "learning_rate": 1e-05, "loss": 0.505, "step": 6339 }, { "epoch": 1.7544454438524872, "grad_norm": 0.16670702397823334, "learning_rate": 1e-05, "loss": 0.5207, "step": 6340 }, { "epoch": 1.7547222030028369, "grad_norm": 0.1687997728586197, "learning_rate": 1e-05, "loss": 0.4832, "step": 6341 }, { "epoch": 1.754998962153186, "grad_norm": 0.16881567239761353, "learning_rate": 1e-05, "loss": 0.5036, "step": 6342 }, { "epoch": 1.7552757213035357, "grad_norm": 0.17380070686340332, "learning_rate": 1e-05, "loss": 0.504, "step": 6343 }, { "epoch": 1.755552480453885, "grad_norm": 0.17554247379302979, "learning_rate": 1e-05, "loss": 0.5013, "step": 6344 }, { "epoch": 1.7558292396042345, "grad_norm": 0.1709948629140854, "learning_rate": 1e-05, "loss": 0.5292, "step": 6345 }, { "epoch": 1.7561059987545837, "grad_norm": 0.16531479358673096, "learning_rate": 1e-05, "loss": 0.4781, "step": 6346 }, { "epoch": 1.7563827579049334, "grad_norm": 0.17761707305908203, "learning_rate": 1e-05, "loss": 0.4822, "step": 6347 }, { "epoch": 1.7566595170552826, "grad_norm": 0.1695275604724884, "learning_rate": 1e-05, "loss": 0.509, "step": 6348 }, { "epoch": 1.7569362762056322, "grad_norm": 0.17087402939796448, "learning_rate": 1e-05, "loss": 0.4944, "step": 6349 }, { "epoch": 1.7572130353559814, "grad_norm": 0.17478644847869873, "learning_rate": 1e-05, "loss": 0.5291, "step": 6350 }, { "epoch": 1.757489794506331, "grad_norm": 0.16761040687561035, "learning_rate": 1e-05, "loss": 0.5102, "step": 6351 }, { "epoch": 1.7577665536566802, "grad_norm": 0.1731005311012268, "learning_rate": 1e-05, "loss": 0.5158, "step": 6352 }, { "epoch": 1.7580433128070296, "grad_norm": 0.1713094711303711, "learning_rate": 1e-05, "loss": 0.4821, "step": 6353 }, { "epoch": 1.758320071957379, "grad_norm": 0.1718432456254959, "learning_rate": 1e-05, "loss": 0.4941, "step": 6354 }, { "epoch": 1.7585968311077285, "grad_norm": 0.16518092155456543, "learning_rate": 1e-05, "loss": 0.4727, "step": 6355 }, { "epoch": 1.7588735902580779, "grad_norm": 0.1710795760154724, "learning_rate": 1e-05, "loss": 0.5153, "step": 6356 }, { "epoch": 1.7591503494084273, "grad_norm": 0.17604073882102966, "learning_rate": 1e-05, "loss": 0.4873, "step": 6357 }, { "epoch": 1.7594271085587767, "grad_norm": 0.16942764818668365, "learning_rate": 1e-05, "loss": 0.484, "step": 6358 }, { "epoch": 1.7597038677091261, "grad_norm": 0.17408400774002075, "learning_rate": 1e-05, "loss": 0.487, "step": 6359 }, { "epoch": 1.7599806268594755, "grad_norm": 0.17304271459579468, "learning_rate": 1e-05, "loss": 0.4974, "step": 6360 }, { "epoch": 1.760257386009825, "grad_norm": 0.1653052270412445, "learning_rate": 1e-05, "loss": 0.5268, "step": 6361 }, { "epoch": 1.7605341451601744, "grad_norm": 0.1707390546798706, "learning_rate": 1e-05, "loss": 0.5078, "step": 6362 }, { "epoch": 1.7608109043105238, "grad_norm": 0.17758767306804657, "learning_rate": 1e-05, "loss": 0.4942, "step": 6363 }, { "epoch": 1.7610876634608732, "grad_norm": 0.1710953563451767, "learning_rate": 1e-05, "loss": 0.5118, "step": 6364 }, { "epoch": 1.7613644226112226, "grad_norm": 0.17240911722183228, "learning_rate": 1e-05, "loss": 0.5059, "step": 6365 }, { "epoch": 1.761641181761572, "grad_norm": 0.16956739127635956, "learning_rate": 1e-05, "loss": 0.5018, "step": 6366 }, { "epoch": 1.7619179409119214, "grad_norm": 0.17859488725662231, "learning_rate": 1e-05, "loss": 0.5196, "step": 6367 }, { "epoch": 1.7621947000622709, "grad_norm": 0.17156900465488434, "learning_rate": 1e-05, "loss": 0.5256, "step": 6368 }, { "epoch": 1.76247145921262, "grad_norm": 0.17310220003128052, "learning_rate": 1e-05, "loss": 0.492, "step": 6369 }, { "epoch": 1.7627482183629697, "grad_norm": 0.17425625026226044, "learning_rate": 1e-05, "loss": 0.5012, "step": 6370 }, { "epoch": 1.7630249775133189, "grad_norm": 0.17398016154766083, "learning_rate": 1e-05, "loss": 0.5135, "step": 6371 }, { "epoch": 1.7633017366636685, "grad_norm": 0.16246344149112701, "learning_rate": 1e-05, "loss": 0.4944, "step": 6372 }, { "epoch": 1.7635784958140177, "grad_norm": 0.16755585372447968, "learning_rate": 1e-05, "loss": 0.501, "step": 6373 }, { "epoch": 1.7638552549643673, "grad_norm": 0.17490072548389435, "learning_rate": 1e-05, "loss": 0.4864, "step": 6374 }, { "epoch": 1.7641320141147165, "grad_norm": 0.17218464612960815, "learning_rate": 1e-05, "loss": 0.5023, "step": 6375 }, { "epoch": 1.7644087732650662, "grad_norm": 0.17105060815811157, "learning_rate": 1e-05, "loss": 0.4838, "step": 6376 }, { "epoch": 1.7646855324154154, "grad_norm": 0.1669939160346985, "learning_rate": 1e-05, "loss": 0.486, "step": 6377 }, { "epoch": 1.764962291565765, "grad_norm": 0.17295773327350616, "learning_rate": 1e-05, "loss": 0.5103, "step": 6378 }, { "epoch": 1.7652390507161142, "grad_norm": 0.1735350340604782, "learning_rate": 1e-05, "loss": 0.5243, "step": 6379 }, { "epoch": 1.7655158098664638, "grad_norm": 0.16830286383628845, "learning_rate": 1e-05, "loss": 0.4979, "step": 6380 }, { "epoch": 1.765792569016813, "grad_norm": 0.16174089908599854, "learning_rate": 1e-05, "loss": 0.4842, "step": 6381 }, { "epoch": 1.7660693281671627, "grad_norm": 0.17305773496627808, "learning_rate": 1e-05, "loss": 0.5206, "step": 6382 }, { "epoch": 1.7663460873175119, "grad_norm": 0.16869167983531952, "learning_rate": 1e-05, "loss": 0.5175, "step": 6383 }, { "epoch": 1.7666228464678615, "grad_norm": 0.175107941031456, "learning_rate": 1e-05, "loss": 0.5252, "step": 6384 }, { "epoch": 1.7668996056182107, "grad_norm": 0.1751420497894287, "learning_rate": 1e-05, "loss": 0.5328, "step": 6385 }, { "epoch": 1.7671763647685603, "grad_norm": 0.17032641172409058, "learning_rate": 1e-05, "loss": 0.5071, "step": 6386 }, { "epoch": 1.7674531239189095, "grad_norm": 0.1674332618713379, "learning_rate": 1e-05, "loss": 0.4921, "step": 6387 }, { "epoch": 1.767729883069259, "grad_norm": 0.16877837479114532, "learning_rate": 1e-05, "loss": 0.5343, "step": 6388 }, { "epoch": 1.7680066422196083, "grad_norm": 0.17030338943004608, "learning_rate": 1e-05, "loss": 0.4868, "step": 6389 }, { "epoch": 1.7682834013699578, "grad_norm": 0.1699875295162201, "learning_rate": 1e-05, "loss": 0.5045, "step": 6390 }, { "epoch": 1.7685601605203072, "grad_norm": 0.1680808663368225, "learning_rate": 1e-05, "loss": 0.5078, "step": 6391 }, { "epoch": 1.7688369196706566, "grad_norm": 0.1658780425786972, "learning_rate": 1e-05, "loss": 0.5112, "step": 6392 }, { "epoch": 1.769113678821006, "grad_norm": 0.1764976531267166, "learning_rate": 1e-05, "loss": 0.4988, "step": 6393 }, { "epoch": 1.7693904379713554, "grad_norm": 0.16662631928920746, "learning_rate": 1e-05, "loss": 0.5114, "step": 6394 }, { "epoch": 1.7696671971217048, "grad_norm": 0.16419656574726105, "learning_rate": 1e-05, "loss": 0.5116, "step": 6395 }, { "epoch": 1.7699439562720543, "grad_norm": 0.1747572124004364, "learning_rate": 1e-05, "loss": 0.4953, "step": 6396 }, { "epoch": 1.7702207154224037, "grad_norm": 0.16971619427204132, "learning_rate": 1e-05, "loss": 0.4661, "step": 6397 }, { "epoch": 1.770497474572753, "grad_norm": 0.17109763622283936, "learning_rate": 1e-05, "loss": 0.5146, "step": 6398 }, { "epoch": 1.7707742337231025, "grad_norm": 0.17783544957637787, "learning_rate": 1e-05, "loss": 0.5214, "step": 6399 }, { "epoch": 1.771050992873452, "grad_norm": 0.172516331076622, "learning_rate": 1e-05, "loss": 0.5301, "step": 6400 }, { "epoch": 1.7713277520238013, "grad_norm": 0.17737102508544922, "learning_rate": 1e-05, "loss": 0.5163, "step": 6401 }, { "epoch": 1.7716045111741507, "grad_norm": 0.16868963837623596, "learning_rate": 1e-05, "loss": 0.4882, "step": 6402 }, { "epoch": 1.7718812703245002, "grad_norm": 0.16429980099201202, "learning_rate": 1e-05, "loss": 0.5199, "step": 6403 }, { "epoch": 1.7721580294748494, "grad_norm": 0.17626231908798218, "learning_rate": 1e-05, "loss": 0.5015, "step": 6404 }, { "epoch": 1.772434788625199, "grad_norm": 0.17755459249019623, "learning_rate": 1e-05, "loss": 0.4956, "step": 6405 }, { "epoch": 1.7727115477755482, "grad_norm": 0.1745491325855255, "learning_rate": 1e-05, "loss": 0.5574, "step": 6406 }, { "epoch": 1.7729883069258978, "grad_norm": 0.1799362748861313, "learning_rate": 1e-05, "loss": 0.5253, "step": 6407 }, { "epoch": 1.773265066076247, "grad_norm": 0.1755414754152298, "learning_rate": 1e-05, "loss": 0.4803, "step": 6408 }, { "epoch": 1.7735418252265966, "grad_norm": 0.16733768582344055, "learning_rate": 1e-05, "loss": 0.5134, "step": 6409 }, { "epoch": 1.7738185843769458, "grad_norm": 0.17188942432403564, "learning_rate": 1e-05, "loss": 0.5123, "step": 6410 }, { "epoch": 1.7740953435272955, "grad_norm": 0.16844041645526886, "learning_rate": 1e-05, "loss": 0.5032, "step": 6411 }, { "epoch": 1.7743721026776447, "grad_norm": 0.1718832105398178, "learning_rate": 1e-05, "loss": 0.5071, "step": 6412 }, { "epoch": 1.7746488618279943, "grad_norm": 0.169193834066391, "learning_rate": 1e-05, "loss": 0.5314, "step": 6413 }, { "epoch": 1.7749256209783435, "grad_norm": 0.1757100373506546, "learning_rate": 1e-05, "loss": 0.5, "step": 6414 }, { "epoch": 1.7752023801286931, "grad_norm": 0.1722625344991684, "learning_rate": 1e-05, "loss": 0.5154, "step": 6415 }, { "epoch": 1.7754791392790423, "grad_norm": 0.16319900751113892, "learning_rate": 1e-05, "loss": 0.497, "step": 6416 }, { "epoch": 1.775755898429392, "grad_norm": 0.1773792803287506, "learning_rate": 1e-05, "loss": 0.5082, "step": 6417 }, { "epoch": 1.7760326575797412, "grad_norm": 0.16540169715881348, "learning_rate": 1e-05, "loss": 0.5137, "step": 6418 }, { "epoch": 1.7763094167300908, "grad_norm": 0.16776050627231598, "learning_rate": 1e-05, "loss": 0.5023, "step": 6419 }, { "epoch": 1.77658617588044, "grad_norm": 0.1659727841615677, "learning_rate": 1e-05, "loss": 0.4965, "step": 6420 }, { "epoch": 1.7768629350307894, "grad_norm": 0.17185620963573456, "learning_rate": 1e-05, "loss": 0.4955, "step": 6421 }, { "epoch": 1.7771396941811388, "grad_norm": 0.1659061461687088, "learning_rate": 1e-05, "loss": 0.5138, "step": 6422 }, { "epoch": 1.7774164533314882, "grad_norm": 0.17947843670845032, "learning_rate": 1e-05, "loss": 0.5212, "step": 6423 }, { "epoch": 1.7776932124818376, "grad_norm": 0.17136727273464203, "learning_rate": 1e-05, "loss": 0.4859, "step": 6424 }, { "epoch": 1.777969971632187, "grad_norm": 0.16977132856845856, "learning_rate": 1e-05, "loss": 0.5003, "step": 6425 }, { "epoch": 1.7782467307825365, "grad_norm": 0.16648000478744507, "learning_rate": 1e-05, "loss": 0.5141, "step": 6426 }, { "epoch": 1.778523489932886, "grad_norm": 0.1737332046031952, "learning_rate": 1e-05, "loss": 0.487, "step": 6427 }, { "epoch": 1.7788002490832353, "grad_norm": 0.1649383157491684, "learning_rate": 1e-05, "loss": 0.474, "step": 6428 }, { "epoch": 1.7790770082335847, "grad_norm": 0.16504672169685364, "learning_rate": 1e-05, "loss": 0.4498, "step": 6429 }, { "epoch": 1.7793537673839341, "grad_norm": 0.16821366548538208, "learning_rate": 1e-05, "loss": 0.4643, "step": 6430 }, { "epoch": 1.7796305265342836, "grad_norm": 0.17096664011478424, "learning_rate": 1e-05, "loss": 0.5215, "step": 6431 }, { "epoch": 1.779907285684633, "grad_norm": 0.1672927737236023, "learning_rate": 1e-05, "loss": 0.5138, "step": 6432 }, { "epoch": 1.7801840448349824, "grad_norm": 0.17257104814052582, "learning_rate": 1e-05, "loss": 0.4985, "step": 6433 }, { "epoch": 1.7804608039853318, "grad_norm": 0.17047421634197235, "learning_rate": 1e-05, "loss": 0.476, "step": 6434 }, { "epoch": 1.7807375631356812, "grad_norm": 0.17095036804676056, "learning_rate": 1e-05, "loss": 0.4882, "step": 6435 }, { "epoch": 1.7810143222860306, "grad_norm": 0.16799446940422058, "learning_rate": 1e-05, "loss": 0.5026, "step": 6436 }, { "epoch": 1.78129108143638, "grad_norm": 0.18131396174430847, "learning_rate": 1e-05, "loss": 0.5318, "step": 6437 }, { "epoch": 1.7815678405867295, "grad_norm": 0.17381161451339722, "learning_rate": 1e-05, "loss": 0.4981, "step": 6438 }, { "epoch": 1.7818445997370786, "grad_norm": 0.1826120913028717, "learning_rate": 1e-05, "loss": 0.506, "step": 6439 }, { "epoch": 1.7821213588874283, "grad_norm": 0.1735212653875351, "learning_rate": 1e-05, "loss": 0.5256, "step": 6440 }, { "epoch": 1.7823981180377775, "grad_norm": 0.17447429895401, "learning_rate": 1e-05, "loss": 0.5405, "step": 6441 }, { "epoch": 1.7826748771881271, "grad_norm": 0.16819314658641815, "learning_rate": 1e-05, "loss": 0.5501, "step": 6442 }, { "epoch": 1.7829516363384763, "grad_norm": 0.17534977197647095, "learning_rate": 1e-05, "loss": 0.5015, "step": 6443 }, { "epoch": 1.783228395488826, "grad_norm": 0.1670835167169571, "learning_rate": 1e-05, "loss": 0.4944, "step": 6444 }, { "epoch": 1.7835051546391751, "grad_norm": 0.1655404269695282, "learning_rate": 1e-05, "loss": 0.481, "step": 6445 }, { "epoch": 1.7837819137895248, "grad_norm": 0.17629390954971313, "learning_rate": 1e-05, "loss": 0.5214, "step": 6446 }, { "epoch": 1.784058672939874, "grad_norm": 0.1680379956960678, "learning_rate": 1e-05, "loss": 0.4784, "step": 6447 }, { "epoch": 1.7843354320902236, "grad_norm": 0.16771575808525085, "learning_rate": 1e-05, "loss": 0.5306, "step": 6448 }, { "epoch": 1.7846121912405728, "grad_norm": 0.17356137931346893, "learning_rate": 1e-05, "loss": 0.5259, "step": 6449 }, { "epoch": 1.7848889503909224, "grad_norm": 0.17623931169509888, "learning_rate": 1e-05, "loss": 0.5538, "step": 6450 }, { "epoch": 1.7851657095412716, "grad_norm": 0.1706104725599289, "learning_rate": 1e-05, "loss": 0.5124, "step": 6451 }, { "epoch": 1.7854424686916213, "grad_norm": 0.16628435254096985, "learning_rate": 1e-05, "loss": 0.4915, "step": 6452 }, { "epoch": 1.7857192278419705, "grad_norm": 0.16854621469974518, "learning_rate": 1e-05, "loss": 0.4948, "step": 6453 }, { "epoch": 1.78599598699232, "grad_norm": 0.1720896065235138, "learning_rate": 1e-05, "loss": 0.5196, "step": 6454 }, { "epoch": 1.7862727461426693, "grad_norm": 0.1665019989013672, "learning_rate": 1e-05, "loss": 0.4838, "step": 6455 }, { "epoch": 1.7865495052930187, "grad_norm": 0.16888847947120667, "learning_rate": 1e-05, "loss": 0.4911, "step": 6456 }, { "epoch": 1.7868262644433681, "grad_norm": 0.17822198569774628, "learning_rate": 1e-05, "loss": 0.5051, "step": 6457 }, { "epoch": 1.7871030235937175, "grad_norm": 0.1713322401046753, "learning_rate": 1e-05, "loss": 0.4886, "step": 6458 }, { "epoch": 1.787379782744067, "grad_norm": 0.16582924127578735, "learning_rate": 1e-05, "loss": 0.5187, "step": 6459 }, { "epoch": 1.7876565418944164, "grad_norm": 0.17556558549404144, "learning_rate": 1e-05, "loss": 0.5072, "step": 6460 }, { "epoch": 1.7879333010447658, "grad_norm": 0.17094197869300842, "learning_rate": 1e-05, "loss": 0.499, "step": 6461 }, { "epoch": 1.7882100601951152, "grad_norm": 0.16997767984867096, "learning_rate": 1e-05, "loss": 0.4877, "step": 6462 }, { "epoch": 1.7884868193454646, "grad_norm": 0.16490525007247925, "learning_rate": 1e-05, "loss": 0.503, "step": 6463 }, { "epoch": 1.788763578495814, "grad_norm": 0.17027977108955383, "learning_rate": 1e-05, "loss": 0.5109, "step": 6464 }, { "epoch": 1.7890403376461634, "grad_norm": 0.1752227544784546, "learning_rate": 1e-05, "loss": 0.5023, "step": 6465 }, { "epoch": 1.7893170967965129, "grad_norm": 0.1714944839477539, "learning_rate": 1e-05, "loss": 0.4976, "step": 6466 }, { "epoch": 1.7895938559468623, "grad_norm": 0.17309489846229553, "learning_rate": 1e-05, "loss": 0.4829, "step": 6467 }, { "epoch": 1.7898706150972117, "grad_norm": 0.1766311079263687, "learning_rate": 1e-05, "loss": 0.5155, "step": 6468 }, { "epoch": 1.790147374247561, "grad_norm": 0.16803941130638123, "learning_rate": 1e-05, "loss": 0.4868, "step": 6469 }, { "epoch": 1.7904241333979105, "grad_norm": 0.17283757030963898, "learning_rate": 1e-05, "loss": 0.5116, "step": 6470 }, { "epoch": 1.79070089254826, "grad_norm": 0.1668374240398407, "learning_rate": 1e-05, "loss": 0.5124, "step": 6471 }, { "epoch": 1.7909776516986091, "grad_norm": 0.17400749027729034, "learning_rate": 1e-05, "loss": 0.5124, "step": 6472 }, { "epoch": 1.7912544108489588, "grad_norm": 0.19980177283287048, "learning_rate": 1e-05, "loss": 0.5146, "step": 6473 }, { "epoch": 1.791531169999308, "grad_norm": 0.16543881595134735, "learning_rate": 1e-05, "loss": 0.5108, "step": 6474 }, { "epoch": 1.7918079291496576, "grad_norm": 0.17070676386356354, "learning_rate": 1e-05, "loss": 0.4718, "step": 6475 }, { "epoch": 1.7920846883000068, "grad_norm": 0.17249108850955963, "learning_rate": 1e-05, "loss": 0.517, "step": 6476 }, { "epoch": 1.7923614474503564, "grad_norm": 0.1815020591020584, "learning_rate": 1e-05, "loss": 0.4883, "step": 6477 }, { "epoch": 1.7926382066007056, "grad_norm": 0.17219850420951843, "learning_rate": 1e-05, "loss": 0.4991, "step": 6478 }, { "epoch": 1.7929149657510552, "grad_norm": 0.17640522122383118, "learning_rate": 1e-05, "loss": 0.5078, "step": 6479 }, { "epoch": 1.7931917249014044, "grad_norm": 0.17634446918964386, "learning_rate": 1e-05, "loss": 0.5092, "step": 6480 }, { "epoch": 1.793468484051754, "grad_norm": 0.17429175972938538, "learning_rate": 1e-05, "loss": 0.5026, "step": 6481 }, { "epoch": 1.7937452432021033, "grad_norm": 0.17021730542182922, "learning_rate": 1e-05, "loss": 0.5039, "step": 6482 }, { "epoch": 1.794022002352453, "grad_norm": 0.16818684339523315, "learning_rate": 1e-05, "loss": 0.5057, "step": 6483 }, { "epoch": 1.794298761502802, "grad_norm": 0.17778652906417847, "learning_rate": 1e-05, "loss": 0.5077, "step": 6484 }, { "epoch": 1.7945755206531517, "grad_norm": 0.1810285747051239, "learning_rate": 1e-05, "loss": 0.5018, "step": 6485 }, { "epoch": 1.794852279803501, "grad_norm": 0.17569032311439514, "learning_rate": 1e-05, "loss": 0.522, "step": 6486 }, { "epoch": 1.7951290389538506, "grad_norm": 0.16445691883563995, "learning_rate": 1e-05, "loss": 0.4899, "step": 6487 }, { "epoch": 1.7954057981041998, "grad_norm": 0.17039285600185394, "learning_rate": 1e-05, "loss": 0.4953, "step": 6488 }, { "epoch": 1.7956825572545494, "grad_norm": 0.16224771738052368, "learning_rate": 1e-05, "loss": 0.5059, "step": 6489 }, { "epoch": 1.7959593164048986, "grad_norm": 0.16467459499835968, "learning_rate": 1e-05, "loss": 0.5015, "step": 6490 }, { "epoch": 1.796236075555248, "grad_norm": 0.16441191732883453, "learning_rate": 1e-05, "loss": 0.4784, "step": 6491 }, { "epoch": 1.7965128347055974, "grad_norm": 0.16269488632678986, "learning_rate": 1e-05, "loss": 0.4968, "step": 6492 }, { "epoch": 1.7967895938559468, "grad_norm": 0.16537532210350037, "learning_rate": 1e-05, "loss": 0.5134, "step": 6493 }, { "epoch": 1.7970663530062962, "grad_norm": 0.16908593475818634, "learning_rate": 1e-05, "loss": 0.5074, "step": 6494 }, { "epoch": 1.7973431121566457, "grad_norm": 0.17144963145256042, "learning_rate": 1e-05, "loss": 0.4942, "step": 6495 }, { "epoch": 1.797619871306995, "grad_norm": 0.16959847509860992, "learning_rate": 1e-05, "loss": 0.4885, "step": 6496 }, { "epoch": 1.7978966304573445, "grad_norm": 0.18043731153011322, "learning_rate": 1e-05, "loss": 0.473, "step": 6497 }, { "epoch": 1.798173389607694, "grad_norm": 0.1643078625202179, "learning_rate": 1e-05, "loss": 0.5072, "step": 6498 }, { "epoch": 1.7984501487580433, "grad_norm": 0.18153543770313263, "learning_rate": 1e-05, "loss": 0.4975, "step": 6499 }, { "epoch": 1.7987269079083927, "grad_norm": 0.17338427901268005, "learning_rate": 1e-05, "loss": 0.5123, "step": 6500 }, { "epoch": 1.7990036670587422, "grad_norm": 0.17174449563026428, "learning_rate": 1e-05, "loss": 0.4806, "step": 6501 }, { "epoch": 1.7992804262090916, "grad_norm": 0.1809949427843094, "learning_rate": 1e-05, "loss": 0.5104, "step": 6502 }, { "epoch": 1.799557185359441, "grad_norm": 0.17590714991092682, "learning_rate": 1e-05, "loss": 0.4985, "step": 6503 }, { "epoch": 1.7998339445097904, "grad_norm": 0.17133206129074097, "learning_rate": 1e-05, "loss": 0.5046, "step": 6504 }, { "epoch": 1.8001107036601398, "grad_norm": 0.17583534121513367, "learning_rate": 1e-05, "loss": 0.4952, "step": 6505 }, { "epoch": 1.8003874628104892, "grad_norm": 0.16655805706977844, "learning_rate": 1e-05, "loss": 0.4887, "step": 6506 }, { "epoch": 1.8006642219608384, "grad_norm": 0.17913183569908142, "learning_rate": 1e-05, "loss": 0.5326, "step": 6507 }, { "epoch": 1.800940981111188, "grad_norm": 0.1779475063085556, "learning_rate": 1e-05, "loss": 0.5045, "step": 6508 }, { "epoch": 1.8012177402615372, "grad_norm": 0.16647043824195862, "learning_rate": 1e-05, "loss": 0.4952, "step": 6509 }, { "epoch": 1.8014944994118869, "grad_norm": 0.18474271893501282, "learning_rate": 1e-05, "loss": 0.514, "step": 6510 }, { "epoch": 1.801771258562236, "grad_norm": 0.1634928435087204, "learning_rate": 1e-05, "loss": 0.4836, "step": 6511 }, { "epoch": 1.8020480177125857, "grad_norm": 0.16451483964920044, "learning_rate": 1e-05, "loss": 0.5071, "step": 6512 }, { "epoch": 1.802324776862935, "grad_norm": 0.16035839915275574, "learning_rate": 1e-05, "loss": 0.5252, "step": 6513 }, { "epoch": 1.8026015360132845, "grad_norm": 0.1802075207233429, "learning_rate": 1e-05, "loss": 0.5027, "step": 6514 }, { "epoch": 1.8028782951636337, "grad_norm": 0.1663169115781784, "learning_rate": 1e-05, "loss": 0.5207, "step": 6515 }, { "epoch": 1.8031550543139834, "grad_norm": 0.1738005429506302, "learning_rate": 1e-05, "loss": 0.5278, "step": 6516 }, { "epoch": 1.8034318134643326, "grad_norm": 0.17373721301555634, "learning_rate": 1e-05, "loss": 0.4985, "step": 6517 }, { "epoch": 1.8037085726146822, "grad_norm": 0.164003387093544, "learning_rate": 1e-05, "loss": 0.4917, "step": 6518 }, { "epoch": 1.8039853317650314, "grad_norm": 0.16493971645832062, "learning_rate": 1e-05, "loss": 0.4996, "step": 6519 }, { "epoch": 1.804262090915381, "grad_norm": 0.17041568458080292, "learning_rate": 1e-05, "loss": 0.5283, "step": 6520 }, { "epoch": 1.8045388500657302, "grad_norm": 0.17313772439956665, "learning_rate": 1e-05, "loss": 0.5087, "step": 6521 }, { "epoch": 1.8048156092160799, "grad_norm": 0.17156657576560974, "learning_rate": 1e-05, "loss": 0.4893, "step": 6522 }, { "epoch": 1.805092368366429, "grad_norm": 0.1711944341659546, "learning_rate": 1e-05, "loss": 0.5067, "step": 6523 }, { "epoch": 1.8053691275167785, "grad_norm": 0.17189648747444153, "learning_rate": 1e-05, "loss": 0.5153, "step": 6524 }, { "epoch": 1.8056458866671279, "grad_norm": 0.1732226461172104, "learning_rate": 1e-05, "loss": 0.4812, "step": 6525 }, { "epoch": 1.8059226458174773, "grad_norm": 0.17255082726478577, "learning_rate": 1e-05, "loss": 0.501, "step": 6526 }, { "epoch": 1.8061994049678267, "grad_norm": 0.16763989627361298, "learning_rate": 1e-05, "loss": 0.4768, "step": 6527 }, { "epoch": 1.8064761641181761, "grad_norm": 0.17115835845470428, "learning_rate": 1e-05, "loss": 0.4817, "step": 6528 }, { "epoch": 1.8067529232685255, "grad_norm": 0.16879938542842865, "learning_rate": 1e-05, "loss": 0.5335, "step": 6529 }, { "epoch": 1.807029682418875, "grad_norm": 0.17439761757850647, "learning_rate": 1e-05, "loss": 0.4922, "step": 6530 }, { "epoch": 1.8073064415692244, "grad_norm": 0.1751825362443924, "learning_rate": 1e-05, "loss": 0.4932, "step": 6531 }, { "epoch": 1.8075832007195738, "grad_norm": 0.17802338302135468, "learning_rate": 1e-05, "loss": 0.5029, "step": 6532 }, { "epoch": 1.8078599598699232, "grad_norm": 0.17831555008888245, "learning_rate": 1e-05, "loss": 0.4873, "step": 6533 }, { "epoch": 1.8081367190202726, "grad_norm": 0.16751839220523834, "learning_rate": 1e-05, "loss": 0.4819, "step": 6534 }, { "epoch": 1.808413478170622, "grad_norm": 0.17106583714485168, "learning_rate": 1e-05, "loss": 0.4945, "step": 6535 }, { "epoch": 1.8086902373209715, "grad_norm": 0.1716303676366806, "learning_rate": 1e-05, "loss": 0.5031, "step": 6536 }, { "epoch": 1.8089669964713209, "grad_norm": 0.16581648588180542, "learning_rate": 1e-05, "loss": 0.5044, "step": 6537 }, { "epoch": 1.8092437556216703, "grad_norm": 0.1653251349925995, "learning_rate": 1e-05, "loss": 0.48, "step": 6538 }, { "epoch": 1.8095205147720197, "grad_norm": 0.17167329788208008, "learning_rate": 1e-05, "loss": 0.4907, "step": 6539 }, { "epoch": 1.809797273922369, "grad_norm": 0.17246949672698975, "learning_rate": 1e-05, "loss": 0.5061, "step": 6540 }, { "epoch": 1.8100740330727185, "grad_norm": 0.1721271425485611, "learning_rate": 1e-05, "loss": 0.5431, "step": 6541 }, { "epoch": 1.8103507922230677, "grad_norm": 0.17198649048805237, "learning_rate": 1e-05, "loss": 0.4991, "step": 6542 }, { "epoch": 1.8106275513734174, "grad_norm": 0.17989644408226013, "learning_rate": 1e-05, "loss": 0.5156, "step": 6543 }, { "epoch": 1.8109043105237665, "grad_norm": 0.1786644607782364, "learning_rate": 1e-05, "loss": 0.5154, "step": 6544 }, { "epoch": 1.8111810696741162, "grad_norm": 0.1763104647397995, "learning_rate": 1e-05, "loss": 0.5047, "step": 6545 }, { "epoch": 1.8114578288244654, "grad_norm": 0.16815067827701569, "learning_rate": 1e-05, "loss": 0.5324, "step": 6546 }, { "epoch": 1.811734587974815, "grad_norm": 0.16714507341384888, "learning_rate": 1e-05, "loss": 0.4741, "step": 6547 }, { "epoch": 1.8120113471251642, "grad_norm": 0.17452383041381836, "learning_rate": 1e-05, "loss": 0.5082, "step": 6548 }, { "epoch": 1.8122881062755138, "grad_norm": 0.1839035302400589, "learning_rate": 1e-05, "loss": 0.5105, "step": 6549 }, { "epoch": 1.812564865425863, "grad_norm": 0.16887153685092926, "learning_rate": 1e-05, "loss": 0.4933, "step": 6550 }, { "epoch": 1.8128416245762127, "grad_norm": 0.16185668110847473, "learning_rate": 1e-05, "loss": 0.5065, "step": 6551 }, { "epoch": 1.8131183837265619, "grad_norm": 0.1676091104745865, "learning_rate": 1e-05, "loss": 0.5021, "step": 6552 }, { "epoch": 1.8133951428769115, "grad_norm": 0.16507655382156372, "learning_rate": 1e-05, "loss": 0.4979, "step": 6553 }, { "epoch": 1.8136719020272607, "grad_norm": 0.1746402382850647, "learning_rate": 1e-05, "loss": 0.4974, "step": 6554 }, { "epoch": 1.8139486611776103, "grad_norm": 0.16591234505176544, "learning_rate": 1e-05, "loss": 0.4968, "step": 6555 }, { "epoch": 1.8142254203279595, "grad_norm": 0.1609531044960022, "learning_rate": 1e-05, "loss": 0.5077, "step": 6556 }, { "epoch": 1.8145021794783092, "grad_norm": 0.16276243329048157, "learning_rate": 1e-05, "loss": 0.4874, "step": 6557 }, { "epoch": 1.8147789386286584, "grad_norm": 0.1714557707309723, "learning_rate": 1e-05, "loss": 0.521, "step": 6558 }, { "epoch": 1.8150556977790078, "grad_norm": 0.16360566020011902, "learning_rate": 1e-05, "loss": 0.5125, "step": 6559 }, { "epoch": 1.8153324569293572, "grad_norm": 0.1756782978773117, "learning_rate": 1e-05, "loss": 0.4898, "step": 6560 }, { "epoch": 1.8156092160797066, "grad_norm": 0.16977868974208832, "learning_rate": 1e-05, "loss": 0.5044, "step": 6561 }, { "epoch": 1.815885975230056, "grad_norm": 0.1713433563709259, "learning_rate": 1e-05, "loss": 0.5018, "step": 6562 }, { "epoch": 1.8161627343804054, "grad_norm": 0.18517006933689117, "learning_rate": 1e-05, "loss": 0.5383, "step": 6563 }, { "epoch": 1.8164394935307548, "grad_norm": 0.17016273736953735, "learning_rate": 1e-05, "loss": 0.4936, "step": 6564 }, { "epoch": 1.8167162526811043, "grad_norm": 0.17074370384216309, "learning_rate": 1e-05, "loss": 0.539, "step": 6565 }, { "epoch": 1.8169930118314537, "grad_norm": 0.17210406064987183, "learning_rate": 1e-05, "loss": 0.5124, "step": 6566 }, { "epoch": 1.817269770981803, "grad_norm": 0.1694697141647339, "learning_rate": 1e-05, "loss": 0.4817, "step": 6567 }, { "epoch": 1.8175465301321525, "grad_norm": 0.16814365983009338, "learning_rate": 1e-05, "loss": 0.4943, "step": 6568 }, { "epoch": 1.817823289282502, "grad_norm": 0.172588050365448, "learning_rate": 1e-05, "loss": 0.5225, "step": 6569 }, { "epoch": 1.8181000484328513, "grad_norm": 0.17766112089157104, "learning_rate": 1e-05, "loss": 0.5345, "step": 6570 }, { "epoch": 1.8183768075832007, "grad_norm": 0.17971506714820862, "learning_rate": 1e-05, "loss": 0.494, "step": 6571 }, { "epoch": 1.8186535667335502, "grad_norm": 0.16979674994945526, "learning_rate": 1e-05, "loss": 0.4898, "step": 6572 }, { "epoch": 1.8189303258838996, "grad_norm": 0.16391919553279877, "learning_rate": 1e-05, "loss": 0.4886, "step": 6573 }, { "epoch": 1.819207085034249, "grad_norm": 0.16892483830451965, "learning_rate": 1e-05, "loss": 0.5184, "step": 6574 }, { "epoch": 1.8194838441845982, "grad_norm": 0.1732088178396225, "learning_rate": 1e-05, "loss": 0.4999, "step": 6575 }, { "epoch": 1.8197606033349478, "grad_norm": 0.1695895940065384, "learning_rate": 1e-05, "loss": 0.5086, "step": 6576 }, { "epoch": 1.820037362485297, "grad_norm": 0.18224036693572998, "learning_rate": 1e-05, "loss": 0.5308, "step": 6577 }, { "epoch": 1.8203141216356467, "grad_norm": 0.17206256091594696, "learning_rate": 1e-05, "loss": 0.4966, "step": 6578 }, { "epoch": 1.8205908807859958, "grad_norm": 0.16415809094905853, "learning_rate": 1e-05, "loss": 0.5044, "step": 6579 }, { "epoch": 1.8208676399363455, "grad_norm": 0.178815558552742, "learning_rate": 1e-05, "loss": 0.5102, "step": 6580 }, { "epoch": 1.8211443990866947, "grad_norm": 0.17444060742855072, "learning_rate": 1e-05, "loss": 0.489, "step": 6581 }, { "epoch": 1.8214211582370443, "grad_norm": 0.16829970479011536, "learning_rate": 1e-05, "loss": 0.4889, "step": 6582 }, { "epoch": 1.8216979173873935, "grad_norm": 0.16890408098697662, "learning_rate": 1e-05, "loss": 0.4808, "step": 6583 }, { "epoch": 1.8219746765377431, "grad_norm": 0.16514086723327637, "learning_rate": 1e-05, "loss": 0.4903, "step": 6584 }, { "epoch": 1.8222514356880923, "grad_norm": 0.16443315148353577, "learning_rate": 1e-05, "loss": 0.5041, "step": 6585 }, { "epoch": 1.822528194838442, "grad_norm": 0.168974831700325, "learning_rate": 1e-05, "loss": 0.4632, "step": 6586 }, { "epoch": 1.8228049539887912, "grad_norm": 0.17497827112674713, "learning_rate": 1e-05, "loss": 0.5169, "step": 6587 }, { "epoch": 1.8230817131391408, "grad_norm": 0.16165849566459656, "learning_rate": 1e-05, "loss": 0.482, "step": 6588 }, { "epoch": 1.82335847228949, "grad_norm": 0.16651898622512817, "learning_rate": 1e-05, "loss": 0.4963, "step": 6589 }, { "epoch": 1.8236352314398396, "grad_norm": 0.17299798130989075, "learning_rate": 1e-05, "loss": 0.4782, "step": 6590 }, { "epoch": 1.8239119905901888, "grad_norm": 0.1667848825454712, "learning_rate": 1e-05, "loss": 0.5195, "step": 6591 }, { "epoch": 1.8241887497405385, "grad_norm": 0.17522922158241272, "learning_rate": 1e-05, "loss": 0.5061, "step": 6592 }, { "epoch": 1.8244655088908877, "grad_norm": 0.17753629386425018, "learning_rate": 1e-05, "loss": 0.4969, "step": 6593 }, { "epoch": 1.824742268041237, "grad_norm": 0.17177274823188782, "learning_rate": 1e-05, "loss": 0.5261, "step": 6594 }, { "epoch": 1.8250190271915865, "grad_norm": 0.17279672622680664, "learning_rate": 1e-05, "loss": 0.529, "step": 6595 }, { "epoch": 1.825295786341936, "grad_norm": 0.16893035173416138, "learning_rate": 1e-05, "loss": 0.4932, "step": 6596 }, { "epoch": 1.8255725454922853, "grad_norm": 0.16453605890274048, "learning_rate": 1e-05, "loss": 0.4893, "step": 6597 }, { "epoch": 1.8258493046426347, "grad_norm": 0.1686197966337204, "learning_rate": 1e-05, "loss": 0.5017, "step": 6598 }, { "epoch": 1.8261260637929841, "grad_norm": 0.17164993286132812, "learning_rate": 1e-05, "loss": 0.4878, "step": 6599 }, { "epoch": 1.8264028229433336, "grad_norm": 0.16957925260066986, "learning_rate": 1e-05, "loss": 0.5013, "step": 6600 }, { "epoch": 1.826679582093683, "grad_norm": 0.184022456407547, "learning_rate": 1e-05, "loss": 0.5165, "step": 6601 }, { "epoch": 1.8269563412440324, "grad_norm": 0.17513953149318695, "learning_rate": 1e-05, "loss": 0.4917, "step": 6602 }, { "epoch": 1.8272331003943818, "grad_norm": 0.17856121063232422, "learning_rate": 1e-05, "loss": 0.487, "step": 6603 }, { "epoch": 1.8275098595447312, "grad_norm": 0.18248172104358673, "learning_rate": 1e-05, "loss": 0.5304, "step": 6604 }, { "epoch": 1.8277866186950806, "grad_norm": 0.17507420480251312, "learning_rate": 1e-05, "loss": 0.4907, "step": 6605 }, { "epoch": 1.82806337784543, "grad_norm": 0.18070504069328308, "learning_rate": 1e-05, "loss": 0.5288, "step": 6606 }, { "epoch": 1.8283401369957795, "grad_norm": 0.1785566657781601, "learning_rate": 1e-05, "loss": 0.5185, "step": 6607 }, { "epoch": 1.8286168961461289, "grad_norm": 0.1732756793498993, "learning_rate": 1e-05, "loss": 0.4758, "step": 6608 }, { "epoch": 1.8288936552964783, "grad_norm": 0.1679460108280182, "learning_rate": 1e-05, "loss": 0.4771, "step": 6609 }, { "epoch": 1.8291704144468275, "grad_norm": 0.16309447586536407, "learning_rate": 1e-05, "loss": 0.514, "step": 6610 }, { "epoch": 1.8294471735971771, "grad_norm": 0.1696697324514389, "learning_rate": 1e-05, "loss": 0.5047, "step": 6611 }, { "epoch": 1.8297239327475263, "grad_norm": 0.1717231273651123, "learning_rate": 1e-05, "loss": 0.4987, "step": 6612 }, { "epoch": 1.830000691897876, "grad_norm": 0.16493982076644897, "learning_rate": 1e-05, "loss": 0.4743, "step": 6613 }, { "epoch": 1.8302774510482251, "grad_norm": 0.16896219551563263, "learning_rate": 1e-05, "loss": 0.4799, "step": 6614 }, { "epoch": 1.8305542101985748, "grad_norm": 0.16555699706077576, "learning_rate": 1e-05, "loss": 0.5115, "step": 6615 }, { "epoch": 1.830830969348924, "grad_norm": 0.17504216730594635, "learning_rate": 1e-05, "loss": 0.5264, "step": 6616 }, { "epoch": 1.8311077284992736, "grad_norm": 0.176222026348114, "learning_rate": 1e-05, "loss": 0.519, "step": 6617 }, { "epoch": 1.8313844876496228, "grad_norm": 0.17043818533420563, "learning_rate": 1e-05, "loss": 0.5238, "step": 6618 }, { "epoch": 1.8316612467999724, "grad_norm": 0.17689163982868195, "learning_rate": 1e-05, "loss": 0.4996, "step": 6619 }, { "epoch": 1.8319380059503216, "grad_norm": 0.16526509821414948, "learning_rate": 1e-05, "loss": 0.4971, "step": 6620 }, { "epoch": 1.8322147651006713, "grad_norm": 0.17556846141815186, "learning_rate": 1e-05, "loss": 0.5328, "step": 6621 }, { "epoch": 1.8324915242510205, "grad_norm": 0.16920319199562073, "learning_rate": 1e-05, "loss": 0.5026, "step": 6622 }, { "epoch": 1.83276828340137, "grad_norm": 0.18902944028377533, "learning_rate": 1e-05, "loss": 0.5054, "step": 6623 }, { "epoch": 1.8330450425517193, "grad_norm": 0.17143772542476654, "learning_rate": 1e-05, "loss": 0.5102, "step": 6624 }, { "epoch": 1.833321801702069, "grad_norm": 0.17112931609153748, "learning_rate": 1e-05, "loss": 0.5166, "step": 6625 }, { "epoch": 1.8335985608524181, "grad_norm": 0.18437552452087402, "learning_rate": 1e-05, "loss": 0.5475, "step": 6626 }, { "epoch": 1.8338753200027675, "grad_norm": 0.16769346594810486, "learning_rate": 1e-05, "loss": 0.478, "step": 6627 }, { "epoch": 1.834152079153117, "grad_norm": 0.1683136373758316, "learning_rate": 1e-05, "loss": 0.4952, "step": 6628 }, { "epoch": 1.8344288383034664, "grad_norm": 0.16431285440921783, "learning_rate": 1e-05, "loss": 0.4786, "step": 6629 }, { "epoch": 1.8347055974538158, "grad_norm": 0.17424048483371735, "learning_rate": 1e-05, "loss": 0.4902, "step": 6630 }, { "epoch": 1.8349823566041652, "grad_norm": 0.16460928320884705, "learning_rate": 1e-05, "loss": 0.4788, "step": 6631 }, { "epoch": 1.8352591157545146, "grad_norm": 0.16917888820171356, "learning_rate": 1e-05, "loss": 0.503, "step": 6632 }, { "epoch": 1.835535874904864, "grad_norm": 0.16351303458213806, "learning_rate": 1e-05, "loss": 0.4906, "step": 6633 }, { "epoch": 1.8358126340552134, "grad_norm": 0.17595893144607544, "learning_rate": 1e-05, "loss": 0.5144, "step": 6634 }, { "epoch": 1.8360893932055629, "grad_norm": 0.17492303252220154, "learning_rate": 1e-05, "loss": 0.4898, "step": 6635 }, { "epoch": 1.8363661523559123, "grad_norm": 0.1635165959596634, "learning_rate": 1e-05, "loss": 0.4824, "step": 6636 }, { "epoch": 1.8366429115062617, "grad_norm": 0.1660250425338745, "learning_rate": 1e-05, "loss": 0.5019, "step": 6637 }, { "epoch": 1.836919670656611, "grad_norm": 0.17162016034126282, "learning_rate": 1e-05, "loss": 0.4997, "step": 6638 }, { "epoch": 1.8371964298069605, "grad_norm": 0.16540928184986115, "learning_rate": 1e-05, "loss": 0.4991, "step": 6639 }, { "epoch": 1.83747318895731, "grad_norm": 0.17505063116550446, "learning_rate": 1e-05, "loss": 0.4988, "step": 6640 }, { "epoch": 1.8377499481076593, "grad_norm": 0.17054483294487, "learning_rate": 1e-05, "loss": 0.5225, "step": 6641 }, { "epoch": 1.8380267072580088, "grad_norm": 0.18084025382995605, "learning_rate": 1e-05, "loss": 0.5305, "step": 6642 }, { "epoch": 1.8383034664083582, "grad_norm": 0.16776615381240845, "learning_rate": 1e-05, "loss": 0.4858, "step": 6643 }, { "epoch": 1.8385802255587076, "grad_norm": 0.17138786613941193, "learning_rate": 1e-05, "loss": 0.513, "step": 6644 }, { "epoch": 1.8388569847090568, "grad_norm": 0.1695219725370407, "learning_rate": 1e-05, "loss": 0.5221, "step": 6645 }, { "epoch": 1.8391337438594064, "grad_norm": 0.1771034151315689, "learning_rate": 1e-05, "loss": 0.5024, "step": 6646 }, { "epoch": 1.8394105030097556, "grad_norm": 0.16730040311813354, "learning_rate": 1e-05, "loss": 0.5267, "step": 6647 }, { "epoch": 1.8396872621601053, "grad_norm": 0.1711491346359253, "learning_rate": 1e-05, "loss": 0.5001, "step": 6648 }, { "epoch": 1.8399640213104544, "grad_norm": 0.17217639088630676, "learning_rate": 1e-05, "loss": 0.4942, "step": 6649 }, { "epoch": 1.840240780460804, "grad_norm": 0.16960008442401886, "learning_rate": 1e-05, "loss": 0.4821, "step": 6650 }, { "epoch": 1.8405175396111533, "grad_norm": 0.17863833904266357, "learning_rate": 1e-05, "loss": 0.5223, "step": 6651 }, { "epoch": 1.840794298761503, "grad_norm": 0.17570120096206665, "learning_rate": 1e-05, "loss": 0.4931, "step": 6652 }, { "epoch": 1.841071057911852, "grad_norm": 0.17437152564525604, "learning_rate": 1e-05, "loss": 0.4974, "step": 6653 }, { "epoch": 1.8413478170622017, "grad_norm": 0.1704869121313095, "learning_rate": 1e-05, "loss": 0.5173, "step": 6654 }, { "epoch": 1.841624576212551, "grad_norm": 0.17269431054592133, "learning_rate": 1e-05, "loss": 0.5114, "step": 6655 }, { "epoch": 1.8419013353629006, "grad_norm": 0.17291125655174255, "learning_rate": 1e-05, "loss": 0.4787, "step": 6656 }, { "epoch": 1.8421780945132498, "grad_norm": 0.16152511537075043, "learning_rate": 1e-05, "loss": 0.4863, "step": 6657 }, { "epoch": 1.8424548536635994, "grad_norm": 0.17400480806827545, "learning_rate": 1e-05, "loss": 0.5277, "step": 6658 }, { "epoch": 1.8427316128139486, "grad_norm": 0.1767897754907608, "learning_rate": 1e-05, "loss": 0.513, "step": 6659 }, { "epoch": 1.8430083719642982, "grad_norm": 0.1742643266916275, "learning_rate": 1e-05, "loss": 0.5101, "step": 6660 }, { "epoch": 1.8432851311146474, "grad_norm": 0.17352023720741272, "learning_rate": 1e-05, "loss": 0.5478, "step": 6661 }, { "epoch": 1.8435618902649968, "grad_norm": 0.16897717118263245, "learning_rate": 1e-05, "loss": 0.491, "step": 6662 }, { "epoch": 1.8438386494153463, "grad_norm": 0.1779177039861679, "learning_rate": 1e-05, "loss": 0.5033, "step": 6663 }, { "epoch": 1.8441154085656957, "grad_norm": 0.16907243430614471, "learning_rate": 1e-05, "loss": 0.5043, "step": 6664 }, { "epoch": 1.844392167716045, "grad_norm": 0.16639897227287292, "learning_rate": 1e-05, "loss": 0.4887, "step": 6665 }, { "epoch": 1.8446689268663945, "grad_norm": 0.17250661551952362, "learning_rate": 1e-05, "loss": 0.4867, "step": 6666 }, { "epoch": 1.844945686016744, "grad_norm": 0.1708008348941803, "learning_rate": 1e-05, "loss": 0.5114, "step": 6667 }, { "epoch": 1.8452224451670933, "grad_norm": 0.17030270397663116, "learning_rate": 1e-05, "loss": 0.4787, "step": 6668 }, { "epoch": 1.8454992043174427, "grad_norm": 0.168690487742424, "learning_rate": 1e-05, "loss": 0.4918, "step": 6669 }, { "epoch": 1.8457759634677922, "grad_norm": 0.1770293414592743, "learning_rate": 1e-05, "loss": 0.4914, "step": 6670 }, { "epoch": 1.8460527226181416, "grad_norm": 0.1814899742603302, "learning_rate": 1e-05, "loss": 0.5521, "step": 6671 }, { "epoch": 1.846329481768491, "grad_norm": 0.16993583738803864, "learning_rate": 1e-05, "loss": 0.5287, "step": 6672 }, { "epoch": 1.8466062409188404, "grad_norm": 0.17323975265026093, "learning_rate": 1e-05, "loss": 0.5143, "step": 6673 }, { "epoch": 1.8468830000691898, "grad_norm": 0.17647139728069305, "learning_rate": 1e-05, "loss": 0.5102, "step": 6674 }, { "epoch": 1.8471597592195392, "grad_norm": 0.1827564388513565, "learning_rate": 1e-05, "loss": 0.5371, "step": 6675 }, { "epoch": 1.8474365183698886, "grad_norm": 0.16821050643920898, "learning_rate": 1e-05, "loss": 0.4927, "step": 6676 }, { "epoch": 1.847713277520238, "grad_norm": 0.16234458982944489, "learning_rate": 1e-05, "loss": 0.4985, "step": 6677 }, { "epoch": 1.8479900366705873, "grad_norm": 0.1711266040802002, "learning_rate": 1e-05, "loss": 0.5275, "step": 6678 }, { "epoch": 1.848266795820937, "grad_norm": 0.1781141608953476, "learning_rate": 1e-05, "loss": 0.5167, "step": 6679 }, { "epoch": 1.848543554971286, "grad_norm": 0.1729665845632553, "learning_rate": 1e-05, "loss": 0.4631, "step": 6680 }, { "epoch": 1.8488203141216357, "grad_norm": 0.17515365779399872, "learning_rate": 1e-05, "loss": 0.4921, "step": 6681 }, { "epoch": 1.849097073271985, "grad_norm": 0.16598346829414368, "learning_rate": 1e-05, "loss": 0.5054, "step": 6682 }, { "epoch": 1.8493738324223346, "grad_norm": 0.1755456030368805, "learning_rate": 1e-05, "loss": 0.5163, "step": 6683 }, { "epoch": 1.8496505915726837, "grad_norm": 0.16835622489452362, "learning_rate": 1e-05, "loss": 0.4954, "step": 6684 }, { "epoch": 1.8499273507230334, "grad_norm": 0.17369574308395386, "learning_rate": 1e-05, "loss": 0.5294, "step": 6685 }, { "epoch": 1.8502041098733826, "grad_norm": 0.1610938459634781, "learning_rate": 1e-05, "loss": 0.4862, "step": 6686 }, { "epoch": 1.8504808690237322, "grad_norm": 0.16791386902332306, "learning_rate": 1e-05, "loss": 0.4865, "step": 6687 }, { "epoch": 1.8507576281740814, "grad_norm": 0.16398020088672638, "learning_rate": 1e-05, "loss": 0.4928, "step": 6688 }, { "epoch": 1.851034387324431, "grad_norm": 0.1696365922689438, "learning_rate": 1e-05, "loss": 0.4674, "step": 6689 }, { "epoch": 1.8513111464747802, "grad_norm": 0.16957035660743713, "learning_rate": 1e-05, "loss": 0.4916, "step": 6690 }, { "epoch": 1.8515879056251299, "grad_norm": 0.16850674152374268, "learning_rate": 1e-05, "loss": 0.5207, "step": 6691 }, { "epoch": 1.851864664775479, "grad_norm": 0.18464823067188263, "learning_rate": 1e-05, "loss": 0.4915, "step": 6692 }, { "epoch": 1.8521414239258287, "grad_norm": 0.16847334802150726, "learning_rate": 1e-05, "loss": 0.4857, "step": 6693 }, { "epoch": 1.852418183076178, "grad_norm": 0.16955116391181946, "learning_rate": 1e-05, "loss": 0.5231, "step": 6694 }, { "epoch": 1.8526949422265275, "grad_norm": 0.17693789303302765, "learning_rate": 1e-05, "loss": 0.4887, "step": 6695 }, { "epoch": 1.8529717013768767, "grad_norm": 0.16745707392692566, "learning_rate": 1e-05, "loss": 0.507, "step": 6696 }, { "epoch": 1.8532484605272261, "grad_norm": 0.16598904132843018, "learning_rate": 1e-05, "loss": 0.4862, "step": 6697 }, { "epoch": 1.8535252196775756, "grad_norm": 0.17735619843006134, "learning_rate": 1e-05, "loss": 0.5047, "step": 6698 }, { "epoch": 1.853801978827925, "grad_norm": 0.17965976893901825, "learning_rate": 1e-05, "loss": 0.5023, "step": 6699 }, { "epoch": 1.8540787379782744, "grad_norm": 0.16598273813724518, "learning_rate": 1e-05, "loss": 0.496, "step": 6700 }, { "epoch": 1.8543554971286238, "grad_norm": 0.181862473487854, "learning_rate": 1e-05, "loss": 0.5078, "step": 6701 }, { "epoch": 1.8546322562789732, "grad_norm": 0.17603372037410736, "learning_rate": 1e-05, "loss": 0.5313, "step": 6702 }, { "epoch": 1.8549090154293226, "grad_norm": 0.1717301607131958, "learning_rate": 1e-05, "loss": 0.482, "step": 6703 }, { "epoch": 1.855185774579672, "grad_norm": 0.17016620934009552, "learning_rate": 1e-05, "loss": 0.4986, "step": 6704 }, { "epoch": 1.8554625337300215, "grad_norm": 0.1692950576543808, "learning_rate": 1e-05, "loss": 0.4932, "step": 6705 }, { "epoch": 1.8557392928803709, "grad_norm": 0.16508187353610992, "learning_rate": 1e-05, "loss": 0.4767, "step": 6706 }, { "epoch": 1.8560160520307203, "grad_norm": 0.17106792330741882, "learning_rate": 1e-05, "loss": 0.5106, "step": 6707 }, { "epoch": 1.8562928111810697, "grad_norm": 0.18407095968723297, "learning_rate": 1e-05, "loss": 0.4884, "step": 6708 }, { "epoch": 1.8565695703314191, "grad_norm": 0.16536560654640198, "learning_rate": 1e-05, "loss": 0.4648, "step": 6709 }, { "epoch": 1.8568463294817685, "grad_norm": 0.17455440759658813, "learning_rate": 1e-05, "loss": 0.5056, "step": 6710 }, { "epoch": 1.857123088632118, "grad_norm": 0.17202875018119812, "learning_rate": 1e-05, "loss": 0.499, "step": 6711 }, { "epoch": 1.8573998477824674, "grad_norm": 0.174351766705513, "learning_rate": 1e-05, "loss": 0.5035, "step": 6712 }, { "epoch": 1.8576766069328166, "grad_norm": 0.17144775390625, "learning_rate": 1e-05, "loss": 0.5121, "step": 6713 }, { "epoch": 1.8579533660831662, "grad_norm": 0.1760314702987671, "learning_rate": 1e-05, "loss": 0.5068, "step": 6714 }, { "epoch": 1.8582301252335154, "grad_norm": 0.17045293748378754, "learning_rate": 1e-05, "loss": 0.5435, "step": 6715 }, { "epoch": 1.858506884383865, "grad_norm": 0.1712949573993683, "learning_rate": 1e-05, "loss": 0.4902, "step": 6716 }, { "epoch": 1.8587836435342142, "grad_norm": 0.17494475841522217, "learning_rate": 1e-05, "loss": 0.4906, "step": 6717 }, { "epoch": 1.8590604026845639, "grad_norm": 0.18036483228206635, "learning_rate": 1e-05, "loss": 0.5191, "step": 6718 }, { "epoch": 1.859337161834913, "grad_norm": 0.1704304814338684, "learning_rate": 1e-05, "loss": 0.5247, "step": 6719 }, { "epoch": 1.8596139209852627, "grad_norm": 0.1693689525127411, "learning_rate": 1e-05, "loss": 0.4991, "step": 6720 }, { "epoch": 1.8598906801356119, "grad_norm": 0.18201246857643127, "learning_rate": 1e-05, "loss": 0.4955, "step": 6721 }, { "epoch": 1.8601674392859615, "grad_norm": 0.17766070365905762, "learning_rate": 1e-05, "loss": 0.5057, "step": 6722 }, { "epoch": 1.8604441984363107, "grad_norm": 0.1670549511909485, "learning_rate": 1e-05, "loss": 0.5063, "step": 6723 }, { "epoch": 1.8607209575866603, "grad_norm": 0.17637160420417786, "learning_rate": 1e-05, "loss": 0.4922, "step": 6724 }, { "epoch": 1.8609977167370095, "grad_norm": 0.16560162603855133, "learning_rate": 1e-05, "loss": 0.4953, "step": 6725 }, { "epoch": 1.8612744758873592, "grad_norm": 0.17252109944820404, "learning_rate": 1e-05, "loss": 0.4968, "step": 6726 }, { "epoch": 1.8615512350377084, "grad_norm": 0.17262408137321472, "learning_rate": 1e-05, "loss": 0.4904, "step": 6727 }, { "epoch": 1.861827994188058, "grad_norm": 0.18315304815769196, "learning_rate": 1e-05, "loss": 0.5122, "step": 6728 }, { "epoch": 1.8621047533384072, "grad_norm": 0.17318616807460785, "learning_rate": 1e-05, "loss": 0.512, "step": 6729 }, { "epoch": 1.8623815124887566, "grad_norm": 0.17325009405612946, "learning_rate": 1e-05, "loss": 0.5137, "step": 6730 }, { "epoch": 1.862658271639106, "grad_norm": 0.17824605107307434, "learning_rate": 1e-05, "loss": 0.5329, "step": 6731 }, { "epoch": 1.8629350307894554, "grad_norm": 0.17192180454730988, "learning_rate": 1e-05, "loss": 0.5179, "step": 6732 }, { "epoch": 1.8632117899398049, "grad_norm": 0.18464988470077515, "learning_rate": 1e-05, "loss": 0.5302, "step": 6733 }, { "epoch": 1.8634885490901543, "grad_norm": 0.16646474599838257, "learning_rate": 1e-05, "loss": 0.4769, "step": 6734 }, { "epoch": 1.8637653082405037, "grad_norm": 0.1689535528421402, "learning_rate": 1e-05, "loss": 0.5061, "step": 6735 }, { "epoch": 1.864042067390853, "grad_norm": 0.17281809449195862, "learning_rate": 1e-05, "loss": 0.5109, "step": 6736 }, { "epoch": 1.8643188265412025, "grad_norm": 0.17257492244243622, "learning_rate": 1e-05, "loss": 0.5204, "step": 6737 }, { "epoch": 1.864595585691552, "grad_norm": 0.1779623180627823, "learning_rate": 1e-05, "loss": 0.5193, "step": 6738 }, { "epoch": 1.8648723448419013, "grad_norm": 0.17025333642959595, "learning_rate": 1e-05, "loss": 0.5283, "step": 6739 }, { "epoch": 1.8651491039922508, "grad_norm": 0.16819562017917633, "learning_rate": 1e-05, "loss": 0.5017, "step": 6740 }, { "epoch": 1.8654258631426002, "grad_norm": 0.17520268261432648, "learning_rate": 1e-05, "loss": 0.5041, "step": 6741 }, { "epoch": 1.8657026222929496, "grad_norm": 0.17823240160942078, "learning_rate": 1e-05, "loss": 0.5449, "step": 6742 }, { "epoch": 1.865979381443299, "grad_norm": 0.16670849919319153, "learning_rate": 1e-05, "loss": 0.4956, "step": 6743 }, { "epoch": 1.8662561405936484, "grad_norm": 0.17097963392734528, "learning_rate": 1e-05, "loss": 0.5204, "step": 6744 }, { "epoch": 1.8665328997439978, "grad_norm": 0.17518381774425507, "learning_rate": 1e-05, "loss": 0.5383, "step": 6745 }, { "epoch": 1.8668096588943472, "grad_norm": 0.1658421754837036, "learning_rate": 1e-05, "loss": 0.5109, "step": 6746 }, { "epoch": 1.8670864180446967, "grad_norm": 0.17476727068424225, "learning_rate": 1e-05, "loss": 0.4845, "step": 6747 }, { "epoch": 1.8673631771950459, "grad_norm": 0.16837731003761292, "learning_rate": 1e-05, "loss": 0.5094, "step": 6748 }, { "epoch": 1.8676399363453955, "grad_norm": 0.16658003628253937, "learning_rate": 1e-05, "loss": 0.4977, "step": 6749 }, { "epoch": 1.8679166954957447, "grad_norm": 0.16911789774894714, "learning_rate": 1e-05, "loss": 0.4825, "step": 6750 }, { "epoch": 1.8681934546460943, "grad_norm": 0.17192009091377258, "learning_rate": 1e-05, "loss": 0.496, "step": 6751 }, { "epoch": 1.8684702137964435, "grad_norm": 0.16735784709453583, "learning_rate": 1e-05, "loss": 0.4933, "step": 6752 }, { "epoch": 1.8687469729467932, "grad_norm": 0.16751883924007416, "learning_rate": 1e-05, "loss": 0.4715, "step": 6753 }, { "epoch": 1.8690237320971423, "grad_norm": 0.17265771329402924, "learning_rate": 1e-05, "loss": 0.4955, "step": 6754 }, { "epoch": 1.869300491247492, "grad_norm": 0.16995461285114288, "learning_rate": 1e-05, "loss": 0.4984, "step": 6755 }, { "epoch": 1.8695772503978412, "grad_norm": 0.1726040542125702, "learning_rate": 1e-05, "loss": 0.5011, "step": 6756 }, { "epoch": 1.8698540095481908, "grad_norm": 0.16905716061592102, "learning_rate": 1e-05, "loss": 0.4907, "step": 6757 }, { "epoch": 1.87013076869854, "grad_norm": 0.1726750284433365, "learning_rate": 1e-05, "loss": 0.477, "step": 6758 }, { "epoch": 1.8704075278488896, "grad_norm": 0.16850383579730988, "learning_rate": 1e-05, "loss": 0.5, "step": 6759 }, { "epoch": 1.8706842869992388, "grad_norm": 0.17099712789058685, "learning_rate": 1e-05, "loss": 0.5079, "step": 6760 }, { "epoch": 1.8709610461495885, "grad_norm": 0.17507892847061157, "learning_rate": 1e-05, "loss": 0.4965, "step": 6761 }, { "epoch": 1.8712378052999377, "grad_norm": 0.16306282579898834, "learning_rate": 1e-05, "loss": 0.4874, "step": 6762 }, { "epoch": 1.8715145644502873, "grad_norm": 0.16662326455116272, "learning_rate": 1e-05, "loss": 0.4923, "step": 6763 }, { "epoch": 1.8717913236006365, "grad_norm": 0.1714225858449936, "learning_rate": 1e-05, "loss": 0.5212, "step": 6764 }, { "epoch": 1.872068082750986, "grad_norm": 0.18077436089515686, "learning_rate": 1e-05, "loss": 0.5182, "step": 6765 }, { "epoch": 1.8723448419013353, "grad_norm": 0.17168231308460236, "learning_rate": 1e-05, "loss": 0.5039, "step": 6766 }, { "epoch": 1.8726216010516847, "grad_norm": 0.18298646807670593, "learning_rate": 1e-05, "loss": 0.4862, "step": 6767 }, { "epoch": 1.8728983602020342, "grad_norm": 0.17309704422950745, "learning_rate": 1e-05, "loss": 0.5267, "step": 6768 }, { "epoch": 1.8731751193523836, "grad_norm": 0.1637178361415863, "learning_rate": 1e-05, "loss": 0.483, "step": 6769 }, { "epoch": 1.873451878502733, "grad_norm": 0.1682642698287964, "learning_rate": 1e-05, "loss": 0.5, "step": 6770 }, { "epoch": 1.8737286376530824, "grad_norm": 0.1758577525615692, "learning_rate": 1e-05, "loss": 0.5247, "step": 6771 }, { "epoch": 1.8740053968034318, "grad_norm": 0.17022842168807983, "learning_rate": 1e-05, "loss": 0.4735, "step": 6772 }, { "epoch": 1.8742821559537812, "grad_norm": 0.16443747282028198, "learning_rate": 1e-05, "loss": 0.5083, "step": 6773 }, { "epoch": 1.8745589151041306, "grad_norm": 0.16718575358390808, "learning_rate": 1e-05, "loss": 0.4958, "step": 6774 }, { "epoch": 1.87483567425448, "grad_norm": 0.1711537390947342, "learning_rate": 1e-05, "loss": 0.5009, "step": 6775 }, { "epoch": 1.8751124334048295, "grad_norm": 0.17752303183078766, "learning_rate": 1e-05, "loss": 0.4876, "step": 6776 }, { "epoch": 1.8753891925551789, "grad_norm": 0.16681750118732452, "learning_rate": 1e-05, "loss": 0.5173, "step": 6777 }, { "epoch": 1.8756659517055283, "grad_norm": 0.1728724092245102, "learning_rate": 1e-05, "loss": 0.5092, "step": 6778 }, { "epoch": 1.8759427108558777, "grad_norm": 0.168376162648201, "learning_rate": 1e-05, "loss": 0.5033, "step": 6779 }, { "epoch": 1.8762194700062271, "grad_norm": 0.16674727201461792, "learning_rate": 1e-05, "loss": 0.5281, "step": 6780 }, { "epoch": 1.8764962291565763, "grad_norm": 0.1758211851119995, "learning_rate": 1e-05, "loss": 0.5317, "step": 6781 }, { "epoch": 1.876772988306926, "grad_norm": 0.17733584344387054, "learning_rate": 1e-05, "loss": 0.4887, "step": 6782 }, { "epoch": 1.8770497474572752, "grad_norm": 0.15923592448234558, "learning_rate": 1e-05, "loss": 0.4792, "step": 6783 }, { "epoch": 1.8773265066076248, "grad_norm": 0.16284406185150146, "learning_rate": 1e-05, "loss": 0.4791, "step": 6784 }, { "epoch": 1.877603265757974, "grad_norm": 0.16616277396678925, "learning_rate": 1e-05, "loss": 0.4921, "step": 6785 }, { "epoch": 1.8778800249083236, "grad_norm": 0.17742027342319489, "learning_rate": 1e-05, "loss": 0.5032, "step": 6786 }, { "epoch": 1.8781567840586728, "grad_norm": 0.17492859065532684, "learning_rate": 1e-05, "loss": 0.5086, "step": 6787 }, { "epoch": 1.8784335432090224, "grad_norm": 0.16835883259773254, "learning_rate": 1e-05, "loss": 0.4993, "step": 6788 }, { "epoch": 1.8787103023593716, "grad_norm": 0.17954027652740479, "learning_rate": 1e-05, "loss": 0.5001, "step": 6789 }, { "epoch": 1.8789870615097213, "grad_norm": 0.16390261054039001, "learning_rate": 1e-05, "loss": 0.5098, "step": 6790 }, { "epoch": 1.8792638206600705, "grad_norm": 0.17787446081638336, "learning_rate": 1e-05, "loss": 0.5115, "step": 6791 }, { "epoch": 1.87954057981042, "grad_norm": 0.16900335252285004, "learning_rate": 1e-05, "loss": 0.5077, "step": 6792 }, { "epoch": 1.8798173389607693, "grad_norm": 0.17347390949726105, "learning_rate": 1e-05, "loss": 0.5279, "step": 6793 }, { "epoch": 1.880094098111119, "grad_norm": 0.1734606772661209, "learning_rate": 1e-05, "loss": 0.4975, "step": 6794 }, { "epoch": 1.8803708572614681, "grad_norm": 0.17719942331314087, "learning_rate": 1e-05, "loss": 0.5106, "step": 6795 }, { "epoch": 1.8806476164118178, "grad_norm": 0.17556826770305634, "learning_rate": 1e-05, "loss": 0.5287, "step": 6796 }, { "epoch": 1.880924375562167, "grad_norm": 0.17251256108283997, "learning_rate": 1e-05, "loss": 0.5081, "step": 6797 }, { "epoch": 1.8812011347125166, "grad_norm": 0.174111470580101, "learning_rate": 1e-05, "loss": 0.5254, "step": 6798 }, { "epoch": 1.8814778938628658, "grad_norm": 0.17688804864883423, "learning_rate": 1e-05, "loss": 0.4731, "step": 6799 }, { "epoch": 1.8817546530132152, "grad_norm": 0.17325879633426666, "learning_rate": 1e-05, "loss": 0.5125, "step": 6800 }, { "epoch": 1.8820314121635646, "grad_norm": 0.17151105403900146, "learning_rate": 1e-05, "loss": 0.5244, "step": 6801 }, { "epoch": 1.882308171313914, "grad_norm": 0.1824830174446106, "learning_rate": 1e-05, "loss": 0.4934, "step": 6802 }, { "epoch": 1.8825849304642635, "grad_norm": 0.16927959024906158, "learning_rate": 1e-05, "loss": 0.5129, "step": 6803 }, { "epoch": 1.8828616896146129, "grad_norm": 0.17903093993663788, "learning_rate": 1e-05, "loss": 0.4988, "step": 6804 }, { "epoch": 1.8831384487649623, "grad_norm": 0.16472265124320984, "learning_rate": 1e-05, "loss": 0.4814, "step": 6805 }, { "epoch": 1.8834152079153117, "grad_norm": 0.16846731305122375, "learning_rate": 1e-05, "loss": 0.4971, "step": 6806 }, { "epoch": 1.883691967065661, "grad_norm": 0.17657259106636047, "learning_rate": 1e-05, "loss": 0.5111, "step": 6807 }, { "epoch": 1.8839687262160105, "grad_norm": 0.1897525042295456, "learning_rate": 1e-05, "loss": 0.5226, "step": 6808 }, { "epoch": 1.88424548536636, "grad_norm": 0.17206096649169922, "learning_rate": 1e-05, "loss": 0.4897, "step": 6809 }, { "epoch": 1.8845222445167094, "grad_norm": 0.16145116090774536, "learning_rate": 1e-05, "loss": 0.4955, "step": 6810 }, { "epoch": 1.8847990036670588, "grad_norm": 0.1580570936203003, "learning_rate": 1e-05, "loss": 0.4844, "step": 6811 }, { "epoch": 1.8850757628174082, "grad_norm": 0.1631481647491455, "learning_rate": 1e-05, "loss": 0.4909, "step": 6812 }, { "epoch": 1.8853525219677576, "grad_norm": 0.17916929721832275, "learning_rate": 1e-05, "loss": 0.4816, "step": 6813 }, { "epoch": 1.885629281118107, "grad_norm": 0.17131704092025757, "learning_rate": 1e-05, "loss": 0.4982, "step": 6814 }, { "epoch": 1.8859060402684564, "grad_norm": 0.16809797286987305, "learning_rate": 1e-05, "loss": 0.47, "step": 6815 }, { "epoch": 1.8861827994188056, "grad_norm": 0.16808564960956573, "learning_rate": 1e-05, "loss": 0.5115, "step": 6816 }, { "epoch": 1.8864595585691553, "grad_norm": 0.17329540848731995, "learning_rate": 1e-05, "loss": 0.5259, "step": 6817 }, { "epoch": 1.8867363177195045, "grad_norm": 0.17803886532783508, "learning_rate": 1e-05, "loss": 0.5135, "step": 6818 }, { "epoch": 1.887013076869854, "grad_norm": 0.17054763436317444, "learning_rate": 1e-05, "loss": 0.4905, "step": 6819 }, { "epoch": 1.8872898360202033, "grad_norm": 0.16158384084701538, "learning_rate": 1e-05, "loss": 0.4995, "step": 6820 }, { "epoch": 1.887566595170553, "grad_norm": 0.16699004173278809, "learning_rate": 1e-05, "loss": 0.531, "step": 6821 }, { "epoch": 1.8878433543209021, "grad_norm": 0.16779056191444397, "learning_rate": 1e-05, "loss": 0.4845, "step": 6822 }, { "epoch": 1.8881201134712517, "grad_norm": 0.1663236916065216, "learning_rate": 1e-05, "loss": 0.4957, "step": 6823 }, { "epoch": 1.888396872621601, "grad_norm": 0.17934906482696533, "learning_rate": 1e-05, "loss": 0.5149, "step": 6824 }, { "epoch": 1.8886736317719506, "grad_norm": 0.1827680468559265, "learning_rate": 1e-05, "loss": 0.5183, "step": 6825 }, { "epoch": 1.8889503909222998, "grad_norm": 0.16998803615570068, "learning_rate": 1e-05, "loss": 0.469, "step": 6826 }, { "epoch": 1.8892271500726494, "grad_norm": 0.16665047407150269, "learning_rate": 1e-05, "loss": 0.5316, "step": 6827 }, { "epoch": 1.8895039092229986, "grad_norm": 0.1639798879623413, "learning_rate": 1e-05, "loss": 0.4833, "step": 6828 }, { "epoch": 1.8897806683733482, "grad_norm": 0.1724008023738861, "learning_rate": 1e-05, "loss": 0.4555, "step": 6829 }, { "epoch": 1.8900574275236974, "grad_norm": 0.16701041162014008, "learning_rate": 1e-05, "loss": 0.5303, "step": 6830 }, { "epoch": 1.890334186674047, "grad_norm": 0.18158480525016785, "learning_rate": 1e-05, "loss": 0.5146, "step": 6831 }, { "epoch": 1.8906109458243963, "grad_norm": 0.16735628247261047, "learning_rate": 1e-05, "loss": 0.4985, "step": 6832 }, { "epoch": 1.8908877049747457, "grad_norm": 0.17038285732269287, "learning_rate": 1e-05, "loss": 0.5122, "step": 6833 }, { "epoch": 1.891164464125095, "grad_norm": 0.1749151349067688, "learning_rate": 1e-05, "loss": 0.5068, "step": 6834 }, { "epoch": 1.8914412232754445, "grad_norm": 0.16763199865818024, "learning_rate": 1e-05, "loss": 0.537, "step": 6835 }, { "epoch": 1.891717982425794, "grad_norm": 0.180137038230896, "learning_rate": 1e-05, "loss": 0.537, "step": 6836 }, { "epoch": 1.8919947415761433, "grad_norm": 0.17502233386039734, "learning_rate": 1e-05, "loss": 0.5026, "step": 6837 }, { "epoch": 1.8922715007264928, "grad_norm": 0.16907159984111786, "learning_rate": 1e-05, "loss": 0.4823, "step": 6838 }, { "epoch": 1.8925482598768422, "grad_norm": 0.1765422821044922, "learning_rate": 1e-05, "loss": 0.4733, "step": 6839 }, { "epoch": 1.8928250190271916, "grad_norm": 0.1675274819135666, "learning_rate": 1e-05, "loss": 0.5009, "step": 6840 }, { "epoch": 1.893101778177541, "grad_norm": 0.17125487327575684, "learning_rate": 1e-05, "loss": 0.5, "step": 6841 }, { "epoch": 1.8933785373278904, "grad_norm": 0.16666823625564575, "learning_rate": 1e-05, "loss": 0.4887, "step": 6842 }, { "epoch": 1.8936552964782398, "grad_norm": 0.17001448571681976, "learning_rate": 1e-05, "loss": 0.5157, "step": 6843 }, { "epoch": 1.8939320556285892, "grad_norm": 0.16296732425689697, "learning_rate": 1e-05, "loss": 0.504, "step": 6844 }, { "epoch": 1.8942088147789387, "grad_norm": 0.17999015748500824, "learning_rate": 1e-05, "loss": 0.5082, "step": 6845 }, { "epoch": 1.894485573929288, "grad_norm": 0.16520218551158905, "learning_rate": 1e-05, "loss": 0.5024, "step": 6846 }, { "epoch": 1.8947623330796375, "grad_norm": 0.16316622495651245, "learning_rate": 1e-05, "loss": 0.5004, "step": 6847 }, { "epoch": 1.895039092229987, "grad_norm": 0.1719667613506317, "learning_rate": 1e-05, "loss": 0.4919, "step": 6848 }, { "epoch": 1.8953158513803363, "grad_norm": 0.1721828430891037, "learning_rate": 1e-05, "loss": 0.5299, "step": 6849 }, { "epoch": 1.8955926105306857, "grad_norm": 0.17449049651622772, "learning_rate": 1e-05, "loss": 0.4941, "step": 6850 }, { "epoch": 1.895869369681035, "grad_norm": 0.1683373749256134, "learning_rate": 1e-05, "loss": 0.4927, "step": 6851 }, { "epoch": 1.8961461288313846, "grad_norm": 0.17758533358573914, "learning_rate": 1e-05, "loss": 0.51, "step": 6852 }, { "epoch": 1.8964228879817338, "grad_norm": 0.17559179663658142, "learning_rate": 1e-05, "loss": 0.5139, "step": 6853 }, { "epoch": 1.8966996471320834, "grad_norm": 0.17315270006656647, "learning_rate": 1e-05, "loss": 0.5172, "step": 6854 }, { "epoch": 1.8969764062824326, "grad_norm": 0.17055167257785797, "learning_rate": 1e-05, "loss": 0.5082, "step": 6855 }, { "epoch": 1.8972531654327822, "grad_norm": 0.1759941279888153, "learning_rate": 1e-05, "loss": 0.484, "step": 6856 }, { "epoch": 1.8975299245831314, "grad_norm": 0.15927563607692719, "learning_rate": 1e-05, "loss": 0.4962, "step": 6857 }, { "epoch": 1.897806683733481, "grad_norm": 0.1732201874256134, "learning_rate": 1e-05, "loss": 0.5044, "step": 6858 }, { "epoch": 1.8980834428838302, "grad_norm": 0.16884131729602814, "learning_rate": 1e-05, "loss": 0.5079, "step": 6859 }, { "epoch": 1.8983602020341799, "grad_norm": 0.16740643978118896, "learning_rate": 1e-05, "loss": 0.4879, "step": 6860 }, { "epoch": 1.898636961184529, "grad_norm": 0.17231519520282745, "learning_rate": 1e-05, "loss": 0.5082, "step": 6861 }, { "epoch": 1.8989137203348787, "grad_norm": 0.18361572921276093, "learning_rate": 1e-05, "loss": 0.5171, "step": 6862 }, { "epoch": 1.899190479485228, "grad_norm": 0.17438724637031555, "learning_rate": 1e-05, "loss": 0.4817, "step": 6863 }, { "epoch": 1.8994672386355775, "grad_norm": 0.16841387748718262, "learning_rate": 1e-05, "loss": 0.5128, "step": 6864 }, { "epoch": 1.8997439977859267, "grad_norm": 0.17486616969108582, "learning_rate": 1e-05, "loss": 0.5081, "step": 6865 }, { "epoch": 1.9000207569362764, "grad_norm": 0.16955846548080444, "learning_rate": 1e-05, "loss": 0.502, "step": 6866 }, { "epoch": 1.9002975160866256, "grad_norm": 0.16865456104278564, "learning_rate": 1e-05, "loss": 0.5084, "step": 6867 }, { "epoch": 1.900574275236975, "grad_norm": 0.17397360503673553, "learning_rate": 1e-05, "loss": 0.5049, "step": 6868 }, { "epoch": 1.9008510343873244, "grad_norm": 0.17512409389019012, "learning_rate": 1e-05, "loss": 0.5, "step": 6869 }, { "epoch": 1.9011277935376738, "grad_norm": 0.1664109230041504, "learning_rate": 1e-05, "loss": 0.517, "step": 6870 }, { "epoch": 1.9014045526880232, "grad_norm": 0.17268244922161102, "learning_rate": 1e-05, "loss": 0.4975, "step": 6871 }, { "epoch": 1.9016813118383726, "grad_norm": 0.16860030591487885, "learning_rate": 1e-05, "loss": 0.4891, "step": 6872 }, { "epoch": 1.901958070988722, "grad_norm": 0.17016978561878204, "learning_rate": 1e-05, "loss": 0.522, "step": 6873 }, { "epoch": 1.9022348301390715, "grad_norm": 0.17268089950084686, "learning_rate": 1e-05, "loss": 0.4907, "step": 6874 }, { "epoch": 1.9025115892894209, "grad_norm": 0.18135017156600952, "learning_rate": 1e-05, "loss": 0.5118, "step": 6875 }, { "epoch": 1.9027883484397703, "grad_norm": 0.17624740302562714, "learning_rate": 1e-05, "loss": 0.4861, "step": 6876 }, { "epoch": 1.9030651075901197, "grad_norm": 0.1635412722826004, "learning_rate": 1e-05, "loss": 0.4851, "step": 6877 }, { "epoch": 1.9033418667404691, "grad_norm": 0.17844104766845703, "learning_rate": 1e-05, "loss": 0.5183, "step": 6878 }, { "epoch": 1.9036186258908185, "grad_norm": 0.17236381769180298, "learning_rate": 1e-05, "loss": 0.5256, "step": 6879 }, { "epoch": 1.903895385041168, "grad_norm": 0.17218422889709473, "learning_rate": 1e-05, "loss": 0.5059, "step": 6880 }, { "epoch": 1.9041721441915174, "grad_norm": 0.18679864704608917, "learning_rate": 1e-05, "loss": 0.5265, "step": 6881 }, { "epoch": 1.9044489033418668, "grad_norm": 0.17179740965366364, "learning_rate": 1e-05, "loss": 0.5285, "step": 6882 }, { "epoch": 1.9047256624922162, "grad_norm": 0.16961027681827545, "learning_rate": 1e-05, "loss": 0.4723, "step": 6883 }, { "epoch": 1.9050024216425654, "grad_norm": 0.18077968060970306, "learning_rate": 1e-05, "loss": 0.5169, "step": 6884 }, { "epoch": 1.905279180792915, "grad_norm": 0.17231371998786926, "learning_rate": 1e-05, "loss": 0.5, "step": 6885 }, { "epoch": 1.9055559399432642, "grad_norm": 0.17430981993675232, "learning_rate": 1e-05, "loss": 0.5099, "step": 6886 }, { "epoch": 1.9058326990936139, "grad_norm": 0.1627783328294754, "learning_rate": 1e-05, "loss": 0.5238, "step": 6887 }, { "epoch": 1.906109458243963, "grad_norm": 0.1777431070804596, "learning_rate": 1e-05, "loss": 0.5037, "step": 6888 }, { "epoch": 1.9063862173943127, "grad_norm": 0.16541780531406403, "learning_rate": 1e-05, "loss": 0.5132, "step": 6889 }, { "epoch": 1.9066629765446619, "grad_norm": 0.17349451780319214, "learning_rate": 1e-05, "loss": 0.5049, "step": 6890 }, { "epoch": 1.9069397356950115, "grad_norm": 0.16933095455169678, "learning_rate": 1e-05, "loss": 0.4745, "step": 6891 }, { "epoch": 1.9072164948453607, "grad_norm": 0.17592956125736237, "learning_rate": 1e-05, "loss": 0.4668, "step": 6892 }, { "epoch": 1.9074932539957103, "grad_norm": 0.16793982684612274, "learning_rate": 1e-05, "loss": 0.5262, "step": 6893 }, { "epoch": 1.9077700131460595, "grad_norm": 0.17210756242275238, "learning_rate": 1e-05, "loss": 0.49, "step": 6894 }, { "epoch": 1.9080467722964092, "grad_norm": 0.16942045092582703, "learning_rate": 1e-05, "loss": 0.5308, "step": 6895 }, { "epoch": 1.9083235314467584, "grad_norm": 0.1689607948064804, "learning_rate": 1e-05, "loss": 0.4962, "step": 6896 }, { "epoch": 1.908600290597108, "grad_norm": 0.16803641617298126, "learning_rate": 1e-05, "loss": 0.4982, "step": 6897 }, { "epoch": 1.9088770497474572, "grad_norm": 0.17370133101940155, "learning_rate": 1e-05, "loss": 0.5016, "step": 6898 }, { "epoch": 1.9091538088978068, "grad_norm": 0.16850923001766205, "learning_rate": 1e-05, "loss": 0.4918, "step": 6899 }, { "epoch": 1.909430568048156, "grad_norm": 0.17654825747013092, "learning_rate": 1e-05, "loss": 0.498, "step": 6900 }, { "epoch": 1.9097073271985057, "grad_norm": 0.16763371229171753, "learning_rate": 1e-05, "loss": 0.5014, "step": 6901 }, { "epoch": 1.9099840863488549, "grad_norm": 0.16651314496994019, "learning_rate": 1e-05, "loss": 0.5085, "step": 6902 }, { "epoch": 1.9102608454992043, "grad_norm": 0.1685929149389267, "learning_rate": 1e-05, "loss": 0.5088, "step": 6903 }, { "epoch": 1.9105376046495537, "grad_norm": 0.16832417249679565, "learning_rate": 1e-05, "loss": 0.4905, "step": 6904 }, { "epoch": 1.910814363799903, "grad_norm": 0.16483725607395172, "learning_rate": 1e-05, "loss": 0.5277, "step": 6905 }, { "epoch": 1.9110911229502525, "grad_norm": 0.16624599695205688, "learning_rate": 1e-05, "loss": 0.4848, "step": 6906 }, { "epoch": 1.911367882100602, "grad_norm": 0.17193295061588287, "learning_rate": 1e-05, "loss": 0.5136, "step": 6907 }, { "epoch": 1.9116446412509513, "grad_norm": 0.16941525042057037, "learning_rate": 1e-05, "loss": 0.4848, "step": 6908 }, { "epoch": 1.9119214004013008, "grad_norm": 0.17368027567863464, "learning_rate": 1e-05, "loss": 0.4897, "step": 6909 }, { "epoch": 1.9121981595516502, "grad_norm": 0.1789715439081192, "learning_rate": 1e-05, "loss": 0.4961, "step": 6910 }, { "epoch": 1.9124749187019996, "grad_norm": 0.1735939383506775, "learning_rate": 1e-05, "loss": 0.4789, "step": 6911 }, { "epoch": 1.912751677852349, "grad_norm": 0.17593248188495636, "learning_rate": 1e-05, "loss": 0.4971, "step": 6912 }, { "epoch": 1.9130284370026984, "grad_norm": 0.1712503731250763, "learning_rate": 1e-05, "loss": 0.5148, "step": 6913 }, { "epoch": 1.9133051961530478, "grad_norm": 0.18055716156959534, "learning_rate": 1e-05, "loss": 0.4928, "step": 6914 }, { "epoch": 1.9135819553033973, "grad_norm": 0.16927474737167358, "learning_rate": 1e-05, "loss": 0.5238, "step": 6915 }, { "epoch": 1.9138587144537467, "grad_norm": 0.17563897371292114, "learning_rate": 1e-05, "loss": 0.5086, "step": 6916 }, { "epoch": 1.914135473604096, "grad_norm": 0.165673166513443, "learning_rate": 1e-05, "loss": 0.5122, "step": 6917 }, { "epoch": 1.9144122327544455, "grad_norm": 0.18777164816856384, "learning_rate": 1e-05, "loss": 0.4855, "step": 6918 }, { "epoch": 1.9146889919047947, "grad_norm": 0.16808052361011505, "learning_rate": 1e-05, "loss": 0.4866, "step": 6919 }, { "epoch": 1.9149657510551443, "grad_norm": 0.16895130276679993, "learning_rate": 1e-05, "loss": 0.4832, "step": 6920 }, { "epoch": 1.9152425102054935, "grad_norm": 0.17730775475502014, "learning_rate": 1e-05, "loss": 0.4941, "step": 6921 }, { "epoch": 1.9155192693558432, "grad_norm": 0.17318664491176605, "learning_rate": 1e-05, "loss": 0.5015, "step": 6922 }, { "epoch": 1.9157960285061923, "grad_norm": 0.1713665872812271, "learning_rate": 1e-05, "loss": 0.5239, "step": 6923 }, { "epoch": 1.916072787656542, "grad_norm": 0.16929695010185242, "learning_rate": 1e-05, "loss": 0.4718, "step": 6924 }, { "epoch": 1.9163495468068912, "grad_norm": 0.16848310828208923, "learning_rate": 1e-05, "loss": 0.4815, "step": 6925 }, { "epoch": 1.9166263059572408, "grad_norm": 0.1765384078025818, "learning_rate": 1e-05, "loss": 0.5052, "step": 6926 }, { "epoch": 1.91690306510759, "grad_norm": 0.17094585299491882, "learning_rate": 1e-05, "loss": 0.4851, "step": 6927 }, { "epoch": 1.9171798242579396, "grad_norm": 0.17093785107135773, "learning_rate": 1e-05, "loss": 0.5165, "step": 6928 }, { "epoch": 1.9174565834082888, "grad_norm": 0.16695399582386017, "learning_rate": 1e-05, "loss": 0.4959, "step": 6929 }, { "epoch": 1.9177333425586385, "grad_norm": 0.1747526377439499, "learning_rate": 1e-05, "loss": 0.4926, "step": 6930 }, { "epoch": 1.9180101017089877, "grad_norm": 0.1695503145456314, "learning_rate": 1e-05, "loss": 0.4821, "step": 6931 }, { "epoch": 1.9182868608593373, "grad_norm": 0.16669173538684845, "learning_rate": 1e-05, "loss": 0.48, "step": 6932 }, { "epoch": 1.9185636200096865, "grad_norm": 0.17562362551689148, "learning_rate": 1e-05, "loss": 0.5451, "step": 6933 }, { "epoch": 1.9188403791600361, "grad_norm": 0.16841454803943634, "learning_rate": 1e-05, "loss": 0.4752, "step": 6934 }, { "epoch": 1.9191171383103853, "grad_norm": 0.17587986588478088, "learning_rate": 1e-05, "loss": 0.5195, "step": 6935 }, { "epoch": 1.9193938974607347, "grad_norm": 0.1718565672636032, "learning_rate": 1e-05, "loss": 0.5234, "step": 6936 }, { "epoch": 1.9196706566110842, "grad_norm": 0.16607855260372162, "learning_rate": 1e-05, "loss": 0.5077, "step": 6937 }, { "epoch": 1.9199474157614336, "grad_norm": 0.16201049089431763, "learning_rate": 1e-05, "loss": 0.4997, "step": 6938 }, { "epoch": 1.920224174911783, "grad_norm": 0.1757412701845169, "learning_rate": 1e-05, "loss": 0.4851, "step": 6939 }, { "epoch": 1.9205009340621324, "grad_norm": 0.16688136756420135, "learning_rate": 1e-05, "loss": 0.4921, "step": 6940 }, { "epoch": 1.9207776932124818, "grad_norm": 0.16815133392810822, "learning_rate": 1e-05, "loss": 0.4968, "step": 6941 }, { "epoch": 1.9210544523628312, "grad_norm": 0.16976416110992432, "learning_rate": 1e-05, "loss": 0.4801, "step": 6942 }, { "epoch": 1.9213312115131806, "grad_norm": 0.1729305386543274, "learning_rate": 1e-05, "loss": 0.5069, "step": 6943 }, { "epoch": 1.92160797066353, "grad_norm": 0.17223906517028809, "learning_rate": 1e-05, "loss": 0.5017, "step": 6944 }, { "epoch": 1.9218847298138795, "grad_norm": 0.17259907722473145, "learning_rate": 1e-05, "loss": 0.5105, "step": 6945 }, { "epoch": 1.922161488964229, "grad_norm": 0.16661906242370605, "learning_rate": 1e-05, "loss": 0.4771, "step": 6946 }, { "epoch": 1.9224382481145783, "grad_norm": 0.17949220538139343, "learning_rate": 1e-05, "loss": 0.5804, "step": 6947 }, { "epoch": 1.9227150072649277, "grad_norm": 0.17717377841472626, "learning_rate": 1e-05, "loss": 0.5066, "step": 6948 }, { "epoch": 1.9229917664152771, "grad_norm": 0.17243556678295135, "learning_rate": 1e-05, "loss": 0.5164, "step": 6949 }, { "epoch": 1.9232685255656266, "grad_norm": 0.17810998857021332, "learning_rate": 1e-05, "loss": 0.5016, "step": 6950 }, { "epoch": 1.923545284715976, "grad_norm": 0.16285105049610138, "learning_rate": 1e-05, "loss": 0.518, "step": 6951 }, { "epoch": 1.9238220438663254, "grad_norm": 0.16957274079322815, "learning_rate": 1e-05, "loss": 0.5319, "step": 6952 }, { "epoch": 1.9240988030166748, "grad_norm": 0.180589497089386, "learning_rate": 1e-05, "loss": 0.5107, "step": 6953 }, { "epoch": 1.924375562167024, "grad_norm": 0.16940809786319733, "learning_rate": 1e-05, "loss": 0.5, "step": 6954 }, { "epoch": 1.9246523213173736, "grad_norm": 0.17593468725681305, "learning_rate": 1e-05, "loss": 0.4823, "step": 6955 }, { "epoch": 1.9249290804677228, "grad_norm": 0.17096497118473053, "learning_rate": 1e-05, "loss": 0.5197, "step": 6956 }, { "epoch": 1.9252058396180725, "grad_norm": 0.1777517944574356, "learning_rate": 1e-05, "loss": 0.4989, "step": 6957 }, { "epoch": 1.9254825987684216, "grad_norm": 0.1717834323644638, "learning_rate": 1e-05, "loss": 0.492, "step": 6958 }, { "epoch": 1.9257593579187713, "grad_norm": 0.16193285584449768, "learning_rate": 1e-05, "loss": 0.4862, "step": 6959 }, { "epoch": 1.9260361170691205, "grad_norm": 0.166663259267807, "learning_rate": 1e-05, "loss": 0.5121, "step": 6960 }, { "epoch": 1.9263128762194701, "grad_norm": 0.17236898839473724, "learning_rate": 1e-05, "loss": 0.5047, "step": 6961 }, { "epoch": 1.9265896353698193, "grad_norm": 0.15969207882881165, "learning_rate": 1e-05, "loss": 0.4693, "step": 6962 }, { "epoch": 1.926866394520169, "grad_norm": 0.16767261922359467, "learning_rate": 1e-05, "loss": 0.4665, "step": 6963 }, { "epoch": 1.9271431536705181, "grad_norm": 0.16420350968837738, "learning_rate": 1e-05, "loss": 0.4867, "step": 6964 }, { "epoch": 1.9274199128208678, "grad_norm": 0.16817714273929596, "learning_rate": 1e-05, "loss": 0.4943, "step": 6965 }, { "epoch": 1.927696671971217, "grad_norm": 0.175885409116745, "learning_rate": 1e-05, "loss": 0.4899, "step": 6966 }, { "epoch": 1.9279734311215666, "grad_norm": 0.1745532900094986, "learning_rate": 1e-05, "loss": 0.5145, "step": 6967 }, { "epoch": 1.9282501902719158, "grad_norm": 0.1674342155456543, "learning_rate": 1e-05, "loss": 0.475, "step": 6968 }, { "epoch": 1.9285269494222654, "grad_norm": 0.1698269098997116, "learning_rate": 1e-05, "loss": 0.4975, "step": 6969 }, { "epoch": 1.9288037085726146, "grad_norm": 0.17391645908355713, "learning_rate": 1e-05, "loss": 0.5133, "step": 6970 }, { "epoch": 1.929080467722964, "grad_norm": 0.16893503069877625, "learning_rate": 1e-05, "loss": 0.5016, "step": 6971 }, { "epoch": 1.9293572268733135, "grad_norm": 0.18270662426948547, "learning_rate": 1e-05, "loss": 0.5339, "step": 6972 }, { "epoch": 1.9296339860236629, "grad_norm": 0.17331652343273163, "learning_rate": 1e-05, "loss": 0.5275, "step": 6973 }, { "epoch": 1.9299107451740123, "grad_norm": 0.17113709449768066, "learning_rate": 1e-05, "loss": 0.4852, "step": 6974 }, { "epoch": 1.9301875043243617, "grad_norm": 0.17000548541545868, "learning_rate": 1e-05, "loss": 0.4949, "step": 6975 }, { "epoch": 1.9304642634747111, "grad_norm": 0.1660185605287552, "learning_rate": 1e-05, "loss": 0.5184, "step": 6976 }, { "epoch": 1.9307410226250605, "grad_norm": 0.16957472264766693, "learning_rate": 1e-05, "loss": 0.5351, "step": 6977 }, { "epoch": 1.93101778177541, "grad_norm": 0.17508898675441742, "learning_rate": 1e-05, "loss": 0.5257, "step": 6978 }, { "epoch": 1.9312945409257594, "grad_norm": 0.1699003130197525, "learning_rate": 1e-05, "loss": 0.5039, "step": 6979 }, { "epoch": 1.9315713000761088, "grad_norm": 0.1749863475561142, "learning_rate": 1e-05, "loss": 0.4969, "step": 6980 }, { "epoch": 1.9318480592264582, "grad_norm": 0.17287494242191315, "learning_rate": 1e-05, "loss": 0.5107, "step": 6981 }, { "epoch": 1.9321248183768076, "grad_norm": 0.16709184646606445, "learning_rate": 1e-05, "loss": 0.507, "step": 6982 }, { "epoch": 1.932401577527157, "grad_norm": 0.1658182442188263, "learning_rate": 1e-05, "loss": 0.4899, "step": 6983 }, { "epoch": 1.9326783366775064, "grad_norm": 0.16803555190563202, "learning_rate": 1e-05, "loss": 0.515, "step": 6984 }, { "epoch": 1.9329550958278559, "grad_norm": 0.17221838235855103, "learning_rate": 1e-05, "loss": 0.4922, "step": 6985 }, { "epoch": 1.9332318549782053, "grad_norm": 0.17711853981018066, "learning_rate": 1e-05, "loss": 0.4834, "step": 6986 }, { "epoch": 1.9335086141285545, "grad_norm": 0.17197829484939575, "learning_rate": 1e-05, "loss": 0.5013, "step": 6987 }, { "epoch": 1.933785373278904, "grad_norm": 0.16835276782512665, "learning_rate": 1e-05, "loss": 0.4903, "step": 6988 }, { "epoch": 1.9340621324292533, "grad_norm": 0.1631055623292923, "learning_rate": 1e-05, "loss": 0.465, "step": 6989 }, { "epoch": 1.934338891579603, "grad_norm": 0.16289150714874268, "learning_rate": 1e-05, "loss": 0.4912, "step": 6990 }, { "epoch": 1.9346156507299521, "grad_norm": 0.16975624859333038, "learning_rate": 1e-05, "loss": 0.5219, "step": 6991 }, { "epoch": 1.9348924098803018, "grad_norm": 0.17254433035850525, "learning_rate": 1e-05, "loss": 0.5037, "step": 6992 }, { "epoch": 1.935169169030651, "grad_norm": 0.17434673011302948, "learning_rate": 1e-05, "loss": 0.4989, "step": 6993 }, { "epoch": 1.9354459281810006, "grad_norm": 0.17164556682109833, "learning_rate": 1e-05, "loss": 0.5264, "step": 6994 }, { "epoch": 1.9357226873313498, "grad_norm": 0.17129553854465485, "learning_rate": 1e-05, "loss": 0.5162, "step": 6995 }, { "epoch": 1.9359994464816994, "grad_norm": 0.17086878418922424, "learning_rate": 1e-05, "loss": 0.5003, "step": 6996 }, { "epoch": 1.9362762056320486, "grad_norm": 0.1575172394514084, "learning_rate": 1e-05, "loss": 0.5057, "step": 6997 }, { "epoch": 1.9365529647823982, "grad_norm": 0.16808557510375977, "learning_rate": 1e-05, "loss": 0.5149, "step": 6998 }, { "epoch": 1.9368297239327474, "grad_norm": 0.17769697308540344, "learning_rate": 1e-05, "loss": 0.4877, "step": 6999 }, { "epoch": 1.937106483083097, "grad_norm": 0.17468108236789703, "learning_rate": 1e-05, "loss": 0.4905, "step": 7000 }, { "epoch": 1.9373832422334463, "grad_norm": 0.1688360720872879, "learning_rate": 1e-05, "loss": 0.5311, "step": 7001 }, { "epoch": 1.937660001383796, "grad_norm": 0.16773594915866852, "learning_rate": 1e-05, "loss": 0.5075, "step": 7002 }, { "epoch": 1.937936760534145, "grad_norm": 0.17193861305713654, "learning_rate": 1e-05, "loss": 0.5043, "step": 7003 }, { "epoch": 1.9382135196844947, "grad_norm": 0.17120587825775146, "learning_rate": 1e-05, "loss": 0.5042, "step": 7004 }, { "epoch": 1.938490278834844, "grad_norm": 0.17065590620040894, "learning_rate": 1e-05, "loss": 0.5062, "step": 7005 }, { "epoch": 1.9387670379851933, "grad_norm": 0.16842980682849884, "learning_rate": 1e-05, "loss": 0.4843, "step": 7006 }, { "epoch": 1.9390437971355428, "grad_norm": 0.17147402465343475, "learning_rate": 1e-05, "loss": 0.4846, "step": 7007 }, { "epoch": 1.9393205562858922, "grad_norm": 0.17833302915096283, "learning_rate": 1e-05, "loss": 0.4933, "step": 7008 }, { "epoch": 1.9395973154362416, "grad_norm": 0.16694068908691406, "learning_rate": 1e-05, "loss": 0.4849, "step": 7009 }, { "epoch": 1.939874074586591, "grad_norm": 0.1702204793691635, "learning_rate": 1e-05, "loss": 0.5013, "step": 7010 }, { "epoch": 1.9401508337369404, "grad_norm": 0.1754356175661087, "learning_rate": 1e-05, "loss": 0.4822, "step": 7011 }, { "epoch": 1.9404275928872898, "grad_norm": 0.17071063816547394, "learning_rate": 1e-05, "loss": 0.5136, "step": 7012 }, { "epoch": 1.9407043520376392, "grad_norm": 0.169433131814003, "learning_rate": 1e-05, "loss": 0.5455, "step": 7013 }, { "epoch": 1.9409811111879887, "grad_norm": 0.18203555047512054, "learning_rate": 1e-05, "loss": 0.5099, "step": 7014 }, { "epoch": 1.941257870338338, "grad_norm": 0.17061077058315277, "learning_rate": 1e-05, "loss": 0.4821, "step": 7015 }, { "epoch": 1.9415346294886875, "grad_norm": 0.16266736388206482, "learning_rate": 1e-05, "loss": 0.4704, "step": 7016 }, { "epoch": 1.941811388639037, "grad_norm": 0.17017726600170135, "learning_rate": 1e-05, "loss": 0.495, "step": 7017 }, { "epoch": 1.9420881477893863, "grad_norm": 0.1710035800933838, "learning_rate": 1e-05, "loss": 0.4912, "step": 7018 }, { "epoch": 1.9423649069397357, "grad_norm": 0.16256971657276154, "learning_rate": 1e-05, "loss": 0.4981, "step": 7019 }, { "epoch": 1.9426416660900852, "grad_norm": 0.17382419109344482, "learning_rate": 1e-05, "loss": 0.5117, "step": 7020 }, { "epoch": 1.9429184252404346, "grad_norm": 0.1693197637796402, "learning_rate": 1e-05, "loss": 0.489, "step": 7021 }, { "epoch": 1.9431951843907838, "grad_norm": 0.16921454668045044, "learning_rate": 1e-05, "loss": 0.4779, "step": 7022 }, { "epoch": 1.9434719435411334, "grad_norm": 0.16940318048000336, "learning_rate": 1e-05, "loss": 0.5035, "step": 7023 }, { "epoch": 1.9437487026914826, "grad_norm": 0.16895563900470734, "learning_rate": 1e-05, "loss": 0.5019, "step": 7024 }, { "epoch": 1.9440254618418322, "grad_norm": 0.16613861918449402, "learning_rate": 1e-05, "loss": 0.4893, "step": 7025 }, { "epoch": 1.9443022209921814, "grad_norm": 0.17504428327083588, "learning_rate": 1e-05, "loss": 0.5114, "step": 7026 }, { "epoch": 1.944578980142531, "grad_norm": 0.17210859060287476, "learning_rate": 1e-05, "loss": 0.5147, "step": 7027 }, { "epoch": 1.9448557392928802, "grad_norm": 0.16452424228191376, "learning_rate": 1e-05, "loss": 0.5056, "step": 7028 }, { "epoch": 1.9451324984432299, "grad_norm": 0.17416289448738098, "learning_rate": 1e-05, "loss": 0.5014, "step": 7029 }, { "epoch": 1.945409257593579, "grad_norm": 0.17765581607818604, "learning_rate": 1e-05, "loss": 0.5257, "step": 7030 }, { "epoch": 1.9456860167439287, "grad_norm": 0.16915880143642426, "learning_rate": 1e-05, "loss": 0.4908, "step": 7031 }, { "epoch": 1.945962775894278, "grad_norm": 0.17253071069717407, "learning_rate": 1e-05, "loss": 0.4795, "step": 7032 }, { "epoch": 1.9462395350446275, "grad_norm": 0.17461025714874268, "learning_rate": 1e-05, "loss": 0.4965, "step": 7033 }, { "epoch": 1.9465162941949767, "grad_norm": 0.16512788832187653, "learning_rate": 1e-05, "loss": 0.5084, "step": 7034 }, { "epoch": 1.9467930533453264, "grad_norm": 0.17199201881885529, "learning_rate": 1e-05, "loss": 0.5017, "step": 7035 }, { "epoch": 1.9470698124956756, "grad_norm": 0.17167818546295166, "learning_rate": 1e-05, "loss": 0.5212, "step": 7036 }, { "epoch": 1.9473465716460252, "grad_norm": 0.16839317977428436, "learning_rate": 1e-05, "loss": 0.4915, "step": 7037 }, { "epoch": 1.9476233307963744, "grad_norm": 0.16965711116790771, "learning_rate": 1e-05, "loss": 0.4947, "step": 7038 }, { "epoch": 1.9479000899467238, "grad_norm": 0.17766188085079193, "learning_rate": 1e-05, "loss": 0.5149, "step": 7039 }, { "epoch": 1.9481768490970732, "grad_norm": 0.16679750382900238, "learning_rate": 1e-05, "loss": 0.5106, "step": 7040 }, { "epoch": 1.9484536082474226, "grad_norm": 0.1743587702512741, "learning_rate": 1e-05, "loss": 0.4837, "step": 7041 }, { "epoch": 1.948730367397772, "grad_norm": 0.17183911800384521, "learning_rate": 1e-05, "loss": 0.5406, "step": 7042 }, { "epoch": 1.9490071265481215, "grad_norm": 0.17110487818717957, "learning_rate": 1e-05, "loss": 0.4706, "step": 7043 }, { "epoch": 1.9492838856984709, "grad_norm": 0.1668350249528885, "learning_rate": 1e-05, "loss": 0.4964, "step": 7044 }, { "epoch": 1.9495606448488203, "grad_norm": 0.17973574995994568, "learning_rate": 1e-05, "loss": 0.4808, "step": 7045 }, { "epoch": 1.9498374039991697, "grad_norm": 0.17719677090644836, "learning_rate": 1e-05, "loss": 0.4958, "step": 7046 }, { "epoch": 1.9501141631495191, "grad_norm": 0.1607174575328827, "learning_rate": 1e-05, "loss": 0.4981, "step": 7047 }, { "epoch": 1.9503909222998685, "grad_norm": 0.17022211849689484, "learning_rate": 1e-05, "loss": 0.5239, "step": 7048 }, { "epoch": 1.950667681450218, "grad_norm": 0.1737339049577713, "learning_rate": 1e-05, "loss": 0.4925, "step": 7049 }, { "epoch": 1.9509444406005674, "grad_norm": 0.1800965815782547, "learning_rate": 1e-05, "loss": 0.4933, "step": 7050 }, { "epoch": 1.9512211997509168, "grad_norm": 0.17075864970684052, "learning_rate": 1e-05, "loss": 0.5045, "step": 7051 }, { "epoch": 1.9514979589012662, "grad_norm": 0.17429548501968384, "learning_rate": 1e-05, "loss": 0.5137, "step": 7052 }, { "epoch": 1.9517747180516156, "grad_norm": 0.16621540486812592, "learning_rate": 1e-05, "loss": 0.4927, "step": 7053 }, { "epoch": 1.952051477201965, "grad_norm": 0.1704307198524475, "learning_rate": 1e-05, "loss": 0.501, "step": 7054 }, { "epoch": 1.9523282363523145, "grad_norm": 0.1625424176454544, "learning_rate": 1e-05, "loss": 0.4831, "step": 7055 }, { "epoch": 1.9526049955026639, "grad_norm": 0.16554254293441772, "learning_rate": 1e-05, "loss": 0.485, "step": 7056 }, { "epoch": 1.952881754653013, "grad_norm": 0.16242580115795135, "learning_rate": 1e-05, "loss": 0.5424, "step": 7057 }, { "epoch": 1.9531585138033627, "grad_norm": 0.17826701700687408, "learning_rate": 1e-05, "loss": 0.5126, "step": 7058 }, { "epoch": 1.9534352729537119, "grad_norm": 0.17178814113140106, "learning_rate": 1e-05, "loss": 0.4987, "step": 7059 }, { "epoch": 1.9537120321040615, "grad_norm": 0.16991235315799713, "learning_rate": 1e-05, "loss": 0.4794, "step": 7060 }, { "epoch": 1.9539887912544107, "grad_norm": 0.16546471416950226, "learning_rate": 1e-05, "loss": 0.4872, "step": 7061 }, { "epoch": 1.9542655504047604, "grad_norm": 0.16569319367408752, "learning_rate": 1e-05, "loss": 0.477, "step": 7062 }, { "epoch": 1.9545423095551095, "grad_norm": 0.17064787447452545, "learning_rate": 1e-05, "loss": 0.4773, "step": 7063 }, { "epoch": 1.9548190687054592, "grad_norm": 0.16671986877918243, "learning_rate": 1e-05, "loss": 0.5067, "step": 7064 }, { "epoch": 1.9550958278558084, "grad_norm": 0.16855552792549133, "learning_rate": 1e-05, "loss": 0.5384, "step": 7065 }, { "epoch": 1.955372587006158, "grad_norm": 0.17143860459327698, "learning_rate": 1e-05, "loss": 0.5145, "step": 7066 }, { "epoch": 1.9556493461565072, "grad_norm": 0.1708568036556244, "learning_rate": 1e-05, "loss": 0.5242, "step": 7067 }, { "epoch": 1.9559261053068568, "grad_norm": 0.17042624950408936, "learning_rate": 1e-05, "loss": 0.5184, "step": 7068 }, { "epoch": 1.956202864457206, "grad_norm": 0.17911702394485474, "learning_rate": 1e-05, "loss": 0.5295, "step": 7069 }, { "epoch": 1.9564796236075557, "grad_norm": 0.17418624460697174, "learning_rate": 1e-05, "loss": 0.5085, "step": 7070 }, { "epoch": 1.9567563827579049, "grad_norm": 0.17797286808490753, "learning_rate": 1e-05, "loss": 0.4987, "step": 7071 }, { "epoch": 1.9570331419082545, "grad_norm": 0.17031443119049072, "learning_rate": 1e-05, "loss": 0.4842, "step": 7072 }, { "epoch": 1.9573099010586037, "grad_norm": 0.16291572153568268, "learning_rate": 1e-05, "loss": 0.4903, "step": 7073 }, { "epoch": 1.957586660208953, "grad_norm": 0.16801825165748596, "learning_rate": 1e-05, "loss": 0.532, "step": 7074 }, { "epoch": 1.9578634193593025, "grad_norm": 0.17995494604110718, "learning_rate": 1e-05, "loss": 0.4991, "step": 7075 }, { "epoch": 1.958140178509652, "grad_norm": 0.16850335896015167, "learning_rate": 1e-05, "loss": 0.4799, "step": 7076 }, { "epoch": 1.9584169376600014, "grad_norm": 0.17016829550266266, "learning_rate": 1e-05, "loss": 0.4777, "step": 7077 }, { "epoch": 1.9586936968103508, "grad_norm": 0.1746949851512909, "learning_rate": 1e-05, "loss": 0.5192, "step": 7078 }, { "epoch": 1.9589704559607002, "grad_norm": 0.17394128441810608, "learning_rate": 1e-05, "loss": 0.4853, "step": 7079 }, { "epoch": 1.9592472151110496, "grad_norm": 0.1632394790649414, "learning_rate": 1e-05, "loss": 0.4983, "step": 7080 }, { "epoch": 1.959523974261399, "grad_norm": 0.16596320271492004, "learning_rate": 1e-05, "loss": 0.4906, "step": 7081 }, { "epoch": 1.9598007334117484, "grad_norm": 0.17153343558311462, "learning_rate": 1e-05, "loss": 0.4881, "step": 7082 }, { "epoch": 1.9600774925620978, "grad_norm": 0.16721561551094055, "learning_rate": 1e-05, "loss": 0.485, "step": 7083 }, { "epoch": 1.9603542517124473, "grad_norm": 0.16729934513568878, "learning_rate": 1e-05, "loss": 0.5053, "step": 7084 }, { "epoch": 1.9606310108627967, "grad_norm": 0.17605262994766235, "learning_rate": 1e-05, "loss": 0.5326, "step": 7085 }, { "epoch": 1.960907770013146, "grad_norm": 0.16785192489624023, "learning_rate": 1e-05, "loss": 0.5095, "step": 7086 }, { "epoch": 1.9611845291634955, "grad_norm": 0.16189061105251312, "learning_rate": 1e-05, "loss": 0.4999, "step": 7087 }, { "epoch": 1.961461288313845, "grad_norm": 0.16639089584350586, "learning_rate": 1e-05, "loss": 0.5146, "step": 7088 }, { "epoch": 1.9617380474641943, "grad_norm": 0.1733742654323578, "learning_rate": 1e-05, "loss": 0.4897, "step": 7089 }, { "epoch": 1.9620148066145435, "grad_norm": 0.17138789594173431, "learning_rate": 1e-05, "loss": 0.4954, "step": 7090 }, { "epoch": 1.9622915657648932, "grad_norm": 0.1718035638332367, "learning_rate": 1e-05, "loss": 0.5155, "step": 7091 }, { "epoch": 1.9625683249152424, "grad_norm": 0.16717608273029327, "learning_rate": 1e-05, "loss": 0.5243, "step": 7092 }, { "epoch": 1.962845084065592, "grad_norm": 0.16962659358978271, "learning_rate": 1e-05, "loss": 0.5148, "step": 7093 }, { "epoch": 1.9631218432159412, "grad_norm": 0.17308706045150757, "learning_rate": 1e-05, "loss": 0.4932, "step": 7094 }, { "epoch": 1.9633986023662908, "grad_norm": 0.17481614649295807, "learning_rate": 1e-05, "loss": 0.4943, "step": 7095 }, { "epoch": 1.96367536151664, "grad_norm": 0.17535613477230072, "learning_rate": 1e-05, "loss": 0.4971, "step": 7096 }, { "epoch": 1.9639521206669897, "grad_norm": 0.16794396936893463, "learning_rate": 1e-05, "loss": 0.5055, "step": 7097 }, { "epoch": 1.9642288798173388, "grad_norm": 0.17192447185516357, "learning_rate": 1e-05, "loss": 0.4991, "step": 7098 }, { "epoch": 1.9645056389676885, "grad_norm": 0.1724812090396881, "learning_rate": 1e-05, "loss": 0.5003, "step": 7099 }, { "epoch": 1.9647823981180377, "grad_norm": 0.17160077393054962, "learning_rate": 1e-05, "loss": 0.4975, "step": 7100 }, { "epoch": 1.9650591572683873, "grad_norm": 0.17400182783603668, "learning_rate": 1e-05, "loss": 0.4773, "step": 7101 }, { "epoch": 1.9653359164187365, "grad_norm": 0.17265300452709198, "learning_rate": 1e-05, "loss": 0.4632, "step": 7102 }, { "epoch": 1.9656126755690861, "grad_norm": 0.1658206433057785, "learning_rate": 1e-05, "loss": 0.4966, "step": 7103 }, { "epoch": 1.9658894347194353, "grad_norm": 0.17290456593036652, "learning_rate": 1e-05, "loss": 0.5205, "step": 7104 }, { "epoch": 1.966166193869785, "grad_norm": 0.16483895480632782, "learning_rate": 1e-05, "loss": 0.4676, "step": 7105 }, { "epoch": 1.9664429530201342, "grad_norm": 0.1818087100982666, "learning_rate": 1e-05, "loss": 0.4909, "step": 7106 }, { "epoch": 1.9667197121704838, "grad_norm": 0.17631657421588898, "learning_rate": 1e-05, "loss": 0.488, "step": 7107 }, { "epoch": 1.966996471320833, "grad_norm": 0.1719113290309906, "learning_rate": 1e-05, "loss": 0.5059, "step": 7108 }, { "epoch": 1.9672732304711824, "grad_norm": 0.17272481322288513, "learning_rate": 1e-05, "loss": 0.5136, "step": 7109 }, { "epoch": 1.9675499896215318, "grad_norm": 0.17102868854999542, "learning_rate": 1e-05, "loss": 0.5195, "step": 7110 }, { "epoch": 1.9678267487718812, "grad_norm": 0.1728602945804596, "learning_rate": 1e-05, "loss": 0.473, "step": 7111 }, { "epoch": 1.9681035079222307, "grad_norm": 0.16678780317306519, "learning_rate": 1e-05, "loss": 0.4906, "step": 7112 }, { "epoch": 1.96838026707258, "grad_norm": 0.18989628553390503, "learning_rate": 1e-05, "loss": 0.5209, "step": 7113 }, { "epoch": 1.9686570262229295, "grad_norm": 0.17932872474193573, "learning_rate": 1e-05, "loss": 0.4836, "step": 7114 }, { "epoch": 1.968933785373279, "grad_norm": 0.16788507997989655, "learning_rate": 1e-05, "loss": 0.4984, "step": 7115 }, { "epoch": 1.9692105445236283, "grad_norm": 0.17059285938739777, "learning_rate": 1e-05, "loss": 0.5023, "step": 7116 }, { "epoch": 1.9694873036739777, "grad_norm": 0.17030414938926697, "learning_rate": 1e-05, "loss": 0.5094, "step": 7117 }, { "epoch": 1.9697640628243271, "grad_norm": 0.1649460345506668, "learning_rate": 1e-05, "loss": 0.4952, "step": 7118 }, { "epoch": 1.9700408219746766, "grad_norm": 0.16557513177394867, "learning_rate": 1e-05, "loss": 0.4892, "step": 7119 }, { "epoch": 1.970317581125026, "grad_norm": 0.16692276298999786, "learning_rate": 1e-05, "loss": 0.5084, "step": 7120 }, { "epoch": 1.9705943402753754, "grad_norm": 0.17776696383953094, "learning_rate": 1e-05, "loss": 0.5125, "step": 7121 }, { "epoch": 1.9708710994257248, "grad_norm": 0.17544718086719513, "learning_rate": 1e-05, "loss": 0.4982, "step": 7122 }, { "epoch": 1.9711478585760742, "grad_norm": 0.16620908677577972, "learning_rate": 1e-05, "loss": 0.4991, "step": 7123 }, { "epoch": 1.9714246177264236, "grad_norm": 0.16892218589782715, "learning_rate": 1e-05, "loss": 0.4889, "step": 7124 }, { "epoch": 1.9717013768767728, "grad_norm": 0.1694834679365158, "learning_rate": 1e-05, "loss": 0.4983, "step": 7125 }, { "epoch": 1.9719781360271225, "grad_norm": 0.16844309866428375, "learning_rate": 1e-05, "loss": 0.4737, "step": 7126 }, { "epoch": 1.9722548951774717, "grad_norm": 0.17199942469596863, "learning_rate": 1e-05, "loss": 0.4768, "step": 7127 }, { "epoch": 1.9725316543278213, "grad_norm": 0.17580750584602356, "learning_rate": 1e-05, "loss": 0.4944, "step": 7128 }, { "epoch": 1.9728084134781705, "grad_norm": 0.16993458569049835, "learning_rate": 1e-05, "loss": 0.5245, "step": 7129 }, { "epoch": 1.9730851726285201, "grad_norm": 0.1708415001630783, "learning_rate": 1e-05, "loss": 0.5279, "step": 7130 }, { "epoch": 1.9733619317788693, "grad_norm": 0.17840491235256195, "learning_rate": 1e-05, "loss": 0.4802, "step": 7131 }, { "epoch": 1.973638690929219, "grad_norm": 0.16318395733833313, "learning_rate": 1e-05, "loss": 0.5232, "step": 7132 }, { "epoch": 1.9739154500795681, "grad_norm": 0.16574594378471375, "learning_rate": 1e-05, "loss": 0.4825, "step": 7133 }, { "epoch": 1.9741922092299178, "grad_norm": 0.17652055621147156, "learning_rate": 1e-05, "loss": 0.4884, "step": 7134 }, { "epoch": 1.974468968380267, "grad_norm": 0.16110187768936157, "learning_rate": 1e-05, "loss": 0.4877, "step": 7135 }, { "epoch": 1.9747457275306166, "grad_norm": 0.16620387136936188, "learning_rate": 1e-05, "loss": 0.5178, "step": 7136 }, { "epoch": 1.9750224866809658, "grad_norm": 0.1785244345664978, "learning_rate": 1e-05, "loss": 0.5147, "step": 7137 }, { "epoch": 1.9752992458313154, "grad_norm": 0.17440250515937805, "learning_rate": 1e-05, "loss": 0.5185, "step": 7138 }, { "epoch": 1.9755760049816646, "grad_norm": 0.17371459305286407, "learning_rate": 1e-05, "loss": 0.4928, "step": 7139 }, { "epoch": 1.9758527641320143, "grad_norm": 0.17520995438098907, "learning_rate": 1e-05, "loss": 0.5314, "step": 7140 }, { "epoch": 1.9761295232823635, "grad_norm": 0.18639421463012695, "learning_rate": 1e-05, "loss": 0.5263, "step": 7141 }, { "epoch": 1.9764062824327129, "grad_norm": 0.17648614943027496, "learning_rate": 1e-05, "loss": 0.4778, "step": 7142 }, { "epoch": 1.9766830415830623, "grad_norm": 0.16939648985862732, "learning_rate": 1e-05, "loss": 0.5244, "step": 7143 }, { "epoch": 1.9769598007334117, "grad_norm": 0.1746552586555481, "learning_rate": 1e-05, "loss": 0.5254, "step": 7144 }, { "epoch": 1.9772365598837611, "grad_norm": 0.16997545957565308, "learning_rate": 1e-05, "loss": 0.5082, "step": 7145 }, { "epoch": 1.9775133190341105, "grad_norm": 0.16056345403194427, "learning_rate": 1e-05, "loss": 0.4981, "step": 7146 }, { "epoch": 1.97779007818446, "grad_norm": 0.17097042500972748, "learning_rate": 1e-05, "loss": 0.4975, "step": 7147 }, { "epoch": 1.9780668373348094, "grad_norm": 0.17441526055335999, "learning_rate": 1e-05, "loss": 0.4872, "step": 7148 }, { "epoch": 1.9783435964851588, "grad_norm": 0.17436960339546204, "learning_rate": 1e-05, "loss": 0.4858, "step": 7149 }, { "epoch": 1.9786203556355082, "grad_norm": 0.17076919972896576, "learning_rate": 1e-05, "loss": 0.4981, "step": 7150 }, { "epoch": 1.9788971147858576, "grad_norm": 0.1681969165802002, "learning_rate": 1e-05, "loss": 0.5125, "step": 7151 }, { "epoch": 1.979173873936207, "grad_norm": 0.16930270195007324, "learning_rate": 1e-05, "loss": 0.4983, "step": 7152 }, { "epoch": 1.9794506330865564, "grad_norm": 0.16875582933425903, "learning_rate": 1e-05, "loss": 0.4782, "step": 7153 }, { "epoch": 1.9797273922369059, "grad_norm": 0.16888749599456787, "learning_rate": 1e-05, "loss": 0.5247, "step": 7154 }, { "epoch": 1.9800041513872553, "grad_norm": 0.17178316414356232, "learning_rate": 1e-05, "loss": 0.485, "step": 7155 }, { "epoch": 1.9802809105376047, "grad_norm": 0.16896232962608337, "learning_rate": 1e-05, "loss": 0.4938, "step": 7156 }, { "epoch": 1.980557669687954, "grad_norm": 0.17426984012126923, "learning_rate": 1e-05, "loss": 0.5204, "step": 7157 }, { "epoch": 1.9808344288383035, "grad_norm": 0.1908217817544937, "learning_rate": 1e-05, "loss": 0.5021, "step": 7158 }, { "epoch": 1.981111187988653, "grad_norm": 0.17106251418590546, "learning_rate": 1e-05, "loss": 0.5085, "step": 7159 }, { "epoch": 1.9813879471390021, "grad_norm": 0.1667090803384781, "learning_rate": 1e-05, "loss": 0.4892, "step": 7160 }, { "epoch": 1.9816647062893518, "grad_norm": 0.1724335253238678, "learning_rate": 1e-05, "loss": 0.4743, "step": 7161 }, { "epoch": 1.981941465439701, "grad_norm": 0.16774596273899078, "learning_rate": 1e-05, "loss": 0.5091, "step": 7162 }, { "epoch": 1.9822182245900506, "grad_norm": 0.16984586417675018, "learning_rate": 1e-05, "loss": 0.516, "step": 7163 }, { "epoch": 1.9824949837403998, "grad_norm": 0.1678617000579834, "learning_rate": 1e-05, "loss": 0.4856, "step": 7164 }, { "epoch": 1.9827717428907494, "grad_norm": 0.17286449670791626, "learning_rate": 1e-05, "loss": 0.5138, "step": 7165 }, { "epoch": 1.9830485020410986, "grad_norm": 0.17181596159934998, "learning_rate": 1e-05, "loss": 0.5153, "step": 7166 }, { "epoch": 1.9833252611914483, "grad_norm": 0.1691957712173462, "learning_rate": 1e-05, "loss": 0.4866, "step": 7167 }, { "epoch": 1.9836020203417974, "grad_norm": 0.1674533635377884, "learning_rate": 1e-05, "loss": 0.5042, "step": 7168 }, { "epoch": 1.983878779492147, "grad_norm": 0.17005498707294464, "learning_rate": 1e-05, "loss": 0.4876, "step": 7169 }, { "epoch": 1.9841555386424963, "grad_norm": 0.17014223337173462, "learning_rate": 1e-05, "loss": 0.5174, "step": 7170 }, { "epoch": 1.984432297792846, "grad_norm": 0.17546652257442474, "learning_rate": 1e-05, "loss": 0.4922, "step": 7171 }, { "epoch": 1.984709056943195, "grad_norm": 0.16560129821300507, "learning_rate": 1e-05, "loss": 0.4898, "step": 7172 }, { "epoch": 1.9849858160935447, "grad_norm": 0.17174983024597168, "learning_rate": 1e-05, "loss": 0.5331, "step": 7173 }, { "epoch": 1.985262575243894, "grad_norm": 0.15915651619434357, "learning_rate": 1e-05, "loss": 0.4827, "step": 7174 }, { "epoch": 1.9855393343942436, "grad_norm": 0.17407700419425964, "learning_rate": 1e-05, "loss": 0.5068, "step": 7175 }, { "epoch": 1.9858160935445928, "grad_norm": 0.17162014544010162, "learning_rate": 1e-05, "loss": 0.5181, "step": 7176 }, { "epoch": 1.9860928526949422, "grad_norm": 0.1766163557767868, "learning_rate": 1e-05, "loss": 0.5126, "step": 7177 }, { "epoch": 1.9863696118452916, "grad_norm": 0.16907410323619843, "learning_rate": 1e-05, "loss": 0.4864, "step": 7178 }, { "epoch": 1.986646370995641, "grad_norm": 0.17466051876544952, "learning_rate": 1e-05, "loss": 0.5001, "step": 7179 }, { "epoch": 1.9869231301459904, "grad_norm": 0.18389195203781128, "learning_rate": 1e-05, "loss": 0.5021, "step": 7180 }, { "epoch": 1.9871998892963398, "grad_norm": 0.16902092099189758, "learning_rate": 1e-05, "loss": 0.4749, "step": 7181 }, { "epoch": 1.9874766484466893, "grad_norm": 0.17070956528186798, "learning_rate": 1e-05, "loss": 0.5028, "step": 7182 }, { "epoch": 1.9877534075970387, "grad_norm": 0.167259082198143, "learning_rate": 1e-05, "loss": 0.5075, "step": 7183 }, { "epoch": 1.988030166747388, "grad_norm": 0.16727331280708313, "learning_rate": 1e-05, "loss": 0.4942, "step": 7184 }, { "epoch": 1.9883069258977375, "grad_norm": 0.1813332885503769, "learning_rate": 1e-05, "loss": 0.4864, "step": 7185 }, { "epoch": 1.988583685048087, "grad_norm": 0.16906806826591492, "learning_rate": 1e-05, "loss": 0.4885, "step": 7186 }, { "epoch": 1.9888604441984363, "grad_norm": 0.17396627366542816, "learning_rate": 1e-05, "loss": 0.4942, "step": 7187 }, { "epoch": 1.9891372033487857, "grad_norm": 0.17281867563724518, "learning_rate": 1e-05, "loss": 0.4906, "step": 7188 }, { "epoch": 1.9894139624991352, "grad_norm": 0.17131273448467255, "learning_rate": 1e-05, "loss": 0.4835, "step": 7189 }, { "epoch": 1.9896907216494846, "grad_norm": 0.1718175709247589, "learning_rate": 1e-05, "loss": 0.5098, "step": 7190 }, { "epoch": 1.989967480799834, "grad_norm": 0.17776836454868317, "learning_rate": 1e-05, "loss": 0.5298, "step": 7191 }, { "epoch": 1.9902442399501834, "grad_norm": 0.17810772359371185, "learning_rate": 1e-05, "loss": 0.5318, "step": 7192 }, { "epoch": 1.9905209991005326, "grad_norm": 0.16680783033370972, "learning_rate": 1e-05, "loss": 0.4864, "step": 7193 }, { "epoch": 1.9907977582508822, "grad_norm": 0.16557487845420837, "learning_rate": 1e-05, "loss": 0.5169, "step": 7194 }, { "epoch": 1.9910745174012314, "grad_norm": 0.18172769248485565, "learning_rate": 1e-05, "loss": 0.5092, "step": 7195 }, { "epoch": 1.991351276551581, "grad_norm": 0.17332272231578827, "learning_rate": 1e-05, "loss": 0.5139, "step": 7196 }, { "epoch": 1.9916280357019303, "grad_norm": 0.17269538342952728, "learning_rate": 1e-05, "loss": 0.4963, "step": 7197 }, { "epoch": 1.99190479485228, "grad_norm": 0.16824579238891602, "learning_rate": 1e-05, "loss": 0.4737, "step": 7198 }, { "epoch": 1.992181554002629, "grad_norm": 0.16306976974010468, "learning_rate": 1e-05, "loss": 0.511, "step": 7199 }, { "epoch": 1.9924583131529787, "grad_norm": 0.1671704798936844, "learning_rate": 1e-05, "loss": 0.5077, "step": 7200 }, { "epoch": 1.992735072303328, "grad_norm": 0.17102189362049103, "learning_rate": 1e-05, "loss": 0.4938, "step": 7201 }, { "epoch": 1.9930118314536776, "grad_norm": 0.16842667758464813, "learning_rate": 1e-05, "loss": 0.4864, "step": 7202 }, { "epoch": 1.9932885906040267, "grad_norm": 0.1713099330663681, "learning_rate": 1e-05, "loss": 0.4971, "step": 7203 }, { "epoch": 1.9935653497543764, "grad_norm": 0.18238218128681183, "learning_rate": 1e-05, "loss": 0.4963, "step": 7204 }, { "epoch": 1.9938421089047256, "grad_norm": 0.17305579781532288, "learning_rate": 1e-05, "loss": 0.5022, "step": 7205 }, { "epoch": 1.9941188680550752, "grad_norm": 0.16974042356014252, "learning_rate": 1e-05, "loss": 0.5158, "step": 7206 }, { "epoch": 1.9943956272054244, "grad_norm": 0.17262136936187744, "learning_rate": 1e-05, "loss": 0.4938, "step": 7207 }, { "epoch": 1.994672386355774, "grad_norm": 0.1753796637058258, "learning_rate": 1e-05, "loss": 0.4974, "step": 7208 }, { "epoch": 1.9949491455061232, "grad_norm": 0.16617366671562195, "learning_rate": 1e-05, "loss": 0.4845, "step": 7209 }, { "epoch": 1.9952259046564729, "grad_norm": 0.17575739324092865, "learning_rate": 1e-05, "loss": 0.5117, "step": 7210 }, { "epoch": 1.995502663806822, "grad_norm": 0.173742413520813, "learning_rate": 1e-05, "loss": 0.4957, "step": 7211 }, { "epoch": 1.9957794229571715, "grad_norm": 0.17071154713630676, "learning_rate": 1e-05, "loss": 0.4815, "step": 7212 }, { "epoch": 1.996056182107521, "grad_norm": 0.1655445545911789, "learning_rate": 1e-05, "loss": 0.5163, "step": 7213 }, { "epoch": 1.9963329412578703, "grad_norm": 0.17054541409015656, "learning_rate": 1e-05, "loss": 0.5166, "step": 7214 }, { "epoch": 1.9966097004082197, "grad_norm": 0.17463527619838715, "learning_rate": 1e-05, "loss": 0.4978, "step": 7215 }, { "epoch": 1.9968864595585691, "grad_norm": 0.17157112061977386, "learning_rate": 1e-05, "loss": 0.503, "step": 7216 }, { "epoch": 1.9971632187089186, "grad_norm": 0.17020495235919952, "learning_rate": 1e-05, "loss": 0.4873, "step": 7217 }, { "epoch": 1.997439977859268, "grad_norm": 0.17884351313114166, "learning_rate": 1e-05, "loss": 0.4948, "step": 7218 }, { "epoch": 1.9977167370096174, "grad_norm": 0.17072179913520813, "learning_rate": 1e-05, "loss": 0.4976, "step": 7219 }, { "epoch": 1.9979934961599668, "grad_norm": 0.1659204214811325, "learning_rate": 1e-05, "loss": 0.5103, "step": 7220 }, { "epoch": 1.9982702553103162, "grad_norm": 0.17815425992012024, "learning_rate": 1e-05, "loss": 0.5438, "step": 7221 }, { "epoch": 1.9985470144606656, "grad_norm": 0.18199045956134796, "learning_rate": 1e-05, "loss": 0.4879, "step": 7222 }, { "epoch": 1.998823773611015, "grad_norm": 0.16575296223163605, "learning_rate": 1e-05, "loss": 0.4943, "step": 7223 }, { "epoch": 1.9991005327613645, "grad_norm": 0.16942286491394043, "learning_rate": 1e-05, "loss": 0.5028, "step": 7224 }, { "epoch": 1.9993772919117139, "grad_norm": 0.16773147881031036, "learning_rate": 1e-05, "loss": 0.4841, "step": 7225 }, { "epoch": 1.9996540510620633, "grad_norm": 0.17067377269268036, "learning_rate": 1e-05, "loss": 0.5097, "step": 7226 } ], "logging_steps": 1, "max_steps": 7226, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 1807, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.9609902613431688e+20, "train_batch_size": 2, "trial_name": null, "trial_params": null }