diff --git "a/checkpoint-8477/trainer_state.json" "b/checkpoint-8477/trainer_state.json" new file mode 100644--- /dev/null +++ "b/checkpoint-8477/trainer_state.json" @@ -0,0 +1,50891 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.9625278509216124, + "eval_steps": 500, + "global_step": 8477, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 0.0, + "loss": 1.1302, + "step": 1 + }, + { + "epoch": 0.0, + "learning_rate": 0.0, + "loss": 1.2193, + "step": 2 + }, + { + "epoch": 0.0, + "learning_rate": 0.0, + "loss": 1.1633, + "step": 3 + }, + { + "epoch": 0.0, + "learning_rate": 0.0, + "loss": 1.1995, + "step": 4 + }, + { + "epoch": 0.0, + "learning_rate": 0.0, + "loss": 1.1775, + "step": 5 + }, + { + "epoch": 0.0, + "learning_rate": 0.0, + "loss": 1.1753, + "step": 6 + }, + { + "epoch": 0.0, + "learning_rate": 0.0, + "loss": 1.2371, + "step": 7 + }, + { + "epoch": 0.0, + "learning_rate": 0.0, + "loss": 1.2672, + "step": 8 + }, + { + "epoch": 0.0, + "learning_rate": 0.0, + "loss": 1.2261, + "step": 9 + }, + { + "epoch": 0.0, + "learning_rate": 0.0, + "loss": 1.1937, + "step": 10 + }, + { + "epoch": 0.0, + "learning_rate": 0.0, + "loss": 1.1238, + "step": 11 + }, + { + "epoch": 0.0, + "learning_rate": 0.0, + "loss": 1.1763, + "step": 12 + }, + { + "epoch": 0.0, + "learning_rate": 0.0, + "loss": 1.1434, + "step": 13 + }, + { + "epoch": 0.0, + "learning_rate": 0.0, + "loss": 1.2315, + "step": 14 + }, + { + "epoch": 0.0, + "learning_rate": 0.0, + "loss": 1.2097, + "step": 15 + }, + { + "epoch": 0.0, + "learning_rate": 0.0, + "loss": 1.1639, + "step": 16 + }, + { + "epoch": 0.0, + "learning_rate": 0.0, + "loss": 1.1884, + "step": 17 + }, + { + "epoch": 0.0, + "learning_rate": 4e-05, + "loss": 1.1501, + "step": 18 + }, + { + "epoch": 0.0, + "learning_rate": 4e-05, + "loss": 1.2154, + "step": 19 + }, + { + "epoch": 0.0, + "learning_rate": 8e-05, + "loss": 1.133, + "step": 20 + }, + { + "epoch": 0.0, + "learning_rate": 0.00012, + "loss": 1.0502, + "step": 21 + }, + { + "epoch": 0.01, + "learning_rate": 0.00012, + "loss": 1.1904, + "step": 22 + }, + { + "epoch": 0.01, + "learning_rate": 0.00016, + "loss": 1.1193, + "step": 23 + }, + { + "epoch": 0.01, + "learning_rate": 0.0002, + "loss": 1.0192, + "step": 24 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999999834562185, + "loss": 0.9811, + "step": 25 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999999338248756, + "loss": 0.9311, + "step": 26 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999998511059717, + "loss": 1.0179, + "step": 27 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999997352995104, + "loss": 0.9642, + "step": 28 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999995864054953, + "loss": 1.0291, + "step": 29 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999999404423931, + "loss": 0.912, + "step": 30 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999999189354824, + "loss": 1.0022, + "step": 31 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999998941198181, + "loss": 1.0605, + "step": 32 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999998659954011, + "loss": 0.8987, + "step": 33 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999983456223222, + "loss": 0.9043, + "step": 34 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999979982031257, + "loss": 0.8478, + "step": 35 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999997617696433, + "loss": 0.9314, + "step": 36 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999972041022564, + "loss": 0.8786, + "step": 37 + }, + { + "epoch": 0.01, + "learning_rate": 0.000199999675742061, + "loss": 0.9615, + "step": 38 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999962776515078, + "loss": 0.9248, + "step": 39 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999957647949662, + "loss": 0.8965, + "step": 40 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999952188510026, + "loss": 0.9037, + "step": 41 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999946398196344, + "loss": 0.9219, + "step": 42 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999940277008808, + "loss": 0.9147, + "step": 43 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999933824947625, + "loss": 0.8407, + "step": 44 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999927042013002, + "loss": 0.9245, + "step": 45 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999991992820517, + "loss": 0.9034, + "step": 46 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999912483524362, + "loss": 0.9572, + "step": 47 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999904707970824, + "loss": 0.8646, + "step": 48 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999896601544809, + "loss": 0.8362, + "step": 49 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999988816424659, + "loss": 0.8362, + "step": 50 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999879396076452, + "loss": 0.8844, + "step": 51 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999870297034673, + "loss": 0.9193, + "step": 52 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999986086712156, + "loss": 0.8375, + "step": 53 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999851106337427, + "loss": 0.7938, + "step": 54 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999841014682593, + "loss": 0.8335, + "step": 55 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999830592157394, + "loss": 0.8737, + "step": 56 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999819838762174, + "loss": 0.8544, + "step": 57 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999808754497288, + "loss": 0.8236, + "step": 58 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999797339363104, + "loss": 0.8693, + "step": 59 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999785593360003, + "loss": 0.8374, + "step": 60 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999773516488366, + "loss": 0.8425, + "step": 61 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999761108748597, + "loss": 0.8295, + "step": 62 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999974837014111, + "loss": 0.9197, + "step": 63 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999735300666322, + "loss": 0.8137, + "step": 64 + }, + { + "epoch": 0.02, + "learning_rate": 0.00019999721900324665, + "loss": 0.8066, + "step": 65 + }, + { + "epoch": 0.02, + "learning_rate": 0.00019999708169116587, + "loss": 0.8622, + "step": 66 + }, + { + "epoch": 0.02, + "learning_rate": 0.00019999694107042534, + "loss": 0.9321, + "step": 67 + }, + { + "epoch": 0.02, + "learning_rate": 0.00019999679714102978, + "loss": 0.8322, + "step": 68 + }, + { + "epoch": 0.02, + "learning_rate": 0.00019999664990298394, + "loss": 0.8548, + "step": 69 + }, + { + "epoch": 0.02, + "learning_rate": 0.0001999964993562927, + "loss": 0.9037, + "step": 70 + }, + { + "epoch": 0.02, + "learning_rate": 0.000199996345500961, + "loss": 0.843, + "step": 71 + }, + { + "epoch": 0.02, + "learning_rate": 0.000199996188336994, + "loss": 0.7413, + "step": 72 + }, + { + "epoch": 0.02, + "learning_rate": 0.00019999602786439685, + "loss": 0.865, + "step": 73 + }, + { + "epoch": 0.02, + "learning_rate": 0.00019999586408317484, + "loss": 0.8781, + "step": 74 + }, + { + "epoch": 0.02, + "learning_rate": 0.00019999569699333348, + "loss": 0.8298, + "step": 75 + }, + { + "epoch": 0.02, + "learning_rate": 0.00019999552659487818, + "loss": 0.8432, + "step": 76 + }, + { + "epoch": 0.02, + "learning_rate": 0.00019999535288781463, + "loss": 0.8408, + "step": 77 + }, + { + "epoch": 0.02, + "learning_rate": 0.0001999951758721486, + "loss": 0.7852, + "step": 78 + }, + { + "epoch": 0.02, + "learning_rate": 0.00019999499554788593, + "loss": 0.8581, + "step": 79 + }, + { + "epoch": 0.02, + "learning_rate": 0.0001999948119150326, + "loss": 0.8628, + "step": 80 + }, + { + "epoch": 0.02, + "learning_rate": 0.00019999462497359466, + "loss": 0.8661, + "step": 81 + }, + { + "epoch": 0.02, + "learning_rate": 0.0001999944347235783, + "loss": 0.831, + "step": 82 + }, + { + "epoch": 0.02, + "learning_rate": 0.00019999424116498983, + "loss": 0.9166, + "step": 83 + }, + { + "epoch": 0.02, + "learning_rate": 0.00019999404429783566, + "loss": 0.8071, + "step": 84 + }, + { + "epoch": 0.02, + "learning_rate": 0.00019999384412212228, + "loss": 0.8683, + "step": 85 + }, + { + "epoch": 0.02, + "learning_rate": 0.00019999364063785633, + "loss": 0.7978, + "step": 86 + }, + { + "epoch": 0.02, + "learning_rate": 0.00019999343384504453, + "loss": 0.8396, + "step": 87 + }, + { + "epoch": 0.02, + "learning_rate": 0.00019999322374369372, + "loss": 0.8789, + "step": 88 + }, + { + "epoch": 0.02, + "learning_rate": 0.00019999301033381085, + "loss": 0.9197, + "step": 89 + }, + { + "epoch": 0.02, + "learning_rate": 0.00019999279361540301, + "loss": 0.8511, + "step": 90 + }, + { + "epoch": 0.02, + "learning_rate": 0.00019999257358847735, + "loss": 0.8512, + "step": 91 + }, + { + "epoch": 0.02, + "learning_rate": 0.00019999235025304116, + "loss": 0.8125, + "step": 92 + }, + { + "epoch": 0.02, + "learning_rate": 0.00019999212360910181, + "loss": 0.7839, + "step": 93 + }, + { + "epoch": 0.02, + "learning_rate": 0.00019999189365666685, + "loss": 0.7678, + "step": 94 + }, + { + "epoch": 0.02, + "learning_rate": 0.0001999916603957438, + "loss": 0.8549, + "step": 95 + }, + { + "epoch": 0.02, + "learning_rate": 0.00019999142382634045, + "loss": 0.886, + "step": 96 + }, + { + "epoch": 0.02, + "learning_rate": 0.0001999911839484646, + "loss": 0.8455, + "step": 97 + }, + { + "epoch": 0.02, + "learning_rate": 0.0001999909407621242, + "loss": 0.7946, + "step": 98 + }, + { + "epoch": 0.02, + "learning_rate": 0.0001999906942673273, + "loss": 0.7877, + "step": 99 + }, + { + "epoch": 0.02, + "learning_rate": 0.000199990444464082, + "loss": 0.7894, + "step": 100 + }, + { + "epoch": 0.02, + "learning_rate": 0.00019999019135239664, + "loss": 0.8507, + "step": 101 + }, + { + "epoch": 0.02, + "learning_rate": 0.00019998993493227957, + "loss": 0.8617, + "step": 102 + }, + { + "epoch": 0.02, + "learning_rate": 0.00019998967520373927, + "loss": 0.8381, + "step": 103 + }, + { + "epoch": 0.02, + "learning_rate": 0.00019998941216678432, + "loss": 0.8568, + "step": 104 + }, + { + "epoch": 0.02, + "learning_rate": 0.00019998914582142346, + "loss": 0.8431, + "step": 105 + }, + { + "epoch": 0.02, + "learning_rate": 0.00019998887616766545, + "loss": 0.7984, + "step": 106 + }, + { + "epoch": 0.02, + "learning_rate": 0.0001999886032055193, + "loss": 0.8666, + "step": 107 + }, + { + "epoch": 0.03, + "learning_rate": 0.0001999883269349939, + "loss": 0.8023, + "step": 108 + }, + { + "epoch": 0.03, + "learning_rate": 0.00019998804735609852, + "loss": 0.8323, + "step": 109 + }, + { + "epoch": 0.03, + "learning_rate": 0.00019998776446884236, + "loss": 0.8321, + "step": 110 + }, + { + "epoch": 0.03, + "learning_rate": 0.00019998747827323479, + "loss": 0.8164, + "step": 111 + }, + { + "epoch": 0.03, + "learning_rate": 0.00019998718876928527, + "loss": 0.8613, + "step": 112 + }, + { + "epoch": 0.03, + "learning_rate": 0.00019998689595700336, + "loss": 0.7596, + "step": 113 + }, + { + "epoch": 0.03, + "learning_rate": 0.00019998659983639882, + "loss": 0.8491, + "step": 114 + }, + { + "epoch": 0.03, + "learning_rate": 0.00019998630040748137, + "loss": 0.8224, + "step": 115 + }, + { + "epoch": 0.03, + "learning_rate": 0.00019998599767026093, + "loss": 0.7451, + "step": 116 + }, + { + "epoch": 0.03, + "learning_rate": 0.00019998569162474756, + "loss": 0.731, + "step": 117 + }, + { + "epoch": 0.03, + "learning_rate": 0.00019998538227095135, + "loss": 0.854, + "step": 118 + }, + { + "epoch": 0.03, + "learning_rate": 0.00019998506960888256, + "loss": 0.8263, + "step": 119 + }, + { + "epoch": 0.03, + "learning_rate": 0.00019998475363855152, + "loss": 0.8542, + "step": 120 + }, + { + "epoch": 0.03, + "learning_rate": 0.00019998443435996868, + "loss": 0.8027, + "step": 121 + }, + { + "epoch": 0.03, + "learning_rate": 0.0001999841117731446, + "loss": 0.9464, + "step": 122 + }, + { + "epoch": 0.03, + "learning_rate": 0.00019998378587808997, + "loss": 0.8516, + "step": 123 + }, + { + "epoch": 0.03, + "learning_rate": 0.00019998345667481557, + "loss": 0.8446, + "step": 124 + }, + { + "epoch": 0.03, + "learning_rate": 0.00019998312416333227, + "loss": 0.8324, + "step": 125 + }, + { + "epoch": 0.03, + "learning_rate": 0.00019998278834365115, + "loss": 0.8817, + "step": 126 + }, + { + "epoch": 0.03, + "learning_rate": 0.0001999824492157832, + "loss": 0.8351, + "step": 127 + }, + { + "epoch": 0.03, + "learning_rate": 0.00019998210677973972, + "loss": 0.8708, + "step": 128 + }, + { + "epoch": 0.03, + "learning_rate": 0.00019998176103553205, + "loss": 0.7821, + "step": 129 + }, + { + "epoch": 0.03, + "learning_rate": 0.00019998141198317154, + "loss": 0.7743, + "step": 130 + }, + { + "epoch": 0.03, + "learning_rate": 0.00019998105962266987, + "loss": 0.8352, + "step": 131 + }, + { + "epoch": 0.03, + "learning_rate": 0.0001999807039540386, + "loss": 0.8127, + "step": 132 + }, + { + "epoch": 0.03, + "learning_rate": 0.00019998034497728954, + "loss": 0.7933, + "step": 133 + }, + { + "epoch": 0.03, + "learning_rate": 0.00019997998269243456, + "loss": 0.7829, + "step": 134 + }, + { + "epoch": 0.03, + "learning_rate": 0.00019997961709948564, + "loss": 0.834, + "step": 135 + }, + { + "epoch": 0.03, + "learning_rate": 0.00019997924819845488, + "loss": 0.7925, + "step": 136 + }, + { + "epoch": 0.03, + "learning_rate": 0.0001999788759893545, + "loss": 0.8043, + "step": 137 + }, + { + "epoch": 0.03, + "learning_rate": 0.0001999785004721968, + "loss": 0.7465, + "step": 138 + }, + { + "epoch": 0.03, + "learning_rate": 0.0001999781216469942, + "loss": 0.8322, + "step": 139 + }, + { + "epoch": 0.03, + "learning_rate": 0.00019997773951375926, + "loss": 0.8788, + "step": 140 + }, + { + "epoch": 0.03, + "learning_rate": 0.00019997735407250455, + "loss": 0.7898, + "step": 141 + }, + { + "epoch": 0.03, + "learning_rate": 0.00019997696532324293, + "loss": 0.8559, + "step": 142 + }, + { + "epoch": 0.03, + "learning_rate": 0.0001999765732659872, + "loss": 0.7981, + "step": 143 + }, + { + "epoch": 0.03, + "learning_rate": 0.00019997617790075035, + "loss": 0.7904, + "step": 144 + }, + { + "epoch": 0.03, + "learning_rate": 0.00019997577922754544, + "loss": 0.8208, + "step": 145 + }, + { + "epoch": 0.03, + "learning_rate": 0.0001999753772463857, + "loss": 0.8139, + "step": 146 + }, + { + "epoch": 0.03, + "learning_rate": 0.0001999749719572844, + "loss": 0.9067, + "step": 147 + }, + { + "epoch": 0.03, + "learning_rate": 0.00019997456336025496, + "loss": 0.8134, + "step": 148 + }, + { + "epoch": 0.03, + "learning_rate": 0.0001999741514553109, + "loss": 0.8543, + "step": 149 + }, + { + "epoch": 0.03, + "learning_rate": 0.00019997373624246583, + "loss": 0.812, + "step": 150 + }, + { + "epoch": 0.03, + "learning_rate": 0.00019997331772173352, + "loss": 0.8334, + "step": 151 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019997289589312778, + "loss": 0.807, + "step": 152 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019997247075666263, + "loss": 0.7596, + "step": 153 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019997204231235208, + "loss": 0.8635, + "step": 154 + }, + { + "epoch": 0.04, + "learning_rate": 0.0001999716105602103, + "loss": 0.7812, + "step": 155 + }, + { + "epoch": 0.04, + "learning_rate": 0.0001999711755002516, + "loss": 0.803, + "step": 156 + }, + { + "epoch": 0.04, + "learning_rate": 0.0001999707371324904, + "loss": 0.8068, + "step": 157 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019997029545694116, + "loss": 0.8479, + "step": 158 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019996985047361854, + "loss": 0.8064, + "step": 159 + }, + { + "epoch": 0.04, + "learning_rate": 0.0001999694021825372, + "loss": 0.7946, + "step": 160 + }, + { + "epoch": 0.04, + "learning_rate": 0.000199968950583712, + "loss": 0.8704, + "step": 161 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019996849567715792, + "loss": 0.8403, + "step": 162 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019996803746288996, + "loss": 0.8898, + "step": 163 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019996757594092332, + "loss": 0.7788, + "step": 164 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019996711111127326, + "loss": 0.84, + "step": 165 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019996664297395515, + "loss": 0.7992, + "step": 166 + }, + { + "epoch": 0.04, + "learning_rate": 0.0001999661715289845, + "loss": 0.8083, + "step": 167 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019996569677637686, + "loss": 0.7838, + "step": 168 + }, + { + "epoch": 0.04, + "learning_rate": 0.000199965218716148, + "loss": 0.7995, + "step": 169 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019996473734831366, + "loss": 0.8224, + "step": 170 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019996425267288984, + "loss": 0.8309, + "step": 171 + }, + { + "epoch": 0.04, + "learning_rate": 0.0001999637646898926, + "loss": 0.8162, + "step": 172 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019996327339933797, + "loss": 0.8214, + "step": 173 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019996277880124233, + "loss": 0.8864, + "step": 174 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019996228089562196, + "loss": 0.8201, + "step": 175 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019996177968249334, + "loss": 0.8356, + "step": 176 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019996127516187312, + "loss": 0.7681, + "step": 177 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019996076733377796, + "loss": 0.8029, + "step": 178 + }, + { + "epoch": 0.04, + "learning_rate": 0.0001999602561982246, + "loss": 0.7971, + "step": 179 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019995974175523003, + "loss": 0.813, + "step": 180 + }, + { + "epoch": 0.04, + "learning_rate": 0.0001999592240048113, + "loss": 0.8229, + "step": 181 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019995870294698542, + "loss": 0.7887, + "step": 182 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019995817858176973, + "loss": 0.7867, + "step": 183 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019995765090918157, + "loss": 0.7972, + "step": 184 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019995711992923834, + "loss": 0.7363, + "step": 185 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019995658564195767, + "loss": 0.7376, + "step": 186 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019995604804735718, + "loss": 0.8753, + "step": 187 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019995550714545473, + "loss": 0.821, + "step": 188 + }, + { + "epoch": 0.04, + "learning_rate": 0.0001999549629362682, + "loss": 0.8063, + "step": 189 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019995441541981553, + "loss": 0.8182, + "step": 190 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019995386459611495, + "loss": 0.7914, + "step": 191 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019995331046518454, + "loss": 0.803, + "step": 192 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019995275302704278, + "loss": 0.7851, + "step": 193 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019995219228170798, + "loss": 0.7648, + "step": 194 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019995162822919883, + "loss": 0.7654, + "step": 195 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019995106086953388, + "loss": 0.8251, + "step": 196 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019995049020273196, + "loss": 0.8753, + "step": 197 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019994991622881194, + "loss": 0.8021, + "step": 198 + }, + { + "epoch": 0.05, + "learning_rate": 0.0001999493389477928, + "loss": 0.8129, + "step": 199 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019994875835969368, + "loss": 0.8119, + "step": 200 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019994817446453373, + "loss": 0.8364, + "step": 201 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019994758726233234, + "loss": 0.8288, + "step": 202 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019994699675310887, + "loss": 0.8751, + "step": 203 + }, + { + "epoch": 0.05, + "learning_rate": 0.0001999464029368829, + "loss": 0.8395, + "step": 204 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019994580581367407, + "loss": 0.7927, + "step": 205 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019994520538350213, + "loss": 0.7563, + "step": 206 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019994460164638692, + "loss": 0.7904, + "step": 207 + }, + { + "epoch": 0.05, + "learning_rate": 0.0001999439946023485, + "loss": 0.7722, + "step": 208 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019994338425140688, + "loss": 0.8949, + "step": 209 + }, + { + "epoch": 0.05, + "learning_rate": 0.0001999427705935823, + "loss": 0.7898, + "step": 210 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019994215362889502, + "loss": 0.8372, + "step": 211 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019994153335736546, + "loss": 0.8537, + "step": 212 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019994090977901418, + "loss": 0.8521, + "step": 213 + }, + { + "epoch": 0.05, + "learning_rate": 0.0001999402828938618, + "loss": 0.8168, + "step": 214 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019993965270192904, + "loss": 0.7893, + "step": 215 + }, + { + "epoch": 0.05, + "learning_rate": 0.0001999390192032368, + "loss": 0.7778, + "step": 216 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019993838239780595, + "loss": 0.8003, + "step": 217 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019993774228565766, + "loss": 0.7848, + "step": 218 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019993709886681303, + "loss": 0.8466, + "step": 219 + }, + { + "epoch": 0.05, + "learning_rate": 0.0001999364521412934, + "loss": 0.8486, + "step": 220 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019993580210912016, + "loss": 0.783, + "step": 221 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019993514877031485, + "loss": 0.7472, + "step": 222 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019993449212489902, + "loss": 0.8327, + "step": 223 + }, + { + "epoch": 0.05, + "learning_rate": 0.0001999338321728944, + "loss": 0.8215, + "step": 224 + }, + { + "epoch": 0.05, + "learning_rate": 0.0001999331689143229, + "loss": 0.892, + "step": 225 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019993250234920636, + "loss": 0.775, + "step": 226 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019993183247756695, + "loss": 0.7259, + "step": 227 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019993115929942677, + "loss": 0.7449, + "step": 228 + }, + { + "epoch": 0.05, + "learning_rate": 0.0001999304828148081, + "loss": 0.7715, + "step": 229 + }, + { + "epoch": 0.05, + "learning_rate": 0.0001999298030237333, + "loss": 0.8312, + "step": 230 + }, + { + "epoch": 0.05, + "learning_rate": 0.0001999291199262249, + "loss": 0.8275, + "step": 231 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019992843352230547, + "loss": 0.7595, + "step": 232 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019992774381199778, + "loss": 0.9094, + "step": 233 + }, + { + "epoch": 0.05, + "learning_rate": 0.0001999270507953246, + "loss": 0.8676, + "step": 234 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019992635447230886, + "loss": 0.7904, + "step": 235 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019992565484297362, + "loss": 0.8133, + "step": 236 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019992495190734202, + "loss": 0.812, + "step": 237 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019992424566543733, + "loss": 0.8083, + "step": 238 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019992353611728288, + "loss": 0.8047, + "step": 239 + }, + { + "epoch": 0.06, + "learning_rate": 0.0001999228232629022, + "loss": 0.8367, + "step": 240 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019992210710231885, + "loss": 0.8631, + "step": 241 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019992138763555653, + "loss": 0.8129, + "step": 242 + }, + { + "epoch": 0.06, + "learning_rate": 0.000199920664862639, + "loss": 0.8135, + "step": 243 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019991993878359026, + "loss": 0.7993, + "step": 244 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019991920939843428, + "loss": 0.8342, + "step": 245 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019991847670719522, + "loss": 0.8817, + "step": 246 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019991774070989728, + "loss": 0.7526, + "step": 247 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019991700140656486, + "loss": 0.7974, + "step": 248 + }, + { + "epoch": 0.06, + "learning_rate": 0.0001999162587972224, + "loss": 0.7931, + "step": 249 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019991551288189448, + "loss": 0.7768, + "step": 250 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019991476366060576, + "loss": 0.7896, + "step": 251 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019991401113338104, + "loss": 0.8606, + "step": 252 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019991325530024524, + "loss": 0.7774, + "step": 253 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019991249616122333, + "loss": 0.8041, + "step": 254 + }, + { + "epoch": 0.06, + "learning_rate": 0.0001999117337163405, + "loss": 0.7721, + "step": 255 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019991096796562188, + "loss": 0.9457, + "step": 256 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019991019890909284, + "loss": 0.7721, + "step": 257 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019990942654677887, + "loss": 0.7819, + "step": 258 + }, + { + "epoch": 0.06, + "learning_rate": 0.0001999086508787055, + "loss": 0.8086, + "step": 259 + }, + { + "epoch": 0.06, + "learning_rate": 0.0001999078719048984, + "loss": 0.7654, + "step": 260 + }, + { + "epoch": 0.06, + "learning_rate": 0.0001999070896253833, + "loss": 0.8203, + "step": 261 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019990630404018614, + "loss": 0.8168, + "step": 262 + }, + { + "epoch": 0.06, + "learning_rate": 0.0001999055151493329, + "loss": 0.8178, + "step": 263 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019990472295284962, + "loss": 0.7597, + "step": 264 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019990392745076262, + "loss": 0.7857, + "step": 265 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019990312864309818, + "loss": 0.8042, + "step": 266 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019990232652988268, + "loss": 0.8358, + "step": 267 + }, + { + "epoch": 0.06, + "learning_rate": 0.0001999015211111427, + "loss": 0.7333, + "step": 268 + }, + { + "epoch": 0.06, + "learning_rate": 0.0001999007123869049, + "loss": 0.8118, + "step": 269 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019989990035719602, + "loss": 0.7939, + "step": 270 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019989908502204292, + "loss": 0.8264, + "step": 271 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019989826638147263, + "loss": 0.8698, + "step": 272 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019989744443551217, + "loss": 0.7266, + "step": 273 + }, + { + "epoch": 0.06, + "learning_rate": 0.0001998966191841888, + "loss": 0.768, + "step": 274 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019989579062752974, + "loss": 0.8611, + "step": 275 + }, + { + "epoch": 0.06, + "learning_rate": 0.0001998949587655625, + "loss": 0.808, + "step": 276 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019989412359831455, + "loss": 0.7849, + "step": 277 + }, + { + "epoch": 0.06, + "learning_rate": 0.0001998932851258135, + "loss": 0.8627, + "step": 278 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019989244334808715, + "loss": 0.7497, + "step": 279 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019989159826516332, + "loss": 0.6951, + "step": 280 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019989074987706998, + "loss": 0.756, + "step": 281 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019988989818383522, + "loss": 0.7924, + "step": 282 + }, + { + "epoch": 0.07, + "learning_rate": 0.0001998890431854872, + "loss": 0.8103, + "step": 283 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019988818488205417, + "loss": 0.7668, + "step": 284 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019988732327356462, + "loss": 0.7947, + "step": 285 + }, + { + "epoch": 0.07, + "learning_rate": 0.000199886458360047, + "loss": 0.7875, + "step": 286 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019988559014152992, + "loss": 0.8308, + "step": 287 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019988471861804213, + "loss": 0.8544, + "step": 288 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019988384378961246, + "loss": 0.7925, + "step": 289 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019988296565626987, + "loss": 0.7661, + "step": 290 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019988208421804337, + "loss": 0.9, + "step": 291 + }, + { + "epoch": 0.07, + "learning_rate": 0.0001998811994749622, + "loss": 0.7279, + "step": 292 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019988031142705558, + "loss": 0.7891, + "step": 293 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019987942007435288, + "loss": 0.7736, + "step": 294 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019987852541688363, + "loss": 0.8087, + "step": 295 + }, + { + "epoch": 0.07, + "learning_rate": 0.0001998776274546774, + "loss": 0.7975, + "step": 296 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019987672618776396, + "loss": 0.8358, + "step": 297 + }, + { + "epoch": 0.07, + "learning_rate": 0.0001998758216161731, + "loss": 0.8448, + "step": 298 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019987491373993469, + "loss": 0.7933, + "step": 299 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019987400255907885, + "loss": 0.7753, + "step": 300 + }, + { + "epoch": 0.07, + "learning_rate": 0.0001998730880736357, + "loss": 0.7666, + "step": 301 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019987217028363547, + "loss": 0.8375, + "step": 302 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019987124918910857, + "loss": 0.8066, + "step": 303 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019987032479008547, + "loss": 0.7497, + "step": 304 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019986939708659677, + "loss": 0.8451, + "step": 305 + }, + { + "epoch": 0.07, + "learning_rate": 0.0001998684660786731, + "loss": 0.7709, + "step": 306 + }, + { + "epoch": 0.07, + "learning_rate": 0.0001998675317663453, + "loss": 0.814, + "step": 307 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019986659414964432, + "loss": 0.8008, + "step": 308 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019986565322860115, + "loss": 0.7649, + "step": 309 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019986470900324694, + "loss": 0.8151, + "step": 310 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019986376147361292, + "loss": 0.8009, + "step": 311 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019986281063973042, + "loss": 0.8264, + "step": 312 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019986185650163095, + "loss": 0.7429, + "step": 313 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019986089905934601, + "loss": 0.7226, + "step": 314 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019985993831290734, + "loss": 0.7465, + "step": 315 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019985897426234674, + "loss": 0.8774, + "step": 316 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019985800690769606, + "loss": 0.8039, + "step": 317 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019985703624898733, + "loss": 0.7816, + "step": 318 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019985606228625264, + "loss": 0.7705, + "step": 319 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019985508501952425, + "loss": 0.7647, + "step": 320 + }, + { + "epoch": 0.07, + "learning_rate": 0.0001998541044488345, + "loss": 0.869, + "step": 321 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019985312057421584, + "loss": 0.7976, + "step": 322 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019985213339570077, + "loss": 0.7911, + "step": 323 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019985114291332199, + "loss": 0.7574, + "step": 324 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019985014912711228, + "loss": 0.7837, + "step": 325 + }, + { + "epoch": 0.08, + "learning_rate": 0.0001998491520371045, + "loss": 0.8448, + "step": 326 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019984815164333163, + "loss": 0.8004, + "step": 327 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019984714794582683, + "loss": 0.7659, + "step": 328 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019984614094462324, + "loss": 0.8168, + "step": 329 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019984513063975423, + "loss": 0.7989, + "step": 330 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019984411703125322, + "loss": 0.8476, + "step": 331 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019984310011915372, + "loss": 0.8231, + "step": 332 + }, + { + "epoch": 0.08, + "learning_rate": 0.0001998420799034894, + "loss": 0.7966, + "step": 333 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019984105638429402, + "loss": 0.8221, + "step": 334 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019984002956160147, + "loss": 0.7966, + "step": 335 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019983899943544565, + "loss": 0.8054, + "step": 336 + }, + { + "epoch": 0.08, + "learning_rate": 0.0001998379660058607, + "loss": 0.9002, + "step": 337 + }, + { + "epoch": 0.08, + "learning_rate": 0.0001998369292728808, + "loss": 0.8068, + "step": 338 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019983588923654024, + "loss": 0.8665, + "step": 339 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019983484589687346, + "loss": 0.8747, + "step": 340 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019983379925391493, + "loss": 0.8186, + "step": 341 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019983274930769938, + "loss": 0.7857, + "step": 342 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019983169605826146, + "loss": 0.8397, + "step": 343 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019983063950563604, + "loss": 0.794, + "step": 344 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019982957964985812, + "loss": 0.7846, + "step": 345 + }, + { + "epoch": 0.08, + "learning_rate": 0.0001998285164909627, + "loss": 0.7461, + "step": 346 + }, + { + "epoch": 0.08, + "learning_rate": 0.000199827450028985, + "loss": 0.7788, + "step": 347 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019982638026396033, + "loss": 0.8395, + "step": 348 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019982530719592402, + "loss": 0.7953, + "step": 349 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019982423082491162, + "loss": 0.747, + "step": 350 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019982315115095873, + "loss": 0.8333, + "step": 351 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019982206817410107, + "loss": 0.8411, + "step": 352 + }, + { + "epoch": 0.08, + "learning_rate": 0.0001998209818943745, + "loss": 0.8851, + "step": 353 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019981989231181494, + "loss": 0.7965, + "step": 354 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019981879942645845, + "loss": 0.7353, + "step": 355 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019981770323834118, + "loss": 0.848, + "step": 356 + }, + { + "epoch": 0.08, + "learning_rate": 0.0001998166037474994, + "loss": 0.7833, + "step": 357 + }, + { + "epoch": 0.08, + "learning_rate": 0.0001998155009539695, + "loss": 0.7652, + "step": 358 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019981439485778797, + "loss": 0.7925, + "step": 359 + }, + { + "epoch": 0.08, + "learning_rate": 0.0001998132854589914, + "loss": 0.7999, + "step": 360 + }, + { + "epoch": 0.08, + "learning_rate": 0.0001998121727576165, + "loss": 0.7903, + "step": 361 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019981105675370006, + "loss": 0.764, + "step": 362 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019980993744727908, + "loss": 0.8055, + "step": 363 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019980881483839052, + "loss": 0.7711, + "step": 364 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019980768892707157, + "loss": 0.805, + "step": 365 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019980655971335945, + "loss": 0.8102, + "step": 366 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019980542719729154, + "loss": 0.7908, + "step": 367 + }, + { + "epoch": 0.09, + "learning_rate": 0.0001998042913789053, + "loss": 0.7892, + "step": 368 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019980315225823832, + "loss": 0.8428, + "step": 369 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019980200983532833, + "loss": 0.8324, + "step": 370 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019980086411021305, + "loss": 0.7979, + "step": 371 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019979971508293047, + "loss": 0.7712, + "step": 372 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019979856275351856, + "loss": 0.7714, + "step": 373 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019979740712201545, + "loss": 0.857, + "step": 374 + }, + { + "epoch": 0.09, + "learning_rate": 0.0001997962481884594, + "loss": 0.8727, + "step": 375 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019979508595288872, + "loss": 0.7743, + "step": 376 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019979392041534187, + "loss": 0.8434, + "step": 377 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019979275157585747, + "loss": 0.8585, + "step": 378 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019979157943447414, + "loss": 0.8423, + "step": 379 + }, + { + "epoch": 0.09, + "learning_rate": 0.0001997904039912307, + "loss": 0.7517, + "step": 380 + }, + { + "epoch": 0.09, + "learning_rate": 0.000199789225246166, + "loss": 0.8478, + "step": 381 + }, + { + "epoch": 0.09, + "learning_rate": 0.0001997880431993191, + "loss": 0.8557, + "step": 382 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019978685785072904, + "loss": 0.7752, + "step": 383 + }, + { + "epoch": 0.09, + "learning_rate": 0.0001997856692004351, + "loss": 0.8654, + "step": 384 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019978447724847652, + "loss": 0.8535, + "step": 385 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019978328199489286, + "loss": 0.7456, + "step": 386 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019978208343972363, + "loss": 0.7465, + "step": 387 + }, + { + "epoch": 0.09, + "learning_rate": 0.0001997808815830084, + "loss": 0.7754, + "step": 388 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019977967642478706, + "loss": 0.8185, + "step": 389 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019977846796509943, + "loss": 0.7896, + "step": 390 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019977725620398546, + "loss": 0.737, + "step": 391 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019977604114148533, + "loss": 0.7979, + "step": 392 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019977482277763918, + "loss": 0.8144, + "step": 393 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019977360111248727, + "loss": 0.8341, + "step": 394 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019977237614607018, + "loss": 0.8033, + "step": 395 + }, + { + "epoch": 0.09, + "learning_rate": 0.0001997711478784283, + "loss": 0.7847, + "step": 396 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019976991630960234, + "loss": 0.7661, + "step": 397 + }, + { + "epoch": 0.09, + "learning_rate": 0.000199768681439633, + "loss": 0.7665, + "step": 398 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019976744326856118, + "loss": 0.7943, + "step": 399 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019976620179642786, + "loss": 0.7185, + "step": 400 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019976495702327406, + "loss": 0.7544, + "step": 401 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019976370894914106, + "loss": 0.7819, + "step": 402 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019976245757407003, + "loss": 0.8308, + "step": 403 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019976120289810247, + "loss": 0.8323, + "step": 404 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019975994492127988, + "loss": 0.7911, + "step": 405 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019975868364364383, + "loss": 0.7838, + "step": 406 + }, + { + "epoch": 0.09, + "learning_rate": 0.0001997574190652361, + "loss": 0.7548, + "step": 407 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019975615118609854, + "loss": 0.8319, + "step": 408 + }, + { + "epoch": 0.09, + "learning_rate": 0.0001997548800062731, + "loss": 0.8818, + "step": 409 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019975360552580178, + "loss": 0.8119, + "step": 410 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019975232774472682, + "loss": 0.8118, + "step": 411 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019975104666309048, + "loss": 0.8313, + "step": 412 + }, + { + "epoch": 0.1, + "learning_rate": 0.0001997497622809351, + "loss": 0.7909, + "step": 413 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019974847459830326, + "loss": 0.8009, + "step": 414 + }, + { + "epoch": 0.1, + "learning_rate": 0.0001997471836152375, + "loss": 0.8188, + "step": 415 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019974588933178056, + "loss": 0.8074, + "step": 416 + }, + { + "epoch": 0.1, + "learning_rate": 0.0001997445917479753, + "loss": 0.7901, + "step": 417 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019974329086386457, + "loss": 0.7777, + "step": 418 + }, + { + "epoch": 0.1, + "learning_rate": 0.0001997419866794915, + "loss": 0.7863, + "step": 419 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019974067919489917, + "loss": 0.8648, + "step": 420 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019973936841013087, + "loss": 0.7781, + "step": 421 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019973805432522997, + "loss": 0.7974, + "step": 422 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019973673694024, + "loss": 0.825, + "step": 423 + }, + { + "epoch": 0.1, + "learning_rate": 0.0001997354162552045, + "loss": 0.7407, + "step": 424 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019973409227016714, + "loss": 0.7754, + "step": 425 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019973276498517177, + "loss": 0.7507, + "step": 426 + }, + { + "epoch": 0.1, + "learning_rate": 0.0001997314344002623, + "loss": 0.7631, + "step": 427 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019973010051548275, + "loss": 0.8441, + "step": 428 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019972876333087728, + "loss": 0.8574, + "step": 429 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019972742284649008, + "loss": 0.8064, + "step": 430 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019972607906236555, + "loss": 0.8711, + "step": 431 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019972473197854811, + "loss": 0.8583, + "step": 432 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019972338159508237, + "loss": 0.7646, + "step": 433 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019972202791201303, + "loss": 0.7415, + "step": 434 + }, + { + "epoch": 0.1, + "learning_rate": 0.0001997206709293848, + "loss": 0.8165, + "step": 435 + }, + { + "epoch": 0.1, + "learning_rate": 0.0001997193106472427, + "loss": 0.7906, + "step": 436 + }, + { + "epoch": 0.1, + "learning_rate": 0.0001997179470656316, + "loss": 0.7006, + "step": 437 + }, + { + "epoch": 0.1, + "learning_rate": 0.0001997165801845967, + "loss": 0.7607, + "step": 438 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019971521000418323, + "loss": 0.7851, + "step": 439 + }, + { + "epoch": 0.1, + "learning_rate": 0.0001997138365244365, + "loss": 0.8041, + "step": 440 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019971245974540196, + "loss": 0.8188, + "step": 441 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019971107966712518, + "loss": 0.8126, + "step": 442 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019970969628965179, + "loss": 0.7739, + "step": 443 + }, + { + "epoch": 0.1, + "learning_rate": 0.0001997083096130276, + "loss": 0.8247, + "step": 444 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019970691963729846, + "loss": 0.851, + "step": 445 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019970552636251038, + "loss": 0.7674, + "step": 446 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019970412978870947, + "loss": 0.8429, + "step": 447 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019970272991594193, + "loss": 0.763, + "step": 448 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019970132674425407, + "loss": 0.8225, + "step": 449 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019969992027369228, + "loss": 0.7699, + "step": 450 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019969851050430316, + "loss": 0.7724, + "step": 451 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019969709743613336, + "loss": 0.8066, + "step": 452 + }, + { + "epoch": 0.1, + "learning_rate": 0.0001996956810692296, + "loss": 0.8156, + "step": 453 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019969426140363874, + "loss": 0.705, + "step": 454 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019969283843940776, + "loss": 0.8421, + "step": 455 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019969141217658377, + "loss": 0.7843, + "step": 456 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019968998261521393, + "loss": 0.8125, + "step": 457 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019968854975534553, + "loss": 0.848, + "step": 458 + }, + { + "epoch": 0.11, + "learning_rate": 0.000199687113597026, + "loss": 0.8061, + "step": 459 + }, + { + "epoch": 0.11, + "learning_rate": 0.0001996856741403029, + "loss": 0.771, + "step": 460 + }, + { + "epoch": 0.11, + "learning_rate": 0.0001996842313852238, + "loss": 0.8071, + "step": 461 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019968278533183647, + "loss": 0.7877, + "step": 462 + }, + { + "epoch": 0.11, + "learning_rate": 0.0001996813359801887, + "loss": 0.7999, + "step": 463 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019967988333032853, + "loss": 0.8959, + "step": 464 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019967842738230393, + "loss": 0.8302, + "step": 465 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019967696813616317, + "loss": 0.7108, + "step": 466 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019967550559195444, + "loss": 0.7529, + "step": 467 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019967403974972622, + "loss": 0.8186, + "step": 468 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019967257060952696, + "loss": 0.7813, + "step": 469 + }, + { + "epoch": 0.11, + "learning_rate": 0.0001996710981714053, + "loss": 0.7698, + "step": 470 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019966962243540994, + "loss": 0.808, + "step": 471 + }, + { + "epoch": 0.11, + "learning_rate": 0.0001996681434015897, + "loss": 0.8951, + "step": 472 + }, + { + "epoch": 0.11, + "learning_rate": 0.0001996666610699935, + "loss": 0.8085, + "step": 473 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019966517544067045, + "loss": 0.7963, + "step": 474 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019966368651366966, + "loss": 0.8229, + "step": 475 + }, + { + "epoch": 0.11, + "learning_rate": 0.0001996621942890404, + "loss": 0.7495, + "step": 476 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019966069876683204, + "loss": 0.7546, + "step": 477 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019965919994709407, + "loss": 0.7445, + "step": 478 + }, + { + "epoch": 0.11, + "learning_rate": 0.0001996576978298761, + "loss": 0.8094, + "step": 479 + }, + { + "epoch": 0.11, + "learning_rate": 0.0001996561924152278, + "loss": 0.7465, + "step": 480 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019965468370319905, + "loss": 0.7525, + "step": 481 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019965317169383964, + "loss": 0.7284, + "step": 482 + }, + { + "epoch": 0.11, + "learning_rate": 0.0001996516563871997, + "loss": 0.9095, + "step": 483 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019965013778332934, + "loss": 0.7865, + "step": 484 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019964861588227882, + "loss": 0.7769, + "step": 485 + }, + { + "epoch": 0.11, + "learning_rate": 0.0001996470906840985, + "loss": 0.7217, + "step": 486 + }, + { + "epoch": 0.11, + "learning_rate": 0.0001996455621888388, + "loss": 0.8498, + "step": 487 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019964403039655036, + "loss": 0.7553, + "step": 488 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019964249530728378, + "loss": 0.8226, + "step": 489 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019964095692108993, + "loss": 0.7887, + "step": 490 + }, + { + "epoch": 0.11, + "learning_rate": 0.0001996394152380197, + "loss": 0.7965, + "step": 491 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019963787025812404, + "loss": 0.8101, + "step": 492 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019963632198145414, + "loss": 0.8037, + "step": 493 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019963477040806118, + "loss": 0.7657, + "step": 494 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019963321553799653, + "loss": 0.8367, + "step": 495 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019963165737131163, + "loss": 0.7941, + "step": 496 + }, + { + "epoch": 0.12, + "learning_rate": 0.000199630095908058, + "loss": 0.7336, + "step": 497 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019962853114828736, + "loss": 0.8293, + "step": 498 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019962696309205148, + "loss": 0.7968, + "step": 499 + }, + { + "epoch": 0.12, + "learning_rate": 0.0001996253917394022, + "loss": 0.7434, + "step": 500 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019962381709039153, + "loss": 0.8363, + "step": 501 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019962223914507158, + "loss": 0.7887, + "step": 502 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019962065790349458, + "loss": 0.8115, + "step": 503 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019961907336571278, + "loss": 0.7277, + "step": 504 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019961748553177868, + "loss": 0.78, + "step": 505 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019961589440174482, + "loss": 0.7703, + "step": 506 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019961429997566378, + "loss": 0.741, + "step": 507 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019961270225358837, + "loss": 0.7245, + "step": 508 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019961110123557142, + "loss": 0.836, + "step": 509 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019960949692166596, + "loss": 0.7567, + "step": 510 + }, + { + "epoch": 0.12, + "learning_rate": 0.000199607889311925, + "loss": 0.7788, + "step": 511 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019960627840640181, + "loss": 0.827, + "step": 512 + }, + { + "epoch": 0.12, + "learning_rate": 0.0001996046642051496, + "loss": 0.7658, + "step": 513 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019960304670822186, + "loss": 0.7505, + "step": 514 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019960142591567208, + "loss": 0.8378, + "step": 515 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019959980182755385, + "loss": 0.8108, + "step": 516 + }, + { + "epoch": 0.12, + "learning_rate": 0.000199598174443921, + "loss": 0.7649, + "step": 517 + }, + { + "epoch": 0.12, + "learning_rate": 0.0001995965437648273, + "loss": 0.753, + "step": 518 + }, + { + "epoch": 0.12, + "learning_rate": 0.0001995949097903267, + "loss": 0.8634, + "step": 519 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019959327252047328, + "loss": 0.7534, + "step": 520 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019959163195532124, + "loss": 0.8162, + "step": 521 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019958998809492485, + "loss": 0.7938, + "step": 522 + }, + { + "epoch": 0.12, + "learning_rate": 0.0001995883409393385, + "loss": 0.8145, + "step": 523 + }, + { + "epoch": 0.12, + "learning_rate": 0.0001995866904886167, + "loss": 0.7788, + "step": 524 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019958503674281399, + "loss": 0.745, + "step": 525 + }, + { + "epoch": 0.12, + "learning_rate": 0.0001995833797019852, + "loss": 0.7502, + "step": 526 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019958171936618508, + "loss": 0.7794, + "step": 527 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019958005573546855, + "loss": 0.7664, + "step": 528 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019957838880989078, + "loss": 0.7641, + "step": 529 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019957671858950677, + "loss": 0.8056, + "step": 530 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019957504507437185, + "loss": 0.7491, + "step": 531 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019957336826454143, + "loss": 0.862, + "step": 532 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019957168816007092, + "loss": 0.8399, + "step": 533 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019957000476101598, + "loss": 0.7734, + "step": 534 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019956831806743227, + "loss": 0.7558, + "step": 535 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019956662807937557, + "loss": 0.7223, + "step": 536 + }, + { + "epoch": 0.12, + "learning_rate": 0.0001995649347969019, + "loss": 0.7795, + "step": 537 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019956323822006714, + "loss": 0.8093, + "step": 538 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019956153834892754, + "loss": 0.8192, + "step": 539 + }, + { + "epoch": 0.13, + "learning_rate": 0.0001995598351835393, + "loss": 0.7119, + "step": 540 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019955812872395878, + "loss": 0.723, + "step": 541 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019955641897024246, + "loss": 0.8054, + "step": 542 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019955470592244686, + "loss": 0.8095, + "step": 543 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019955298958062876, + "loss": 0.8501, + "step": 544 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019955126994484484, + "loss": 0.7943, + "step": 545 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019954954701515204, + "loss": 0.7487, + "step": 546 + }, + { + "epoch": 0.13, + "learning_rate": 0.0001995478207916074, + "loss": 0.7855, + "step": 547 + }, + { + "epoch": 0.13, + "learning_rate": 0.000199546091274268, + "loss": 0.7582, + "step": 548 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019954435846319108, + "loss": 0.8002, + "step": 549 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019954262235843395, + "loss": 0.754, + "step": 550 + }, + { + "epoch": 0.13, + "learning_rate": 0.0001995408829600541, + "loss": 0.7934, + "step": 551 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019953914026810902, + "loss": 0.7639, + "step": 552 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019953739428265642, + "loss": 0.7333, + "step": 553 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019953564500375408, + "loss": 0.754, + "step": 554 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019953389243145982, + "loss": 0.8023, + "step": 555 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019953213656583168, + "loss": 0.8217, + "step": 556 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019953037740692774, + "loss": 0.7119, + "step": 557 + }, + { + "epoch": 0.13, + "learning_rate": 0.0001995286149548062, + "loss": 0.8151, + "step": 558 + }, + { + "epoch": 0.13, + "learning_rate": 0.0001995268492095254, + "loss": 0.6971, + "step": 559 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019952508017114374, + "loss": 0.7958, + "step": 560 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019952330783971976, + "loss": 0.7203, + "step": 561 + }, + { + "epoch": 0.13, + "learning_rate": 0.0001995215322153121, + "loss": 0.7438, + "step": 562 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019951975329797952, + "loss": 0.7445, + "step": 563 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019951797108778087, + "loss": 0.8638, + "step": 564 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019951618558477515, + "loss": 0.829, + "step": 565 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019951439678902135, + "loss": 0.8351, + "step": 566 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019951260470057878, + "loss": 0.747, + "step": 567 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019951080931950664, + "loss": 0.766, + "step": 568 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019950901064586435, + "loss": 0.8215, + "step": 569 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019950720867971148, + "loss": 0.7948, + "step": 570 + }, + { + "epoch": 0.13, + "learning_rate": 0.0001995054034211076, + "loss": 0.8025, + "step": 571 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019950359487011244, + "loss": 0.8184, + "step": 572 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019950359487011244, + "loss": 0.817, + "step": 573 + }, + { + "epoch": 0.13, + "learning_rate": 0.0001995017830267859, + "loss": 0.7725, + "step": 574 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019949996789118786, + "loss": 0.7854, + "step": 575 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019949814946337838, + "loss": 0.8401, + "step": 576 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019949632774341768, + "loss": 0.8023, + "step": 577 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019949450273136602, + "loss": 0.8514, + "step": 578 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019949267442728378, + "loss": 0.7743, + "step": 579 + }, + { + "epoch": 0.13, + "learning_rate": 0.0001994908428312314, + "loss": 0.7445, + "step": 580 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019948900794326956, + "loss": 0.7645, + "step": 581 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019948716976345897, + "loss": 0.7685, + "step": 582 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019948532829186036, + "loss": 0.8071, + "step": 583 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019948348352853476, + "loss": 0.7766, + "step": 584 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019948163547354317, + "loss": 0.7322, + "step": 585 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019947978412694674, + "loss": 0.8139, + "step": 586 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019947792948880674, + "loss": 0.7984, + "step": 587 + }, + { + "epoch": 0.14, + "learning_rate": 0.0001994760715591845, + "loss": 0.7774, + "step": 588 + }, + { + "epoch": 0.14, + "learning_rate": 0.0001994742103381415, + "loss": 0.8295, + "step": 589 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019947234582573937, + "loss": 0.7993, + "step": 590 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019947047802203977, + "loss": 0.8016, + "step": 591 + }, + { + "epoch": 0.14, + "learning_rate": 0.0001994686069271045, + "loss": 0.7452, + "step": 592 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019946673254099545, + "loss": 0.7798, + "step": 593 + }, + { + "epoch": 0.14, + "learning_rate": 0.0001994648548637747, + "loss": 0.6703, + "step": 594 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019946297389550433, + "loss": 0.7192, + "step": 595 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019946108963624654, + "loss": 0.7675, + "step": 596 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019945920208606376, + "loss": 0.8103, + "step": 597 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019945731124501842, + "loss": 0.7503, + "step": 598 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019945541711317307, + "loss": 0.8073, + "step": 599 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019945351969059033, + "loss": 0.8421, + "step": 600 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019945161897733308, + "loss": 0.7399, + "step": 601 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019944971497346415, + "loss": 0.7475, + "step": 602 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019944780767904653, + "loss": 0.7655, + "step": 603 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019944589709414338, + "loss": 0.7986, + "step": 604 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019944398321881788, + "loss": 0.7682, + "step": 605 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019944206605313335, + "loss": 0.7835, + "step": 606 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019944014559715326, + "loss": 0.7748, + "step": 607 + }, + { + "epoch": 0.14, + "learning_rate": 0.0001994382218509411, + "loss": 0.8386, + "step": 608 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019943629481456053, + "loss": 0.817, + "step": 609 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019943436448807537, + "loss": 0.7912, + "step": 610 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019943243087154944, + "loss": 0.814, + "step": 611 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019943049396504674, + "loss": 0.7438, + "step": 612 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019942855376863135, + "loss": 0.791, + "step": 613 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019942661028236745, + "loss": 0.7691, + "step": 614 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019942466350631935, + "loss": 0.7843, + "step": 615 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019942271344055147, + "loss": 0.8026, + "step": 616 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019942076008512837, + "loss": 0.7579, + "step": 617 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019941880344011463, + "loss": 0.7768, + "step": 618 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019941684350557502, + "loss": 0.8056, + "step": 619 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019941488028157438, + "loss": 0.8091, + "step": 620 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019941291376817763, + "loss": 0.7535, + "step": 621 + }, + { + "epoch": 0.14, + "learning_rate": 0.0001994109439654499, + "loss": 0.7866, + "step": 622 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019940897087345632, + "loss": 0.8244, + "step": 623 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019940699449226223, + "loss": 0.7662, + "step": 624 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019940501482193298, + "loss": 0.7547, + "step": 625 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019940303186253403, + "loss": 0.8408, + "step": 626 + }, + { + "epoch": 0.15, + "learning_rate": 0.0001994010456141311, + "loss": 0.8075, + "step": 627 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019939905607678984, + "loss": 0.8162, + "step": 628 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019939706325057608, + "loss": 0.756, + "step": 629 + }, + { + "epoch": 0.15, + "learning_rate": 0.0001993950671355558, + "loss": 0.8613, + "step": 630 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019939306773179497, + "loss": 0.8017, + "step": 631 + }, + { + "epoch": 0.15, + "learning_rate": 0.0001993910650393598, + "loss": 0.8012, + "step": 632 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019938905905831654, + "loss": 0.8359, + "step": 633 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019938704978873158, + "loss": 0.7421, + "step": 634 + }, + { + "epoch": 0.15, + "learning_rate": 0.0001993850372306714, + "loss": 0.8326, + "step": 635 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019938302138420254, + "loss": 0.779, + "step": 636 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019938100224939177, + "loss": 0.7669, + "step": 637 + }, + { + "epoch": 0.15, + "learning_rate": 0.0001993789798263059, + "loss": 0.7357, + "step": 638 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019937695411501177, + "loss": 0.7885, + "step": 639 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019937492511557645, + "loss": 0.7785, + "step": 640 + }, + { + "epoch": 0.15, + "learning_rate": 0.0001993728928280671, + "loss": 0.763, + "step": 641 + }, + { + "epoch": 0.15, + "learning_rate": 0.0001993708572525509, + "loss": 0.7752, + "step": 642 + }, + { + "epoch": 0.15, + "learning_rate": 0.0001993688183890953, + "loss": 0.8238, + "step": 643 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019936677623776764, + "loss": 0.7393, + "step": 644 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019936473079863558, + "loss": 0.8016, + "step": 645 + }, + { + "epoch": 0.15, + "learning_rate": 0.0001993626820717668, + "loss": 0.7891, + "step": 646 + }, + { + "epoch": 0.15, + "learning_rate": 0.000199360630057229, + "loss": 0.7593, + "step": 647 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019935857475509016, + "loss": 0.7439, + "step": 648 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019935651616541828, + "loss": 0.8218, + "step": 649 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019935445428828142, + "loss": 0.7491, + "step": 650 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019935238912374787, + "loss": 0.7488, + "step": 651 + }, + { + "epoch": 0.15, + "learning_rate": 0.0001993503206718859, + "loss": 0.7972, + "step": 652 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019934824893276399, + "loss": 0.8319, + "step": 653 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019934617390645064, + "loss": 0.791, + "step": 654 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019934409559301458, + "loss": 0.6857, + "step": 655 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019934201399252454, + "loss": 0.8472, + "step": 656 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019933992910504936, + "loss": 0.8374, + "step": 657 + }, + { + "epoch": 0.15, + "learning_rate": 0.0001993378409306581, + "loss": 0.8097, + "step": 658 + }, + { + "epoch": 0.15, + "learning_rate": 0.0001993357494694198, + "loss": 0.793, + "step": 659 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019933365472140366, + "loss": 0.7916, + "step": 660 + }, + { + "epoch": 0.15, + "learning_rate": 0.000199331556686679, + "loss": 0.8299, + "step": 661 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019932945536531524, + "loss": 0.7425, + "step": 662 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019932735075738195, + "loss": 0.8046, + "step": 663 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019932524286294867, + "loss": 0.7312, + "step": 664 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019932313168208524, + "loss": 0.8265, + "step": 665 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019932101721486145, + "loss": 0.8185, + "step": 666 + }, + { + "epoch": 0.15, + "learning_rate": 0.0001993188994613473, + "loss": 0.7705, + "step": 667 + }, + { + "epoch": 0.15, + "learning_rate": 0.0001993167784216128, + "loss": 0.7967, + "step": 668 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019931465409572826, + "loss": 0.7607, + "step": 669 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019931252648376383, + "loss": 0.7873, + "step": 670 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019931039558578997, + "loss": 0.7908, + "step": 671 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019930826140187718, + "loss": 0.8388, + "step": 672 + }, + { + "epoch": 0.16, + "learning_rate": 0.0001993061239320961, + "loss": 0.7623, + "step": 673 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019930398317651743, + "loss": 0.6908, + "step": 674 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019930183913521195, + "loss": 0.8133, + "step": 675 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019929969180825066, + "loss": 0.7362, + "step": 676 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019929754119570465, + "loss": 0.7381, + "step": 677 + }, + { + "epoch": 0.16, + "learning_rate": 0.000199295387297645, + "loss": 0.8052, + "step": 678 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019929323011414304, + "loss": 0.8315, + "step": 679 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019929106964527006, + "loss": 0.7258, + "step": 680 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019928890589109764, + "loss": 0.8093, + "step": 681 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019928673885169734, + "loss": 0.7842, + "step": 682 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019928456852714082, + "loss": 0.805, + "step": 683 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019928239491749994, + "loss": 0.7272, + "step": 684 + }, + { + "epoch": 0.16, + "learning_rate": 0.0001992802180228466, + "loss": 0.8247, + "step": 685 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019927803784325284, + "loss": 0.7424, + "step": 686 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019927585437879078, + "loss": 0.7458, + "step": 687 + }, + { + "epoch": 0.16, + "learning_rate": 0.0001992736676295327, + "loss": 0.7504, + "step": 688 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019927147759555089, + "loss": 0.8167, + "step": 689 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019926928427691786, + "loss": 0.7904, + "step": 690 + }, + { + "epoch": 0.16, + "learning_rate": 0.0001992670876737062, + "loss": 0.7182, + "step": 691 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019926488778598852, + "loss": 0.8029, + "step": 692 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019926268461383767, + "loss": 0.803, + "step": 693 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019926047815732655, + "loss": 0.7787, + "step": 694 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019925826841652816, + "loss": 0.8045, + "step": 695 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019925605539151557, + "loss": 0.7678, + "step": 696 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019925383908236203, + "loss": 0.8094, + "step": 697 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019925161948914087, + "loss": 0.7966, + "step": 698 + }, + { + "epoch": 0.16, + "learning_rate": 0.0001992493966119256, + "loss": 0.8275, + "step": 699 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019924717045078963, + "loss": 0.8449, + "step": 700 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019924494100580673, + "loss": 0.7239, + "step": 701 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019924270827705062, + "loss": 0.7668, + "step": 702 + }, + { + "epoch": 0.16, + "learning_rate": 0.0001992404722645952, + "loss": 0.8301, + "step": 703 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019923823296851448, + "loss": 0.8099, + "step": 704 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019923599038888245, + "loss": 0.7131, + "step": 705 + }, + { + "epoch": 0.16, + "learning_rate": 0.0001992337445257734, + "loss": 0.7914, + "step": 706 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019923149537926164, + "loss": 0.7744, + "step": 707 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019922924294942153, + "loss": 0.8006, + "step": 708 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019922698723632767, + "loss": 0.7865, + "step": 709 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019922472824005462, + "loss": 0.7767, + "step": 710 + }, + { + "epoch": 0.16, + "learning_rate": 0.0001992224659606772, + "loss": 0.8612, + "step": 711 + }, + { + "epoch": 0.16, + "learning_rate": 0.0001992202003982702, + "loss": 0.8042, + "step": 712 + }, + { + "epoch": 0.17, + "learning_rate": 0.0001992179315529086, + "loss": 0.7669, + "step": 713 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019921565942466752, + "loss": 0.8057, + "step": 714 + }, + { + "epoch": 0.17, + "learning_rate": 0.0001992133840136221, + "loss": 0.8556, + "step": 715 + }, + { + "epoch": 0.17, + "learning_rate": 0.0001992111053198476, + "loss": 0.7741, + "step": 716 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019920882334341944, + "loss": 0.7159, + "step": 717 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019920653808441313, + "loss": 0.78, + "step": 718 + }, + { + "epoch": 0.17, + "learning_rate": 0.0001992042495429043, + "loss": 0.7867, + "step": 719 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019920195771896864, + "loss": 0.7997, + "step": 720 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019919966261268198, + "loss": 0.7343, + "step": 721 + }, + { + "epoch": 0.17, + "learning_rate": 0.0001991973642241203, + "loss": 0.8196, + "step": 722 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019919506255335962, + "loss": 0.8141, + "step": 723 + }, + { + "epoch": 0.17, + "learning_rate": 0.0001991927576004761, + "loss": 0.7357, + "step": 724 + }, + { + "epoch": 0.17, + "learning_rate": 0.000199190449365546, + "loss": 0.8056, + "step": 725 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019918813784864568, + "loss": 0.8003, + "step": 726 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019918582304985166, + "loss": 0.7424, + "step": 727 + }, + { + "epoch": 0.17, + "learning_rate": 0.0001991835049692405, + "loss": 0.8036, + "step": 728 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019918118360688893, + "loss": 0.8286, + "step": 729 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019917885896287373, + "loss": 0.7648, + "step": 730 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019917653103727182, + "loss": 0.7906, + "step": 731 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019917419983016025, + "loss": 0.7334, + "step": 732 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019917186534161612, + "loss": 0.7937, + "step": 733 + }, + { + "epoch": 0.17, + "learning_rate": 0.0001991695275717167, + "loss": 0.8081, + "step": 734 + }, + { + "epoch": 0.17, + "learning_rate": 0.0001991671865205393, + "loss": 0.7708, + "step": 735 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019916484218816143, + "loss": 0.7813, + "step": 736 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019916249457466065, + "loss": 0.799, + "step": 737 + }, + { + "epoch": 0.17, + "learning_rate": 0.0001991601436801146, + "loss": 0.7659, + "step": 738 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019915778950460112, + "loss": 0.7994, + "step": 739 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019915543204819804, + "loss": 0.8363, + "step": 740 + }, + { + "epoch": 0.17, + "learning_rate": 0.0001991530713109834, + "loss": 0.7872, + "step": 741 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019915070729303533, + "loss": 0.7848, + "step": 742 + }, + { + "epoch": 0.17, + "learning_rate": 0.000199148339994432, + "loss": 0.775, + "step": 743 + }, + { + "epoch": 0.17, + "learning_rate": 0.0001991459694152518, + "loss": 0.8304, + "step": 744 + }, + { + "epoch": 0.17, + "learning_rate": 0.0001991435955555731, + "loss": 0.7455, + "step": 745 + }, + { + "epoch": 0.17, + "learning_rate": 0.0001991412184154745, + "loss": 0.7586, + "step": 746 + }, + { + "epoch": 0.17, + "learning_rate": 0.0001991388379950346, + "loss": 0.7687, + "step": 747 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019913645429433223, + "loss": 0.7846, + "step": 748 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019913406731344622, + "loss": 0.7956, + "step": 749 + }, + { + "epoch": 0.17, + "learning_rate": 0.0001991316770524555, + "loss": 0.8504, + "step": 750 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019912928351143927, + "loss": 0.8345, + "step": 751 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019912688669047665, + "loss": 0.7403, + "step": 752 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019912448658964695, + "loss": 0.7743, + "step": 753 + }, + { + "epoch": 0.17, + "learning_rate": 0.0001991220832090296, + "loss": 0.7958, + "step": 754 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019911967654870415, + "loss": 0.73, + "step": 755 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019911726660875014, + "loss": 0.7627, + "step": 756 + }, + { + "epoch": 0.18, + "learning_rate": 0.0001991148533892474, + "loss": 0.786, + "step": 757 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019911243689027573, + "loss": 0.7189, + "step": 758 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019911001711191514, + "loss": 0.8654, + "step": 759 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019910759405424562, + "loss": 0.8218, + "step": 760 + }, + { + "epoch": 0.18, + "learning_rate": 0.0001991051677173474, + "loss": 0.8006, + "step": 761 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019910273810130072, + "loss": 0.8133, + "step": 762 + }, + { + "epoch": 0.18, + "learning_rate": 0.000199100305206186, + "loss": 0.7501, + "step": 763 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019909786903208373, + "loss": 0.7765, + "step": 764 + }, + { + "epoch": 0.18, + "learning_rate": 0.0001990954295790745, + "loss": 0.7499, + "step": 765 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019909298684723904, + "loss": 0.7474, + "step": 766 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019909054083665822, + "loss": 0.7653, + "step": 767 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019908809154741287, + "loss": 0.7546, + "step": 768 + }, + { + "epoch": 0.18, + "learning_rate": 0.0001990856389795841, + "loss": 0.848, + "step": 769 + }, + { + "epoch": 0.18, + "learning_rate": 0.0001990831831332531, + "loss": 0.7291, + "step": 770 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019908072400850104, + "loss": 0.8142, + "step": 771 + }, + { + "epoch": 0.18, + "learning_rate": 0.0001990782616054093, + "loss": 0.7355, + "step": 772 + }, + { + "epoch": 0.18, + "learning_rate": 0.0001990757959240594, + "loss": 0.7681, + "step": 773 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019907332696453295, + "loss": 0.8057, + "step": 774 + }, + { + "epoch": 0.18, + "learning_rate": 0.0001990708547269115, + "loss": 0.751, + "step": 775 + }, + { + "epoch": 0.18, + "learning_rate": 0.000199068379211277, + "loss": 0.858, + "step": 776 + }, + { + "epoch": 0.18, + "learning_rate": 0.0001990659004177113, + "loss": 0.7143, + "step": 777 + }, + { + "epoch": 0.18, + "learning_rate": 0.0001990634183462964, + "loss": 0.822, + "step": 778 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019906093299711447, + "loss": 0.8451, + "step": 779 + }, + { + "epoch": 0.18, + "learning_rate": 0.0001990584443702477, + "loss": 0.7746, + "step": 780 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019905595246577846, + "loss": 0.816, + "step": 781 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019905345728378918, + "loss": 0.7942, + "step": 782 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019905095882436245, + "loss": 0.7911, + "step": 783 + }, + { + "epoch": 0.18, + "learning_rate": 0.0001990484570875809, + "loss": 0.8984, + "step": 784 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019904595207352737, + "loss": 0.8064, + "step": 785 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019904344378228467, + "loss": 0.8041, + "step": 786 + }, + { + "epoch": 0.18, + "learning_rate": 0.0001990409322139358, + "loss": 0.8366, + "step": 787 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019903841736856393, + "loss": 0.8618, + "step": 788 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019903589924625222, + "loss": 0.7576, + "step": 789 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019903337784708396, + "loss": 0.8154, + "step": 790 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019903085317114262, + "loss": 0.8126, + "step": 791 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019902832521851175, + "loss": 0.8073, + "step": 792 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019902579398927496, + "loss": 0.8192, + "step": 793 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019902325948351602, + "loss": 0.8361, + "step": 794 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019902072170131877, + "loss": 0.7847, + "step": 795 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019901818064276724, + "loss": 0.8294, + "step": 796 + }, + { + "epoch": 0.18, + "learning_rate": 0.0001990156363079454, + "loss": 0.7673, + "step": 797 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019901308869693754, + "loss": 0.7564, + "step": 798 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019901053780982787, + "loss": 0.8445, + "step": 799 + }, + { + "epoch": 0.19, + "learning_rate": 0.0001990079836467009, + "loss": 0.8028, + "step": 800 + }, + { + "epoch": 0.19, + "learning_rate": 0.000199005426207641, + "loss": 0.8096, + "step": 801 + }, + { + "epoch": 0.19, + "learning_rate": 0.0001990028654927329, + "loss": 0.7956, + "step": 802 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019900030150206129, + "loss": 0.8012, + "step": 803 + }, + { + "epoch": 0.19, + "learning_rate": 0.000198997734235711, + "loss": 0.8277, + "step": 804 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019899516369376697, + "loss": 0.7085, + "step": 805 + }, + { + "epoch": 0.19, + "learning_rate": 0.0001989925898763143, + "loss": 0.7651, + "step": 806 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019899001278343806, + "loss": 0.7517, + "step": 807 + }, + { + "epoch": 0.19, + "learning_rate": 0.0001989874324152236, + "loss": 0.7644, + "step": 808 + }, + { + "epoch": 0.19, + "learning_rate": 0.0001989848487717563, + "loss": 0.7515, + "step": 809 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019898226185312162, + "loss": 0.7261, + "step": 810 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019897967165940513, + "loss": 0.7812, + "step": 811 + }, + { + "epoch": 0.19, + "learning_rate": 0.0001989770781906926, + "loss": 0.6985, + "step": 812 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019897448144706975, + "loss": 0.8777, + "step": 813 + }, + { + "epoch": 0.19, + "learning_rate": 0.0001989718814286226, + "loss": 0.7978, + "step": 814 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019896927813543708, + "loss": 0.7836, + "step": 815 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019896667156759937, + "loss": 0.7953, + "step": 816 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019896406172519577, + "loss": 0.7592, + "step": 817 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019896144860831257, + "loss": 0.7941, + "step": 818 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019895883221703625, + "loss": 0.7828, + "step": 819 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019895621255145332, + "loss": 0.8044, + "step": 820 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019895358961165055, + "loss": 0.8112, + "step": 821 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019895096339771472, + "loss": 0.7647, + "step": 822 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019894833390973266, + "loss": 0.7498, + "step": 823 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019894570114779142, + "loss": 0.761, + "step": 824 + }, + { + "epoch": 0.19, + "learning_rate": 0.0001989430651119781, + "loss": 0.8068, + "step": 825 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019894042580237992, + "loss": 0.7604, + "step": 826 + }, + { + "epoch": 0.19, + "learning_rate": 0.0001989377832190842, + "loss": 0.7291, + "step": 827 + }, + { + "epoch": 0.19, + "learning_rate": 0.0001989351373621784, + "loss": 0.8001, + "step": 828 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019893248823175003, + "loss": 0.7971, + "step": 829 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019892983582788678, + "loss": 0.7617, + "step": 830 + }, + { + "epoch": 0.19, + "learning_rate": 0.0001989271801506764, + "loss": 0.7742, + "step": 831 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019892452120020675, + "loss": 0.7633, + "step": 832 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019892185897656578, + "loss": 0.7707, + "step": 833 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019891919347984164, + "loss": 0.7406, + "step": 834 + }, + { + "epoch": 0.19, + "learning_rate": 0.0001989165247101225, + "loss": 0.7176, + "step": 835 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019891385266749667, + "loss": 0.7351, + "step": 836 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019891117735205253, + "loss": 0.762, + "step": 837 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019890849876387861, + "loss": 0.7959, + "step": 838 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019890581690306357, + "loss": 0.7431, + "step": 839 + }, + { + "epoch": 0.19, + "learning_rate": 0.0001989031317696961, + "loss": 0.7786, + "step": 840 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019890044336386508, + "loss": 0.8056, + "step": 841 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019889775168565943, + "loss": 0.8553, + "step": 842 + }, + { + "epoch": 0.2, + "learning_rate": 0.00019889505673516825, + "loss": 0.7481, + "step": 843 + }, + { + "epoch": 0.2, + "learning_rate": 0.00019889235851248068, + "loss": 0.7106, + "step": 844 + }, + { + "epoch": 0.2, + "learning_rate": 0.000198889657017686, + "loss": 0.8818, + "step": 845 + }, + { + "epoch": 0.2, + "learning_rate": 0.00019888695225087365, + "loss": 0.7524, + "step": 846 + }, + { + "epoch": 0.2, + "learning_rate": 0.00019888424421213302, + "loss": 0.8297, + "step": 847 + }, + { + "epoch": 0.2, + "learning_rate": 0.0001988815329015538, + "loss": 0.7312, + "step": 848 + }, + { + "epoch": 0.2, + "learning_rate": 0.00019887881831922568, + "loss": 0.8348, + "step": 849 + }, + { + "epoch": 0.2, + "learning_rate": 0.00019887610046523846, + "loss": 0.7978, + "step": 850 + }, + { + "epoch": 0.2, + "learning_rate": 0.00019887337933968205, + "loss": 0.7902, + "step": 851 + }, + { + "epoch": 0.2, + "learning_rate": 0.00019887065494264653, + "loss": 0.7657, + "step": 852 + }, + { + "epoch": 0.2, + "learning_rate": 0.00019886792727422207, + "loss": 0.7978, + "step": 853 + }, + { + "epoch": 0.2, + "learning_rate": 0.00019886519633449882, + "loss": 0.723, + "step": 854 + }, + { + "epoch": 0.2, + "learning_rate": 0.00019886246212356723, + "loss": 0.7611, + "step": 855 + }, + { + "epoch": 0.2, + "learning_rate": 0.00019885972464151773, + "loss": 0.7221, + "step": 856 + }, + { + "epoch": 0.2, + "learning_rate": 0.0001988569838884409, + "loss": 0.7602, + "step": 857 + }, + { + "epoch": 0.2, + "learning_rate": 0.00019885423986442744, + "loss": 0.7977, + "step": 858 + }, + { + "epoch": 0.2, + "learning_rate": 0.0001988514925695681, + "loss": 0.7477, + "step": 859 + }, + { + "epoch": 0.2, + "learning_rate": 0.00019884874200395384, + "loss": 0.7951, + "step": 860 + }, + { + "epoch": 0.2, + "learning_rate": 0.00019884598816767563, + "loss": 0.802, + "step": 861 + }, + { + "epoch": 0.2, + "learning_rate": 0.0001988432310608246, + "loss": 0.798, + "step": 862 + }, + { + "epoch": 0.2, + "learning_rate": 0.00019884047068349198, + "loss": 0.7599, + "step": 863 + }, + { + "epoch": 0.2, + "learning_rate": 0.00019883770703576912, + "loss": 0.7682, + "step": 864 + }, + { + "epoch": 0.2, + "learning_rate": 0.0001988349401177474, + "loss": 0.8321, + "step": 865 + }, + { + "epoch": 0.2, + "learning_rate": 0.00019883216992951845, + "loss": 0.7511, + "step": 866 + }, + { + "epoch": 0.2, + "learning_rate": 0.00019882939647117387, + "loss": 0.7265, + "step": 867 + }, + { + "epoch": 0.2, + "learning_rate": 0.00019882661974280542, + "loss": 0.7541, + "step": 868 + }, + { + "epoch": 0.2, + "learning_rate": 0.00019882383974450507, + "loss": 0.7121, + "step": 869 + }, + { + "epoch": 0.2, + "learning_rate": 0.00019882105647636472, + "loss": 0.7444, + "step": 870 + }, + { + "epoch": 0.2, + "learning_rate": 0.00019881826993847646, + "loss": 0.7698, + "step": 871 + }, + { + "epoch": 0.2, + "learning_rate": 0.0001988154801309325, + "loss": 0.7572, + "step": 872 + }, + { + "epoch": 0.2, + "learning_rate": 0.00019881268705382516, + "loss": 0.7632, + "step": 873 + }, + { + "epoch": 0.2, + "learning_rate": 0.00019880989070724688, + "loss": 0.7349, + "step": 874 + }, + { + "epoch": 0.2, + "learning_rate": 0.00019880709109129012, + "loss": 0.8271, + "step": 875 + }, + { + "epoch": 0.2, + "learning_rate": 0.00019880428820604759, + "loss": 0.8073, + "step": 876 + }, + { + "epoch": 0.2, + "learning_rate": 0.00019880148205161192, + "loss": 0.7872, + "step": 877 + }, + { + "epoch": 0.2, + "learning_rate": 0.0001987986726280761, + "loss": 0.7895, + "step": 878 + }, + { + "epoch": 0.2, + "learning_rate": 0.00019879585993553298, + "loss": 0.7783, + "step": 879 + }, + { + "epoch": 0.2, + "learning_rate": 0.0001987930439740757, + "loss": 0.767, + "step": 880 + }, + { + "epoch": 0.2, + "learning_rate": 0.00019879022474379734, + "loss": 0.7983, + "step": 881 + }, + { + "epoch": 0.2, + "learning_rate": 0.00019878740224479126, + "loss": 0.832, + "step": 882 + }, + { + "epoch": 0.2, + "learning_rate": 0.00019878457647715082, + "loss": 0.7455, + "step": 883 + }, + { + "epoch": 0.2, + "learning_rate": 0.00019878174744096953, + "loss": 0.8733, + "step": 884 + }, + { + "epoch": 0.2, + "learning_rate": 0.00019877891513634101, + "loss": 0.7894, + "step": 885 + }, + { + "epoch": 0.21, + "learning_rate": 0.00019877607956335892, + "loss": 0.7341, + "step": 886 + }, + { + "epoch": 0.21, + "learning_rate": 0.00019877324072211714, + "loss": 0.7487, + "step": 887 + }, + { + "epoch": 0.21, + "learning_rate": 0.00019877039861270954, + "loss": 0.7991, + "step": 888 + }, + { + "epoch": 0.21, + "learning_rate": 0.00019876755323523023, + "loss": 0.7892, + "step": 889 + }, + { + "epoch": 0.21, + "learning_rate": 0.0001987647045897733, + "loss": 0.7648, + "step": 890 + }, + { + "epoch": 0.21, + "learning_rate": 0.00019876185267643303, + "loss": 0.8899, + "step": 891 + }, + { + "epoch": 0.21, + "learning_rate": 0.00019875899749530377, + "loss": 0.7375, + "step": 892 + }, + { + "epoch": 0.21, + "learning_rate": 0.00019875613904648, + "loss": 0.7643, + "step": 893 + }, + { + "epoch": 0.21, + "learning_rate": 0.0001987532773300563, + "loss": 0.8038, + "step": 894 + }, + { + "epoch": 0.21, + "learning_rate": 0.00019875041234612738, + "loss": 0.8279, + "step": 895 + }, + { + "epoch": 0.21, + "learning_rate": 0.000198747544094788, + "loss": 0.8081, + "step": 896 + }, + { + "epoch": 0.21, + "learning_rate": 0.00019874467257613305, + "loss": 0.7315, + "step": 897 + }, + { + "epoch": 0.21, + "learning_rate": 0.0001987417977902576, + "loss": 0.7956, + "step": 898 + }, + { + "epoch": 0.21, + "learning_rate": 0.0001987389197372567, + "loss": 0.7465, + "step": 899 + }, + { + "epoch": 0.21, + "learning_rate": 0.00019873603841722565, + "loss": 0.7667, + "step": 900 + }, + { + "epoch": 0.21, + "learning_rate": 0.0001987331538302597, + "loss": 0.7133, + "step": 901 + }, + { + "epoch": 0.21, + "learning_rate": 0.00019873026597645442, + "loss": 0.7452, + "step": 902 + }, + { + "epoch": 0.21, + "learning_rate": 0.0001987273748559052, + "loss": 0.8016, + "step": 903 + }, + { + "epoch": 0.21, + "learning_rate": 0.00019872448046870785, + "loss": 0.8022, + "step": 904 + }, + { + "epoch": 0.21, + "learning_rate": 0.00019872158281495803, + "loss": 0.851, + "step": 905 + }, + { + "epoch": 0.21, + "learning_rate": 0.0001987186818947517, + "loss": 0.7805, + "step": 906 + }, + { + "epoch": 0.21, + "learning_rate": 0.00019871577770818477, + "loss": 0.766, + "step": 907 + }, + { + "epoch": 0.21, + "learning_rate": 0.00019871287025535338, + "loss": 0.7636, + "step": 908 + }, + { + "epoch": 0.21, + "learning_rate": 0.0001987099595363537, + "loss": 0.7535, + "step": 909 + }, + { + "epoch": 0.21, + "learning_rate": 0.00019870704555128205, + "loss": 0.7363, + "step": 910 + }, + { + "epoch": 0.21, + "learning_rate": 0.00019870412830023487, + "loss": 0.7769, + "step": 911 + }, + { + "epoch": 0.21, + "learning_rate": 0.00019870120778330867, + "loss": 0.7979, + "step": 912 + }, + { + "epoch": 0.21, + "learning_rate": 0.00019869828400060008, + "loss": 0.7649, + "step": 913 + }, + { + "epoch": 0.21, + "learning_rate": 0.00019869535695220584, + "loss": 0.7042, + "step": 914 + }, + { + "epoch": 0.21, + "learning_rate": 0.0001986924266382228, + "loss": 0.8487, + "step": 915 + }, + { + "epoch": 0.21, + "learning_rate": 0.0001986894930587479, + "loss": 0.7587, + "step": 916 + }, + { + "epoch": 0.21, + "learning_rate": 0.00019868655621387826, + "loss": 0.7838, + "step": 917 + }, + { + "epoch": 0.21, + "learning_rate": 0.00019868361610371097, + "loss": 0.7927, + "step": 918 + }, + { + "epoch": 0.21, + "learning_rate": 0.0001986806727283434, + "loss": 0.7852, + "step": 919 + }, + { + "epoch": 0.21, + "learning_rate": 0.00019867772608787285, + "loss": 0.8045, + "step": 920 + }, + { + "epoch": 0.21, + "learning_rate": 0.0001986747761823969, + "loss": 0.8108, + "step": 921 + }, + { + "epoch": 0.21, + "learning_rate": 0.00019867182301201307, + "loss": 0.7717, + "step": 922 + }, + { + "epoch": 0.21, + "learning_rate": 0.0001986688665768192, + "loss": 0.7009, + "step": 923 + }, + { + "epoch": 0.21, + "learning_rate": 0.00019866590687691294, + "loss": 0.8697, + "step": 924 + }, + { + "epoch": 0.21, + "learning_rate": 0.00019866294391239237, + "loss": 0.7736, + "step": 925 + }, + { + "epoch": 0.21, + "learning_rate": 0.00019865997768335545, + "loss": 0.7315, + "step": 926 + }, + { + "epoch": 0.21, + "learning_rate": 0.00019865700818990034, + "loss": 0.8477, + "step": 927 + }, + { + "epoch": 0.21, + "learning_rate": 0.00019865403543212532, + "loss": 0.7362, + "step": 928 + }, + { + "epoch": 0.22, + "learning_rate": 0.0001986510594101287, + "loss": 0.7611, + "step": 929 + }, + { + "epoch": 0.22, + "learning_rate": 0.00019864808012400896, + "loss": 0.7781, + "step": 930 + }, + { + "epoch": 0.22, + "learning_rate": 0.00019864509757386474, + "loss": 0.7352, + "step": 931 + }, + { + "epoch": 0.22, + "learning_rate": 0.00019864211175979466, + "loss": 0.7676, + "step": 932 + }, + { + "epoch": 0.22, + "learning_rate": 0.0001986391226818975, + "loss": 0.7561, + "step": 933 + }, + { + "epoch": 0.22, + "learning_rate": 0.00019863613034027224, + "loss": 0.8338, + "step": 934 + }, + { + "epoch": 0.22, + "learning_rate": 0.00019863313473501783, + "loss": 0.7424, + "step": 935 + }, + { + "epoch": 0.22, + "learning_rate": 0.00019863013586623343, + "loss": 0.7763, + "step": 936 + }, + { + "epoch": 0.22, + "learning_rate": 0.0001986271337340182, + "loss": 0.7435, + "step": 937 + }, + { + "epoch": 0.22, + "learning_rate": 0.0001986241283384715, + "loss": 0.7912, + "step": 938 + }, + { + "epoch": 0.22, + "learning_rate": 0.0001986211196796928, + "loss": 0.8143, + "step": 939 + }, + { + "epoch": 0.22, + "learning_rate": 0.00019861810775778164, + "loss": 0.8166, + "step": 940 + }, + { + "epoch": 0.22, + "learning_rate": 0.00019861509257283766, + "loss": 0.8158, + "step": 941 + }, + { + "epoch": 0.22, + "learning_rate": 0.00019861207412496062, + "loss": 0.8503, + "step": 942 + }, + { + "epoch": 0.22, + "learning_rate": 0.0001986090524142504, + "loss": 0.7534, + "step": 943 + }, + { + "epoch": 0.22, + "learning_rate": 0.000198606027440807, + "loss": 0.7631, + "step": 944 + }, + { + "epoch": 0.22, + "learning_rate": 0.00019860299920473052, + "loss": 0.7763, + "step": 945 + }, + { + "epoch": 0.22, + "learning_rate": 0.0001985999677061211, + "loss": 0.7873, + "step": 946 + }, + { + "epoch": 0.22, + "learning_rate": 0.00019859693294507907, + "loss": 0.824, + "step": 947 + }, + { + "epoch": 0.22, + "learning_rate": 0.00019859389492170486, + "loss": 0.7102, + "step": 948 + }, + { + "epoch": 0.22, + "learning_rate": 0.00019859085363609896, + "loss": 0.7379, + "step": 949 + }, + { + "epoch": 0.22, + "learning_rate": 0.00019858780908836208, + "loss": 0.6864, + "step": 950 + }, + { + "epoch": 0.22, + "learning_rate": 0.00019858476127859485, + "loss": 0.7555, + "step": 951 + }, + { + "epoch": 0.22, + "learning_rate": 0.00019858171020689818, + "loss": 0.8092, + "step": 952 + }, + { + "epoch": 0.22, + "learning_rate": 0.00019857865587337295, + "loss": 0.7642, + "step": 953 + }, + { + "epoch": 0.22, + "learning_rate": 0.00019857559827812034, + "loss": 0.732, + "step": 954 + }, + { + "epoch": 0.22, + "learning_rate": 0.00019857253742124146, + "loss": 0.7683, + "step": 955 + }, + { + "epoch": 0.22, + "learning_rate": 0.00019856947330283752, + "loss": 0.7734, + "step": 956 + }, + { + "epoch": 0.22, + "learning_rate": 0.00019856640592300998, + "loss": 0.7151, + "step": 957 + }, + { + "epoch": 0.22, + "learning_rate": 0.00019856333528186034, + "loss": 0.8122, + "step": 958 + }, + { + "epoch": 0.22, + "learning_rate": 0.00019856026137949018, + "loss": 0.8087, + "step": 959 + }, + { + "epoch": 0.22, + "learning_rate": 0.00019855718421600118, + "loss": 0.8221, + "step": 960 + }, + { + "epoch": 0.22, + "learning_rate": 0.00019855410379149518, + "loss": 0.774, + "step": 961 + }, + { + "epoch": 0.22, + "learning_rate": 0.0001985510201060741, + "loss": 0.8129, + "step": 962 + }, + { + "epoch": 0.22, + "learning_rate": 0.00019854793315983996, + "loss": 0.7458, + "step": 963 + }, + { + "epoch": 0.22, + "learning_rate": 0.00019854484295289492, + "loss": 0.8116, + "step": 964 + }, + { + "epoch": 0.22, + "learning_rate": 0.00019854174948534124, + "loss": 0.7549, + "step": 965 + }, + { + "epoch": 0.22, + "learning_rate": 0.00019853865275728128, + "loss": 0.732, + "step": 966 + }, + { + "epoch": 0.22, + "learning_rate": 0.00019853555276881744, + "loss": 0.7677, + "step": 967 + }, + { + "epoch": 0.22, + "learning_rate": 0.00019853244952005233, + "loss": 0.7637, + "step": 968 + }, + { + "epoch": 0.22, + "learning_rate": 0.00019852934301108863, + "loss": 0.7745, + "step": 969 + }, + { + "epoch": 0.22, + "learning_rate": 0.00019852623324202916, + "loss": 0.7518, + "step": 970 + }, + { + "epoch": 0.22, + "learning_rate": 0.00019852312021297674, + "loss": 0.7796, + "step": 971 + }, + { + "epoch": 0.23, + "learning_rate": 0.00019852000392403443, + "loss": 0.7407, + "step": 972 + }, + { + "epoch": 0.23, + "learning_rate": 0.0001985168843753053, + "loss": 0.7489, + "step": 973 + }, + { + "epoch": 0.23, + "learning_rate": 0.00019851376156689263, + "loss": 0.7414, + "step": 974 + }, + { + "epoch": 0.23, + "learning_rate": 0.0001985106354988997, + "loss": 0.7586, + "step": 975 + }, + { + "epoch": 0.23, + "learning_rate": 0.0001985075061714299, + "loss": 0.7892, + "step": 976 + }, + { + "epoch": 0.23, + "learning_rate": 0.00019850437358458689, + "loss": 0.8236, + "step": 977 + }, + { + "epoch": 0.23, + "learning_rate": 0.00019850123773847424, + "loss": 0.7523, + "step": 978 + }, + { + "epoch": 0.23, + "learning_rate": 0.00019849809863319574, + "loss": 0.7911, + "step": 979 + }, + { + "epoch": 0.23, + "learning_rate": 0.00019849495626885516, + "loss": 0.7481, + "step": 980 + }, + { + "epoch": 0.23, + "learning_rate": 0.00019849181064555662, + "loss": 0.759, + "step": 981 + }, + { + "epoch": 0.23, + "learning_rate": 0.00019848866176340414, + "loss": 0.738, + "step": 982 + }, + { + "epoch": 0.23, + "learning_rate": 0.00019848550962250184, + "loss": 0.7288, + "step": 983 + }, + { + "epoch": 0.23, + "learning_rate": 0.00019848235422295412, + "loss": 0.7329, + "step": 984 + }, + { + "epoch": 0.23, + "learning_rate": 0.00019847919556486532, + "loss": 0.7725, + "step": 985 + }, + { + "epoch": 0.23, + "learning_rate": 0.00019847603364833998, + "loss": 0.7523, + "step": 986 + }, + { + "epoch": 0.23, + "learning_rate": 0.0001984728684734827, + "loss": 0.7913, + "step": 987 + }, + { + "epoch": 0.23, + "learning_rate": 0.00019846970004039824, + "loss": 0.8298, + "step": 988 + }, + { + "epoch": 0.23, + "learning_rate": 0.00019846652834919142, + "loss": 0.7841, + "step": 989 + }, + { + "epoch": 0.23, + "learning_rate": 0.00019846335339996715, + "loss": 0.7988, + "step": 990 + }, + { + "epoch": 0.23, + "learning_rate": 0.00019846017519283056, + "loss": 0.7639, + "step": 991 + }, + { + "epoch": 0.23, + "learning_rate": 0.00019845699372788672, + "loss": 0.8326, + "step": 992 + }, + { + "epoch": 0.23, + "learning_rate": 0.00019845380900524094, + "loss": 0.7359, + "step": 993 + }, + { + "epoch": 0.23, + "learning_rate": 0.0001984506210249986, + "loss": 0.7713, + "step": 994 + }, + { + "epoch": 0.23, + "learning_rate": 0.0001984474297872652, + "loss": 0.8214, + "step": 995 + }, + { + "epoch": 0.23, + "learning_rate": 0.00019844423529214627, + "loss": 0.7645, + "step": 996 + }, + { + "epoch": 0.23, + "learning_rate": 0.00019844103753974753, + "loss": 0.7315, + "step": 997 + }, + { + "epoch": 0.23, + "learning_rate": 0.00019843783653017484, + "loss": 0.7482, + "step": 998 + }, + { + "epoch": 0.23, + "learning_rate": 0.00019843463226353408, + "loss": 0.8578, + "step": 999 + }, + { + "epoch": 0.23, + "learning_rate": 0.0001984314247399312, + "loss": 0.7806, + "step": 1000 + }, + { + "epoch": 0.23, + "learning_rate": 0.00019842821395947246, + "loss": 0.6996, + "step": 1001 + }, + { + "epoch": 0.23, + "learning_rate": 0.000198424999922264, + "loss": 0.8047, + "step": 1002 + }, + { + "epoch": 0.23, + "learning_rate": 0.0001984217826284122, + "loss": 0.7923, + "step": 1003 + }, + { + "epoch": 0.23, + "learning_rate": 0.00019841856207802349, + "loss": 0.7427, + "step": 1004 + }, + { + "epoch": 0.23, + "learning_rate": 0.0001984153382712045, + "loss": 0.813, + "step": 1005 + }, + { + "epoch": 0.23, + "learning_rate": 0.00019841211120806177, + "loss": 0.8599, + "step": 1006 + }, + { + "epoch": 0.23, + "learning_rate": 0.00019840888088870222, + "loss": 0.779, + "step": 1007 + }, + { + "epoch": 0.23, + "learning_rate": 0.0001984056473132326, + "loss": 0.7752, + "step": 1008 + }, + { + "epoch": 0.23, + "learning_rate": 0.00019840241048176002, + "loss": 0.7345, + "step": 1009 + }, + { + "epoch": 0.23, + "learning_rate": 0.00019839917039439148, + "loss": 0.7673, + "step": 1010 + }, + { + "epoch": 0.23, + "learning_rate": 0.00019839592705123428, + "loss": 0.7406, + "step": 1011 + }, + { + "epoch": 0.23, + "learning_rate": 0.00019839268045239566, + "loss": 0.8254, + "step": 1012 + }, + { + "epoch": 0.23, + "learning_rate": 0.00019838943059798304, + "loss": 0.7944, + "step": 1013 + }, + { + "epoch": 0.23, + "learning_rate": 0.00019838617748810402, + "loss": 0.818, + "step": 1014 + }, + { + "epoch": 0.23, + "learning_rate": 0.00019838292112286617, + "loss": 0.7663, + "step": 1015 + }, + { + "epoch": 0.24, + "learning_rate": 0.00019837966150237728, + "loss": 0.8176, + "step": 1016 + }, + { + "epoch": 0.24, + "learning_rate": 0.00019837639862674516, + "loss": 0.8015, + "step": 1017 + }, + { + "epoch": 0.24, + "learning_rate": 0.0001983731324960778, + "loss": 0.795, + "step": 1018 + }, + { + "epoch": 0.24, + "learning_rate": 0.00019836986311048326, + "loss": 0.8265, + "step": 1019 + }, + { + "epoch": 0.24, + "learning_rate": 0.00019836659047006973, + "loss": 0.7223, + "step": 1020 + }, + { + "epoch": 0.24, + "learning_rate": 0.00019836331457494545, + "loss": 0.7249, + "step": 1021 + }, + { + "epoch": 0.24, + "learning_rate": 0.00019836003542521887, + "loss": 0.7122, + "step": 1022 + }, + { + "epoch": 0.24, + "learning_rate": 0.00019835675302099848, + "loss": 0.7813, + "step": 1023 + }, + { + "epoch": 0.24, + "learning_rate": 0.00019835346736239284, + "loss": 0.7657, + "step": 1024 + }, + { + "epoch": 0.24, + "learning_rate": 0.0001983501784495107, + "loss": 0.7757, + "step": 1025 + }, + { + "epoch": 0.24, + "learning_rate": 0.00019834688628246085, + "loss": 0.774, + "step": 1026 + }, + { + "epoch": 0.24, + "learning_rate": 0.00019834359086135227, + "loss": 0.7902, + "step": 1027 + }, + { + "epoch": 0.24, + "learning_rate": 0.000198340292186294, + "loss": 0.7149, + "step": 1028 + }, + { + "epoch": 0.24, + "learning_rate": 0.0001983369902573951, + "loss": 0.7779, + "step": 1029 + }, + { + "epoch": 0.24, + "learning_rate": 0.0001983336850747649, + "loss": 0.7739, + "step": 1030 + }, + { + "epoch": 0.24, + "learning_rate": 0.00019833037663851276, + "loss": 0.7903, + "step": 1031 + }, + { + "epoch": 0.24, + "learning_rate": 0.0001983270649487481, + "loss": 0.7982, + "step": 1032 + }, + { + "epoch": 0.24, + "learning_rate": 0.00019832375000558054, + "loss": 0.7192, + "step": 1033 + }, + { + "epoch": 0.24, + "learning_rate": 0.00019832043180911975, + "loss": 0.7684, + "step": 1034 + }, + { + "epoch": 0.24, + "learning_rate": 0.0001983171103594755, + "loss": 0.7968, + "step": 1035 + }, + { + "epoch": 0.24, + "learning_rate": 0.00019831378565675773, + "loss": 0.8108, + "step": 1036 + }, + { + "epoch": 0.24, + "learning_rate": 0.0001983104577010764, + "loss": 0.8191, + "step": 1037 + }, + { + "epoch": 0.24, + "learning_rate": 0.00019830712649254167, + "loss": 0.7044, + "step": 1038 + }, + { + "epoch": 0.24, + "learning_rate": 0.00019830379203126375, + "loss": 0.7365, + "step": 1039 + }, + { + "epoch": 0.24, + "learning_rate": 0.00019830045431735294, + "loss": 0.8285, + "step": 1040 + }, + { + "epoch": 0.24, + "learning_rate": 0.0001982971133509197, + "loss": 0.7784, + "step": 1041 + }, + { + "epoch": 0.24, + "learning_rate": 0.00019829376913207456, + "loss": 0.7808, + "step": 1042 + }, + { + "epoch": 0.24, + "learning_rate": 0.00019829042166092818, + "loss": 0.7859, + "step": 1043 + }, + { + "epoch": 0.24, + "learning_rate": 0.00019828707093759134, + "loss": 0.7109, + "step": 1044 + }, + { + "epoch": 0.24, + "learning_rate": 0.00019828371696217488, + "loss": 0.8352, + "step": 1045 + }, + { + "epoch": 0.24, + "learning_rate": 0.0001982803597347898, + "loss": 0.7537, + "step": 1046 + }, + { + "epoch": 0.24, + "learning_rate": 0.0001982769992555471, + "loss": 0.7226, + "step": 1047 + }, + { + "epoch": 0.24, + "learning_rate": 0.0001982736355245581, + "loss": 0.7469, + "step": 1048 + }, + { + "epoch": 0.24, + "learning_rate": 0.00019827026854193403, + "loss": 0.7409, + "step": 1049 + }, + { + "epoch": 0.24, + "learning_rate": 0.00019826689830778626, + "loss": 0.7756, + "step": 1050 + }, + { + "epoch": 0.24, + "learning_rate": 0.00019826352482222638, + "loss": 0.7978, + "step": 1051 + }, + { + "epoch": 0.24, + "learning_rate": 0.00019826014808536595, + "loss": 0.8058, + "step": 1052 + }, + { + "epoch": 0.24, + "learning_rate": 0.00019825676809731671, + "loss": 0.7949, + "step": 1053 + }, + { + "epoch": 0.24, + "learning_rate": 0.00019825338485819055, + "loss": 0.8137, + "step": 1054 + }, + { + "epoch": 0.24, + "learning_rate": 0.00019824999836809935, + "loss": 0.7905, + "step": 1055 + }, + { + "epoch": 0.24, + "learning_rate": 0.00019824660862715517, + "loss": 0.8283, + "step": 1056 + }, + { + "epoch": 0.24, + "learning_rate": 0.0001982432156354702, + "loss": 0.7833, + "step": 1057 + }, + { + "epoch": 0.24, + "learning_rate": 0.00019823981939315664, + "loss": 0.744, + "step": 1058 + }, + { + "epoch": 0.25, + "learning_rate": 0.00019823641990032693, + "loss": 0.7744, + "step": 1059 + }, + { + "epoch": 0.25, + "learning_rate": 0.00019823301715709356, + "loss": 0.8207, + "step": 1060 + }, + { + "epoch": 0.25, + "learning_rate": 0.00019822961116356904, + "loss": 0.7677, + "step": 1061 + }, + { + "epoch": 0.25, + "learning_rate": 0.0001982262019198661, + "loss": 0.7826, + "step": 1062 + }, + { + "epoch": 0.25, + "learning_rate": 0.0001982227894260976, + "loss": 0.8715, + "step": 1063 + }, + { + "epoch": 0.25, + "learning_rate": 0.0001982193736823764, + "loss": 0.8244, + "step": 1064 + }, + { + "epoch": 0.25, + "learning_rate": 0.0001982159546888155, + "loss": 0.8314, + "step": 1065 + }, + { + "epoch": 0.25, + "learning_rate": 0.0001982125324455281, + "loss": 0.7528, + "step": 1066 + }, + { + "epoch": 0.25, + "learning_rate": 0.00019820910695262734, + "loss": 0.7973, + "step": 1067 + }, + { + "epoch": 0.25, + "learning_rate": 0.00019820567821022663, + "loss": 0.773, + "step": 1068 + }, + { + "epoch": 0.25, + "learning_rate": 0.00019820224621843938, + "loss": 0.7946, + "step": 1069 + }, + { + "epoch": 0.25, + "learning_rate": 0.00019819881097737915, + "loss": 0.7557, + "step": 1070 + }, + { + "epoch": 0.25, + "learning_rate": 0.00019819537248715966, + "loss": 0.7288, + "step": 1071 + }, + { + "epoch": 0.25, + "learning_rate": 0.0001981919307478946, + "loss": 0.7538, + "step": 1072 + }, + { + "epoch": 0.25, + "learning_rate": 0.0001981884857596979, + "loss": 0.8472, + "step": 1073 + }, + { + "epoch": 0.25, + "learning_rate": 0.0001981850375226835, + "loss": 0.7633, + "step": 1074 + }, + { + "epoch": 0.25, + "learning_rate": 0.00019818158603696558, + "loss": 0.7423, + "step": 1075 + }, + { + "epoch": 0.25, + "learning_rate": 0.00019817813130265825, + "loss": 0.7788, + "step": 1076 + }, + { + "epoch": 0.25, + "learning_rate": 0.00019817467331987586, + "loss": 0.6996, + "step": 1077 + }, + { + "epoch": 0.25, + "learning_rate": 0.00019817121208873283, + "loss": 0.7042, + "step": 1078 + }, + { + "epoch": 0.25, + "learning_rate": 0.00019816774760934367, + "loss": 0.8109, + "step": 1079 + }, + { + "epoch": 0.25, + "learning_rate": 0.00019816427988182304, + "loss": 0.7844, + "step": 1080 + }, + { + "epoch": 0.25, + "learning_rate": 0.00019816080890628564, + "loss": 0.8378, + "step": 1081 + }, + { + "epoch": 0.25, + "learning_rate": 0.00019815733468284632, + "loss": 0.8074, + "step": 1082 + }, + { + "epoch": 0.25, + "learning_rate": 0.00019815385721162003, + "loss": 0.8401, + "step": 1083 + }, + { + "epoch": 0.25, + "learning_rate": 0.0001981503764927219, + "loss": 0.7465, + "step": 1084 + }, + { + "epoch": 0.25, + "learning_rate": 0.000198146892526267, + "loss": 0.7957, + "step": 1085 + }, + { + "epoch": 0.25, + "learning_rate": 0.00019814340531237065, + "loss": 0.7814, + "step": 1086 + }, + { + "epoch": 0.25, + "learning_rate": 0.00019813991485114824, + "loss": 0.7633, + "step": 1087 + }, + { + "epoch": 0.25, + "learning_rate": 0.00019813642114271528, + "loss": 0.7503, + "step": 1088 + }, + { + "epoch": 0.25, + "learning_rate": 0.00019813292418718732, + "loss": 0.8464, + "step": 1089 + }, + { + "epoch": 0.25, + "learning_rate": 0.0001981294239846801, + "loss": 0.738, + "step": 1090 + }, + { + "epoch": 0.25, + "learning_rate": 0.00019812592053530944, + "loss": 0.7812, + "step": 1091 + }, + { + "epoch": 0.25, + "learning_rate": 0.0001981224138391912, + "loss": 0.8099, + "step": 1092 + }, + { + "epoch": 0.25, + "learning_rate": 0.00019811890389644145, + "loss": 0.7437, + "step": 1093 + }, + { + "epoch": 0.25, + "learning_rate": 0.00019811539070717634, + "loss": 0.787, + "step": 1094 + }, + { + "epoch": 0.25, + "learning_rate": 0.00019811187427151212, + "loss": 0.8305, + "step": 1095 + }, + { + "epoch": 0.25, + "learning_rate": 0.0001981083545895651, + "loss": 0.7848, + "step": 1096 + }, + { + "epoch": 0.25, + "learning_rate": 0.00019810483166145178, + "loss": 0.7131, + "step": 1097 + }, + { + "epoch": 0.25, + "learning_rate": 0.00019810130548728866, + "loss": 0.7572, + "step": 1098 + }, + { + "epoch": 0.25, + "learning_rate": 0.0001980977760671925, + "loss": 0.7203, + "step": 1099 + }, + { + "epoch": 0.25, + "learning_rate": 0.00019809424340127998, + "loss": 0.7462, + "step": 1100 + }, + { + "epoch": 0.25, + "learning_rate": 0.00019809070748966808, + "loss": 0.7526, + "step": 1101 + }, + { + "epoch": 0.26, + "learning_rate": 0.00019808716833247375, + "loss": 0.8209, + "step": 1102 + }, + { + "epoch": 0.26, + "learning_rate": 0.00019808362592981408, + "loss": 0.765, + "step": 1103 + }, + { + "epoch": 0.26, + "learning_rate": 0.00019808008028180632, + "loss": 0.7736, + "step": 1104 + }, + { + "epoch": 0.26, + "learning_rate": 0.0001980765313885678, + "loss": 0.798, + "step": 1105 + }, + { + "epoch": 0.26, + "learning_rate": 0.00019807297925021584, + "loss": 0.7512, + "step": 1106 + }, + { + "epoch": 0.26, + "learning_rate": 0.00019806942386686805, + "loss": 0.7517, + "step": 1107 + }, + { + "epoch": 0.26, + "learning_rate": 0.0001980658652386421, + "loss": 0.7331, + "step": 1108 + }, + { + "epoch": 0.26, + "learning_rate": 0.00019806230336565566, + "loss": 0.6801, + "step": 1109 + }, + { + "epoch": 0.26, + "learning_rate": 0.00019805873824802666, + "loss": 0.7871, + "step": 1110 + }, + { + "epoch": 0.26, + "learning_rate": 0.000198055169885873, + "loss": 0.8183, + "step": 1111 + }, + { + "epoch": 0.26, + "learning_rate": 0.00019805159827931276, + "loss": 0.7986, + "step": 1112 + }, + { + "epoch": 0.26, + "learning_rate": 0.00019804802342846412, + "loss": 0.8222, + "step": 1113 + }, + { + "epoch": 0.26, + "learning_rate": 0.0001980444453334454, + "loss": 0.7497, + "step": 1114 + }, + { + "epoch": 0.26, + "learning_rate": 0.00019804086399437493, + "loss": 0.7949, + "step": 1115 + }, + { + "epoch": 0.26, + "learning_rate": 0.00019803727941137124, + "loss": 0.8546, + "step": 1116 + }, + { + "epoch": 0.26, + "learning_rate": 0.00019803369158455294, + "loss": 0.7361, + "step": 1117 + }, + { + "epoch": 0.26, + "learning_rate": 0.00019803010051403872, + "loss": 0.8341, + "step": 1118 + }, + { + "epoch": 0.26, + "learning_rate": 0.00019802650619994743, + "loss": 0.7617, + "step": 1119 + }, + { + "epoch": 0.26, + "learning_rate": 0.00019802290864239796, + "loss": 0.7573, + "step": 1120 + }, + { + "epoch": 0.26, + "learning_rate": 0.0001980193078415094, + "loss": 0.799, + "step": 1121 + }, + { + "epoch": 0.26, + "learning_rate": 0.00019801570379740082, + "loss": 0.7718, + "step": 1122 + }, + { + "epoch": 0.26, + "learning_rate": 0.00019801209651019154, + "loss": 0.7355, + "step": 1123 + }, + { + "epoch": 0.26, + "learning_rate": 0.00019800848598000082, + "loss": 0.818, + "step": 1124 + }, + { + "epoch": 0.26, + "learning_rate": 0.00019800487220694826, + "loss": 0.7413, + "step": 1125 + }, + { + "epoch": 0.26, + "learning_rate": 0.00019800125519115331, + "loss": 0.8442, + "step": 1126 + }, + { + "epoch": 0.26, + "learning_rate": 0.0001979976349327357, + "loss": 0.7562, + "step": 1127 + }, + { + "epoch": 0.26, + "learning_rate": 0.00019799401143181524, + "loss": 0.7964, + "step": 1128 + }, + { + "epoch": 0.26, + "learning_rate": 0.00019799038468851178, + "loss": 0.7697, + "step": 1129 + }, + { + "epoch": 0.26, + "learning_rate": 0.00019798675470294533, + "loss": 0.815, + "step": 1130 + }, + { + "epoch": 0.26, + "learning_rate": 0.00019798312147523598, + "loss": 0.7861, + "step": 1131 + }, + { + "epoch": 0.26, + "learning_rate": 0.00019797948500550398, + "loss": 0.7241, + "step": 1132 + }, + { + "epoch": 0.26, + "learning_rate": 0.00019797584529386962, + "loss": 0.7592, + "step": 1133 + }, + { + "epoch": 0.26, + "learning_rate": 0.0001979722023404534, + "loss": 0.7846, + "step": 1134 + }, + { + "epoch": 0.26, + "learning_rate": 0.00019796855614537576, + "loss": 0.8187, + "step": 1135 + }, + { + "epoch": 0.26, + "learning_rate": 0.0001979649067087574, + "loss": 0.645, + "step": 1136 + }, + { + "epoch": 0.26, + "learning_rate": 0.00019796125403071907, + "loss": 0.7446, + "step": 1137 + }, + { + "epoch": 0.26, + "learning_rate": 0.00019795759811138156, + "loss": 0.7413, + "step": 1138 + }, + { + "epoch": 0.26, + "learning_rate": 0.00019795393895086596, + "loss": 0.6966, + "step": 1139 + }, + { + "epoch": 0.26, + "learning_rate": 0.00019795027654929325, + "loss": 0.7282, + "step": 1140 + }, + { + "epoch": 0.26, + "learning_rate": 0.00019794661090678464, + "loss": 0.8217, + "step": 1141 + }, + { + "epoch": 0.26, + "learning_rate": 0.0001979429420234614, + "loss": 0.7984, + "step": 1142 + }, + { + "epoch": 0.26, + "learning_rate": 0.00019793926989944495, + "loss": 0.8325, + "step": 1143 + }, + { + "epoch": 0.26, + "learning_rate": 0.00019793559453485675, + "loss": 0.8431, + "step": 1144 + }, + { + "epoch": 0.27, + "learning_rate": 0.00019793191592981844, + "loss": 0.7888, + "step": 1145 + }, + { + "epoch": 0.27, + "learning_rate": 0.00019792823408445174, + "loss": 0.7064, + "step": 1146 + }, + { + "epoch": 0.27, + "learning_rate": 0.00019792454899887843, + "loss": 0.7815, + "step": 1147 + }, + { + "epoch": 0.27, + "learning_rate": 0.00019792086067322052, + "loss": 0.7514, + "step": 1148 + }, + { + "epoch": 0.27, + "learning_rate": 0.0001979171691076, + "loss": 0.7511, + "step": 1149 + }, + { + "epoch": 0.27, + "learning_rate": 0.00019791347430213904, + "loss": 0.7879, + "step": 1150 + }, + { + "epoch": 0.27, + "learning_rate": 0.00019790977625695985, + "loss": 0.713, + "step": 1151 + }, + { + "epoch": 0.27, + "learning_rate": 0.0001979060749721848, + "loss": 0.7451, + "step": 1152 + }, + { + "epoch": 0.27, + "learning_rate": 0.00019790237044793637, + "loss": 0.8109, + "step": 1153 + }, + { + "epoch": 0.27, + "learning_rate": 0.00019789866268433716, + "loss": 0.7597, + "step": 1154 + }, + { + "epoch": 0.27, + "learning_rate": 0.00019789495168150975, + "loss": 0.7814, + "step": 1155 + }, + { + "epoch": 0.27, + "learning_rate": 0.00019789123743957707, + "loss": 0.762, + "step": 1156 + }, + { + "epoch": 0.27, + "learning_rate": 0.0001978875199586619, + "loss": 0.7868, + "step": 1157 + }, + { + "epoch": 0.27, + "learning_rate": 0.0001978837992388873, + "loss": 0.7238, + "step": 1158 + }, + { + "epoch": 0.27, + "learning_rate": 0.00019788007528037638, + "loss": 0.8193, + "step": 1159 + }, + { + "epoch": 0.27, + "learning_rate": 0.00019787634808325234, + "loss": 0.8377, + "step": 1160 + }, + { + "epoch": 0.27, + "learning_rate": 0.0001978726176476385, + "loss": 0.8235, + "step": 1161 + }, + { + "epoch": 0.27, + "learning_rate": 0.0001978688839736583, + "loss": 0.7345, + "step": 1162 + }, + { + "epoch": 0.27, + "learning_rate": 0.00019786514706143527, + "loss": 0.7323, + "step": 1163 + }, + { + "epoch": 0.27, + "learning_rate": 0.00019786140691109306, + "loss": 0.7857, + "step": 1164 + }, + { + "epoch": 0.27, + "learning_rate": 0.00019785766352275542, + "loss": 0.7616, + "step": 1165 + }, + { + "epoch": 0.27, + "learning_rate": 0.00019785391689654621, + "loss": 0.7761, + "step": 1166 + }, + { + "epoch": 0.27, + "learning_rate": 0.0001978501670325894, + "loss": 0.7486, + "step": 1167 + }, + { + "epoch": 0.27, + "learning_rate": 0.0001978464139310091, + "loss": 0.7986, + "step": 1168 + }, + { + "epoch": 0.27, + "learning_rate": 0.00019784265759192944, + "loss": 0.7937, + "step": 1169 + }, + { + "epoch": 0.27, + "learning_rate": 0.0001978388980154747, + "loss": 0.8125, + "step": 1170 + }, + { + "epoch": 0.27, + "learning_rate": 0.00019783513520176931, + "loss": 0.6969, + "step": 1171 + }, + { + "epoch": 0.27, + "learning_rate": 0.00019783136915093775, + "loss": 0.7992, + "step": 1172 + }, + { + "epoch": 0.27, + "learning_rate": 0.00019782759986310464, + "loss": 0.7196, + "step": 1173 + }, + { + "epoch": 0.27, + "learning_rate": 0.00019782382733839473, + "loss": 0.7758, + "step": 1174 + }, + { + "epoch": 0.27, + "learning_rate": 0.0001978200515769328, + "loss": 0.8438, + "step": 1175 + }, + { + "epoch": 0.27, + "learning_rate": 0.00019781627257884376, + "loss": 0.72, + "step": 1176 + }, + { + "epoch": 0.27, + "learning_rate": 0.00019781249034425268, + "loss": 0.816, + "step": 1177 + }, + { + "epoch": 0.27, + "learning_rate": 0.00019780870487328474, + "loss": 0.8128, + "step": 1178 + }, + { + "epoch": 0.27, + "learning_rate": 0.00019780491616606512, + "loss": 0.814, + "step": 1179 + }, + { + "epoch": 0.27, + "learning_rate": 0.00019780112422271927, + "loss": 0.7655, + "step": 1180 + }, + { + "epoch": 0.27, + "learning_rate": 0.00019779732904337254, + "loss": 0.6974, + "step": 1181 + }, + { + "epoch": 0.27, + "learning_rate": 0.00019779353062815059, + "loss": 0.7732, + "step": 1182 + }, + { + "epoch": 0.27, + "learning_rate": 0.00019778972897717908, + "loss": 0.739, + "step": 1183 + }, + { + "epoch": 0.27, + "learning_rate": 0.00019778592409058378, + "loss": 0.749, + "step": 1184 + }, + { + "epoch": 0.27, + "learning_rate": 0.00019778211596849059, + "loss": 0.7604, + "step": 1185 + }, + { + "epoch": 0.27, + "learning_rate": 0.00019777830461102552, + "loss": 0.7535, + "step": 1186 + }, + { + "epoch": 0.27, + "learning_rate": 0.00019777449001831467, + "loss": 0.7586, + "step": 1187 + }, + { + "epoch": 0.28, + "learning_rate": 0.0001977706721904843, + "loss": 0.757, + "step": 1188 + }, + { + "epoch": 0.28, + "learning_rate": 0.00019776685112766064, + "loss": 0.7932, + "step": 1189 + }, + { + "epoch": 0.28, + "learning_rate": 0.0001977630268299702, + "loss": 0.7751, + "step": 1190 + }, + { + "epoch": 0.28, + "learning_rate": 0.00019775919929753953, + "loss": 0.7104, + "step": 1191 + }, + { + "epoch": 0.28, + "learning_rate": 0.0001977553685304952, + "loss": 0.7446, + "step": 1192 + }, + { + "epoch": 0.28, + "learning_rate": 0.00019775153452896398, + "loss": 0.7957, + "step": 1193 + }, + { + "epoch": 0.28, + "learning_rate": 0.00019774769729307274, + "loss": 0.7605, + "step": 1194 + }, + { + "epoch": 0.28, + "learning_rate": 0.00019774385682294848, + "loss": 0.7869, + "step": 1195 + }, + { + "epoch": 0.28, + "learning_rate": 0.00019774001311871823, + "loss": 0.7699, + "step": 1196 + }, + { + "epoch": 0.28, + "learning_rate": 0.00019773616618050916, + "loss": 0.7761, + "step": 1197 + }, + { + "epoch": 0.28, + "learning_rate": 0.0001977323160084486, + "loss": 0.804, + "step": 1198 + }, + { + "epoch": 0.28, + "learning_rate": 0.0001977284626026639, + "loss": 0.7682, + "step": 1199 + }, + { + "epoch": 0.28, + "learning_rate": 0.0001977246059632826, + "loss": 0.7562, + "step": 1200 + }, + { + "epoch": 0.28, + "learning_rate": 0.00019772074609043222, + "loss": 0.7447, + "step": 1201 + }, + { + "epoch": 0.28, + "learning_rate": 0.0001977168829842406, + "loss": 0.7559, + "step": 1202 + }, + { + "epoch": 0.28, + "learning_rate": 0.0001977130166448355, + "loss": 0.7783, + "step": 1203 + }, + { + "epoch": 0.28, + "learning_rate": 0.0001977091470723448, + "loss": 0.7547, + "step": 1204 + }, + { + "epoch": 0.28, + "learning_rate": 0.0001977052742668966, + "loss": 0.7868, + "step": 1205 + }, + { + "epoch": 0.28, + "learning_rate": 0.00019770139822861905, + "loss": 0.7915, + "step": 1206 + }, + { + "epoch": 0.28, + "learning_rate": 0.00019769751895764036, + "loss": 0.7113, + "step": 1207 + }, + { + "epoch": 0.28, + "learning_rate": 0.00019769363645408887, + "loss": 0.6996, + "step": 1208 + }, + { + "epoch": 0.28, + "learning_rate": 0.00019768975071809307, + "loss": 0.8125, + "step": 1209 + }, + { + "epoch": 0.28, + "learning_rate": 0.00019768586174978157, + "loss": 0.769, + "step": 1210 + }, + { + "epoch": 0.28, + "learning_rate": 0.00019768196954928296, + "loss": 0.8157, + "step": 1211 + }, + { + "epoch": 0.28, + "learning_rate": 0.0001976780741167261, + "loss": 0.8269, + "step": 1212 + }, + { + "epoch": 0.28, + "learning_rate": 0.00019767417545223982, + "loss": 0.7532, + "step": 1213 + }, + { + "epoch": 0.28, + "learning_rate": 0.00019767027355595317, + "loss": 0.81, + "step": 1214 + }, + { + "epoch": 0.28, + "learning_rate": 0.0001976663684279952, + "loss": 0.7285, + "step": 1215 + }, + { + "epoch": 0.28, + "learning_rate": 0.00019766246006849522, + "loss": 0.7789, + "step": 1216 + }, + { + "epoch": 0.28, + "learning_rate": 0.0001976585484775824, + "loss": 0.768, + "step": 1217 + }, + { + "epoch": 0.28, + "learning_rate": 0.0001976546336553863, + "loss": 0.7111, + "step": 1218 + }, + { + "epoch": 0.28, + "learning_rate": 0.00019765071560203637, + "loss": 0.7495, + "step": 1219 + }, + { + "epoch": 0.28, + "learning_rate": 0.0001976467943176623, + "loss": 0.8338, + "step": 1220 + }, + { + "epoch": 0.28, + "learning_rate": 0.00019764286980239378, + "loss": 0.7826, + "step": 1221 + }, + { + "epoch": 0.28, + "learning_rate": 0.00019763894205636072, + "loss": 0.7478, + "step": 1222 + }, + { + "epoch": 0.28, + "learning_rate": 0.00019763501107969303, + "loss": 0.7788, + "step": 1223 + }, + { + "epoch": 0.28, + "learning_rate": 0.00019763107687252083, + "loss": 0.8376, + "step": 1224 + }, + { + "epoch": 0.28, + "learning_rate": 0.00019762713943497425, + "loss": 0.7509, + "step": 1225 + }, + { + "epoch": 0.28, + "learning_rate": 0.00019762319876718355, + "loss": 0.7513, + "step": 1226 + }, + { + "epoch": 0.28, + "learning_rate": 0.0001976192548692792, + "loss": 0.7572, + "step": 1227 + }, + { + "epoch": 0.28, + "learning_rate": 0.0001976153077413916, + "loss": 0.7489, + "step": 1228 + }, + { + "epoch": 0.28, + "learning_rate": 0.00019761135738365142, + "loss": 0.6974, + "step": 1229 + }, + { + "epoch": 0.28, + "learning_rate": 0.00019760740379618934, + "loss": 0.7612, + "step": 1230 + }, + { + "epoch": 0.28, + "learning_rate": 0.00019760344697913618, + "loss": 0.7539, + "step": 1231 + }, + { + "epoch": 0.29, + "learning_rate": 0.00019759948693262287, + "loss": 0.7993, + "step": 1232 + }, + { + "epoch": 0.29, + "learning_rate": 0.0001975955236567804, + "loss": 0.7668, + "step": 1233 + }, + { + "epoch": 0.29, + "learning_rate": 0.00019759155715173997, + "loss": 0.7406, + "step": 1234 + }, + { + "epoch": 0.29, + "learning_rate": 0.00019758758741763276, + "loss": 0.84, + "step": 1235 + }, + { + "epoch": 0.29, + "learning_rate": 0.00019758361445459014, + "loss": 0.8156, + "step": 1236 + }, + { + "epoch": 0.29, + "learning_rate": 0.00019757963826274357, + "loss": 0.8152, + "step": 1237 + }, + { + "epoch": 0.29, + "learning_rate": 0.0001975756588422246, + "loss": 0.7422, + "step": 1238 + }, + { + "epoch": 0.29, + "learning_rate": 0.00019757167619316495, + "loss": 0.7926, + "step": 1239 + }, + { + "epoch": 0.29, + "learning_rate": 0.00019756769031569632, + "loss": 0.7889, + "step": 1240 + }, + { + "epoch": 0.29, + "learning_rate": 0.00019756370120995066, + "loss": 0.7503, + "step": 1241 + }, + { + "epoch": 0.29, + "learning_rate": 0.0001975597088760599, + "loss": 0.7746, + "step": 1242 + }, + { + "epoch": 0.29, + "learning_rate": 0.0001975557133141562, + "loss": 0.7578, + "step": 1243 + }, + { + "epoch": 0.29, + "learning_rate": 0.0001975517145243717, + "loss": 0.7968, + "step": 1244 + }, + { + "epoch": 0.29, + "learning_rate": 0.00019754771250683875, + "loss": 0.835, + "step": 1245 + }, + { + "epoch": 0.29, + "learning_rate": 0.00019754370726168973, + "loss": 0.8047, + "step": 1246 + }, + { + "epoch": 0.29, + "learning_rate": 0.00019753969878905724, + "loss": 0.7857, + "step": 1247 + }, + { + "epoch": 0.29, + "learning_rate": 0.00019753568708907383, + "loss": 0.8333, + "step": 1248 + }, + { + "epoch": 0.29, + "learning_rate": 0.0001975316721618723, + "loss": 0.7447, + "step": 1249 + }, + { + "epoch": 0.29, + "learning_rate": 0.00019752765400758544, + "loss": 0.71, + "step": 1250 + }, + { + "epoch": 0.29, + "learning_rate": 0.0001975236326263462, + "loss": 0.744, + "step": 1251 + }, + { + "epoch": 0.29, + "learning_rate": 0.00019751960801828769, + "loss": 0.7605, + "step": 1252 + }, + { + "epoch": 0.29, + "learning_rate": 0.00019751558018354305, + "loss": 0.8101, + "step": 1253 + }, + { + "epoch": 0.29, + "learning_rate": 0.00019751154912224555, + "loss": 0.7548, + "step": 1254 + }, + { + "epoch": 0.29, + "learning_rate": 0.00019750751483452853, + "loss": 0.7587, + "step": 1255 + }, + { + "epoch": 0.29, + "learning_rate": 0.00019750347732052555, + "loss": 0.7774, + "step": 1256 + }, + { + "epoch": 0.29, + "learning_rate": 0.00019749943658037012, + "loss": 0.7791, + "step": 1257 + }, + { + "epoch": 0.29, + "learning_rate": 0.00019749539261419602, + "loss": 0.7933, + "step": 1258 + }, + { + "epoch": 0.29, + "learning_rate": 0.000197491345422137, + "loss": 0.738, + "step": 1259 + }, + { + "epoch": 0.29, + "learning_rate": 0.000197487295004327, + "loss": 0.7631, + "step": 1260 + }, + { + "epoch": 0.29, + "learning_rate": 0.0001974832413609, + "loss": 0.7455, + "step": 1261 + }, + { + "epoch": 0.29, + "learning_rate": 0.00019747918449199015, + "loss": 0.774, + "step": 1262 + }, + { + "epoch": 0.29, + "learning_rate": 0.00019747512439773171, + "loss": 0.7756, + "step": 1263 + }, + { + "epoch": 0.29, + "learning_rate": 0.00019747106107825897, + "loss": 0.8287, + "step": 1264 + }, + { + "epoch": 0.29, + "learning_rate": 0.00019746699453370638, + "loss": 0.7537, + "step": 1265 + }, + { + "epoch": 0.29, + "learning_rate": 0.00019746292476420854, + "loss": 0.7651, + "step": 1266 + }, + { + "epoch": 0.29, + "learning_rate": 0.00019745885176990007, + "loss": 0.7309, + "step": 1267 + }, + { + "epoch": 0.29, + "learning_rate": 0.00019745477555091574, + "loss": 0.7243, + "step": 1268 + }, + { + "epoch": 0.29, + "learning_rate": 0.0001974506961073904, + "loss": 0.7385, + "step": 1269 + }, + { + "epoch": 0.29, + "learning_rate": 0.00019744661343945908, + "loss": 0.7365, + "step": 1270 + }, + { + "epoch": 0.29, + "learning_rate": 0.00019744252754725684, + "loss": 0.7134, + "step": 1271 + }, + { + "epoch": 0.29, + "learning_rate": 0.00019743843843091888, + "loss": 0.8651, + "step": 1272 + }, + { + "epoch": 0.29, + "learning_rate": 0.00019743434609058045, + "loss": 0.7229, + "step": 1273 + }, + { + "epoch": 0.29, + "learning_rate": 0.00019743025052637703, + "loss": 0.7968, + "step": 1274 + }, + { + "epoch": 0.3, + "learning_rate": 0.00019742615173844405, + "loss": 0.7774, + "step": 1275 + }, + { + "epoch": 0.3, + "learning_rate": 0.00019742204972691724, + "loss": 0.7085, + "step": 1276 + }, + { + "epoch": 0.3, + "learning_rate": 0.00019741794449193223, + "loss": 0.7432, + "step": 1277 + }, + { + "epoch": 0.3, + "learning_rate": 0.00019741383603362489, + "loss": 0.7992, + "step": 1278 + }, + { + "epoch": 0.3, + "learning_rate": 0.00019740972435213115, + "loss": 0.7319, + "step": 1279 + }, + { + "epoch": 0.3, + "learning_rate": 0.00019740560944758704, + "loss": 0.7213, + "step": 1280 + }, + { + "epoch": 0.3, + "learning_rate": 0.00019740149132012875, + "loss": 0.682, + "step": 1281 + }, + { + "epoch": 0.3, + "learning_rate": 0.00019739736996989254, + "loss": 0.7608, + "step": 1282 + }, + { + "epoch": 0.3, + "learning_rate": 0.00019739324539701475, + "loss": 0.7845, + "step": 1283 + }, + { + "epoch": 0.3, + "learning_rate": 0.00019738911760163182, + "loss": 0.7424, + "step": 1284 + }, + { + "epoch": 0.3, + "learning_rate": 0.0001973849865838804, + "loss": 0.7311, + "step": 1285 + }, + { + "epoch": 0.3, + "learning_rate": 0.0001973808523438971, + "loss": 0.8003, + "step": 1286 + }, + { + "epoch": 0.3, + "learning_rate": 0.0001973767148818188, + "loss": 0.738, + "step": 1287 + }, + { + "epoch": 0.3, + "learning_rate": 0.00019737257419778233, + "loss": 0.8216, + "step": 1288 + }, + { + "epoch": 0.3, + "learning_rate": 0.00019736843029192472, + "loss": 0.759, + "step": 1289 + }, + { + "epoch": 0.3, + "learning_rate": 0.00019736428316438306, + "loss": 0.7788, + "step": 1290 + }, + { + "epoch": 0.3, + "learning_rate": 0.00019736013281529462, + "loss": 0.722, + "step": 1291 + }, + { + "epoch": 0.3, + "learning_rate": 0.00019735597924479668, + "loss": 0.7488, + "step": 1292 + }, + { + "epoch": 0.3, + "learning_rate": 0.00019735182245302668, + "loss": 0.7334, + "step": 1293 + }, + { + "epoch": 0.3, + "learning_rate": 0.00019734766244012216, + "loss": 0.7657, + "step": 1294 + }, + { + "epoch": 0.3, + "learning_rate": 0.00019734349920622076, + "loss": 0.766, + "step": 1295 + }, + { + "epoch": 0.3, + "learning_rate": 0.00019733933275146027, + "loss": 0.8192, + "step": 1296 + }, + { + "epoch": 0.3, + "learning_rate": 0.00019733516307597848, + "loss": 0.8064, + "step": 1297 + }, + { + "epoch": 0.3, + "learning_rate": 0.00019733099017991341, + "loss": 0.7609, + "step": 1298 + }, + { + "epoch": 0.3, + "learning_rate": 0.0001973268140634031, + "loss": 0.7311, + "step": 1299 + }, + { + "epoch": 0.3, + "learning_rate": 0.00019732263472658574, + "loss": 0.7082, + "step": 1300 + }, + { + "epoch": 0.3, + "learning_rate": 0.0001973184521695996, + "loss": 0.7526, + "step": 1301 + }, + { + "epoch": 0.3, + "learning_rate": 0.0001973142663925831, + "loss": 0.7706, + "step": 1302 + }, + { + "epoch": 0.3, + "learning_rate": 0.0001973100773956747, + "loss": 0.7497, + "step": 1303 + }, + { + "epoch": 0.3, + "learning_rate": 0.00019730588517901304, + "loss": 0.7764, + "step": 1304 + }, + { + "epoch": 0.3, + "learning_rate": 0.0001973016897427368, + "loss": 0.805, + "step": 1305 + }, + { + "epoch": 0.3, + "learning_rate": 0.00019729749108698483, + "loss": 0.8075, + "step": 1306 + }, + { + "epoch": 0.3, + "learning_rate": 0.00019729328921189602, + "loss": 0.7968, + "step": 1307 + }, + { + "epoch": 0.3, + "learning_rate": 0.00019728908411760943, + "loss": 0.7523, + "step": 1308 + }, + { + "epoch": 0.3, + "learning_rate": 0.00019728487580426417, + "loss": 0.7685, + "step": 1309 + }, + { + "epoch": 0.3, + "learning_rate": 0.0001972806642719995, + "loss": 0.7312, + "step": 1310 + }, + { + "epoch": 0.3, + "learning_rate": 0.00019727644952095475, + "loss": 0.7998, + "step": 1311 + }, + { + "epoch": 0.3, + "learning_rate": 0.00019727223155126936, + "loss": 0.8208, + "step": 1312 + }, + { + "epoch": 0.3, + "learning_rate": 0.000197268010363083, + "loss": 0.7416, + "step": 1313 + }, + { + "epoch": 0.3, + "learning_rate": 0.00019726378595653516, + "loss": 0.7824, + "step": 1314 + }, + { + "epoch": 0.3, + "learning_rate": 0.00019725955833176578, + "loss": 0.7247, + "step": 1315 + }, + { + "epoch": 0.3, + "learning_rate": 0.00019725532748891467, + "loss": 0.753, + "step": 1316 + }, + { + "epoch": 0.3, + "learning_rate": 0.0001972510934281218, + "loss": 0.7493, + "step": 1317 + }, + { + "epoch": 0.31, + "learning_rate": 0.00019724685614952732, + "loss": 0.7539, + "step": 1318 + }, + { + "epoch": 0.31, + "learning_rate": 0.00019724261565327137, + "loss": 0.7477, + "step": 1319 + }, + { + "epoch": 0.31, + "learning_rate": 0.0001972383719394943, + "loss": 0.7503, + "step": 1320 + }, + { + "epoch": 0.31, + "learning_rate": 0.00019723412500833654, + "loss": 0.6795, + "step": 1321 + }, + { + "epoch": 0.31, + "learning_rate": 0.00019722987485993855, + "loss": 0.7709, + "step": 1322 + }, + { + "epoch": 0.31, + "learning_rate": 0.000197225621494441, + "loss": 0.8006, + "step": 1323 + }, + { + "epoch": 0.31, + "learning_rate": 0.0001972213649119846, + "loss": 0.8048, + "step": 1324 + }, + { + "epoch": 0.31, + "learning_rate": 0.00019721710511271022, + "loss": 0.7671, + "step": 1325 + }, + { + "epoch": 0.31, + "learning_rate": 0.00019721284209675877, + "loss": 0.7418, + "step": 1326 + }, + { + "epoch": 0.31, + "learning_rate": 0.00019720857586427135, + "loss": 0.8054, + "step": 1327 + }, + { + "epoch": 0.31, + "learning_rate": 0.0001972043064153891, + "loss": 0.776, + "step": 1328 + }, + { + "epoch": 0.31, + "learning_rate": 0.0001972000337502532, + "loss": 0.8358, + "step": 1329 + }, + { + "epoch": 0.31, + "learning_rate": 0.00019719575786900518, + "loss": 0.7456, + "step": 1330 + }, + { + "epoch": 0.31, + "learning_rate": 0.00019719147877178642, + "loss": 0.7653, + "step": 1331 + }, + { + "epoch": 0.31, + "learning_rate": 0.00019718719645873853, + "loss": 0.7642, + "step": 1332 + }, + { + "epoch": 0.31, + "learning_rate": 0.00019718291093000318, + "loss": 0.7729, + "step": 1333 + }, + { + "epoch": 0.31, + "learning_rate": 0.0001971786221857222, + "loss": 0.7621, + "step": 1334 + }, + { + "epoch": 0.31, + "learning_rate": 0.00019717433022603745, + "loss": 0.7367, + "step": 1335 + }, + { + "epoch": 0.31, + "learning_rate": 0.00019717003505109095, + "loss": 0.7545, + "step": 1336 + }, + { + "epoch": 0.31, + "learning_rate": 0.0001971657366610249, + "loss": 0.7744, + "step": 1337 + }, + { + "epoch": 0.31, + "learning_rate": 0.0001971614350559814, + "loss": 0.8163, + "step": 1338 + }, + { + "epoch": 0.31, + "learning_rate": 0.00019715713023610282, + "loss": 0.7044, + "step": 1339 + }, + { + "epoch": 0.31, + "learning_rate": 0.00019715282220153166, + "loss": 0.7756, + "step": 1340 + }, + { + "epoch": 0.31, + "learning_rate": 0.0001971485109524104, + "loss": 0.7794, + "step": 1341 + }, + { + "epoch": 0.31, + "learning_rate": 0.0001971441964888817, + "loss": 0.7407, + "step": 1342 + }, + { + "epoch": 0.31, + "learning_rate": 0.00019713987881108834, + "loss": 0.8074, + "step": 1343 + }, + { + "epoch": 0.31, + "learning_rate": 0.00019713555791917313, + "loss": 0.6788, + "step": 1344 + }, + { + "epoch": 0.31, + "learning_rate": 0.0001971312338132791, + "loss": 0.8079, + "step": 1345 + }, + { + "epoch": 0.31, + "learning_rate": 0.00019712690649354925, + "loss": 0.6929, + "step": 1346 + }, + { + "epoch": 0.31, + "learning_rate": 0.00019712257596012683, + "loss": 0.7825, + "step": 1347 + }, + { + "epoch": 0.31, + "learning_rate": 0.00019711824221315508, + "loss": 0.8501, + "step": 1348 + }, + { + "epoch": 0.31, + "learning_rate": 0.00019711390525277742, + "loss": 0.7092, + "step": 1349 + }, + { + "epoch": 0.31, + "learning_rate": 0.00019710956507913737, + "loss": 0.7794, + "step": 1350 + }, + { + "epoch": 0.31, + "learning_rate": 0.00019710522169237845, + "loss": 0.8125, + "step": 1351 + }, + { + "epoch": 0.31, + "learning_rate": 0.00019710087509264447, + "loss": 0.7366, + "step": 1352 + }, + { + "epoch": 0.31, + "learning_rate": 0.0001970965252800792, + "loss": 0.7414, + "step": 1353 + }, + { + "epoch": 0.31, + "learning_rate": 0.0001970921722548266, + "loss": 0.6955, + "step": 1354 + }, + { + "epoch": 0.31, + "learning_rate": 0.00019708781601703065, + "loss": 0.8299, + "step": 1355 + }, + { + "epoch": 0.31, + "learning_rate": 0.0001970834565668355, + "loss": 0.7265, + "step": 1356 + }, + { + "epoch": 0.31, + "learning_rate": 0.0001970790939043854, + "loss": 0.7327, + "step": 1357 + }, + { + "epoch": 0.31, + "learning_rate": 0.00019707472802982473, + "loss": 0.7936, + "step": 1358 + }, + { + "epoch": 0.31, + "learning_rate": 0.0001970703589432979, + "loss": 0.6816, + "step": 1359 + }, + { + "epoch": 0.31, + "learning_rate": 0.00019706598664494948, + "loss": 0.7409, + "step": 1360 + }, + { + "epoch": 0.32, + "learning_rate": 0.00019706161113492416, + "loss": 0.799, + "step": 1361 + }, + { + "epoch": 0.32, + "learning_rate": 0.00019705723241336673, + "loss": 0.7756, + "step": 1362 + }, + { + "epoch": 0.32, + "learning_rate": 0.00019705285048042203, + "loss": 0.74, + "step": 1363 + }, + { + "epoch": 0.32, + "learning_rate": 0.00019704846533623508, + "loss": 0.8041, + "step": 1364 + }, + { + "epoch": 0.32, + "learning_rate": 0.00019704407698095094, + "loss": 0.7341, + "step": 1365 + }, + { + "epoch": 0.32, + "learning_rate": 0.00019703968541471485, + "loss": 0.7252, + "step": 1366 + }, + { + "epoch": 0.32, + "learning_rate": 0.00019703529063767208, + "loss": 0.7471, + "step": 1367 + }, + { + "epoch": 0.32, + "learning_rate": 0.00019703089264996805, + "loss": 0.7147, + "step": 1368 + }, + { + "epoch": 0.32, + "learning_rate": 0.0001970264914517483, + "loss": 0.7906, + "step": 1369 + }, + { + "epoch": 0.32, + "learning_rate": 0.00019702208704315845, + "loss": 0.7212, + "step": 1370 + }, + { + "epoch": 0.32, + "learning_rate": 0.00019701767942434422, + "loss": 0.7825, + "step": 1371 + }, + { + "epoch": 0.32, + "learning_rate": 0.00019701326859545141, + "loss": 0.7739, + "step": 1372 + }, + { + "epoch": 0.32, + "learning_rate": 0.00019700885455662605, + "loss": 0.8377, + "step": 1373 + }, + { + "epoch": 0.32, + "learning_rate": 0.00019700443730801413, + "loss": 0.77, + "step": 1374 + }, + { + "epoch": 0.32, + "learning_rate": 0.00019700001684976178, + "loss": 0.7839, + "step": 1375 + }, + { + "epoch": 0.32, + "learning_rate": 0.00019699559318201538, + "loss": 0.7226, + "step": 1376 + }, + { + "epoch": 0.32, + "learning_rate": 0.00019699116630492115, + "loss": 0.7835, + "step": 1377 + }, + { + "epoch": 0.32, + "learning_rate": 0.00019698673621862566, + "loss": 0.7649, + "step": 1378 + }, + { + "epoch": 0.32, + "learning_rate": 0.0001969823029232755, + "loss": 0.7798, + "step": 1379 + }, + { + "epoch": 0.32, + "learning_rate": 0.00019697786641901724, + "loss": 0.8101, + "step": 1380 + }, + { + "epoch": 0.32, + "learning_rate": 0.00019697342670599782, + "loss": 0.7697, + "step": 1381 + }, + { + "epoch": 0.32, + "learning_rate": 0.00019696898378436406, + "loss": 0.7613, + "step": 1382 + }, + { + "epoch": 0.32, + "learning_rate": 0.00019696453765426298, + "loss": 0.7521, + "step": 1383 + }, + { + "epoch": 0.32, + "learning_rate": 0.00019696008831584168, + "loss": 0.717, + "step": 1384 + }, + { + "epoch": 0.32, + "learning_rate": 0.0001969556357692474, + "loss": 0.7397, + "step": 1385 + }, + { + "epoch": 0.32, + "learning_rate": 0.00019695118001462745, + "loss": 0.761, + "step": 1386 + }, + { + "epoch": 0.32, + "learning_rate": 0.00019694672105212925, + "loss": 0.8108, + "step": 1387 + }, + { + "epoch": 0.32, + "learning_rate": 0.00019694225888190037, + "loss": 0.7332, + "step": 1388 + }, + { + "epoch": 0.32, + "learning_rate": 0.00019693779350408842, + "loss": 0.7841, + "step": 1389 + }, + { + "epoch": 0.32, + "learning_rate": 0.00019693332491884117, + "loss": 0.753, + "step": 1390 + }, + { + "epoch": 0.32, + "learning_rate": 0.00019692885312630644, + "loss": 0.7629, + "step": 1391 + }, + { + "epoch": 0.32, + "learning_rate": 0.00019692437812663226, + "loss": 0.7558, + "step": 1392 + }, + { + "epoch": 0.32, + "learning_rate": 0.00019691989991996663, + "loss": 0.7647, + "step": 1393 + }, + { + "epoch": 0.32, + "learning_rate": 0.00019691541850645771, + "loss": 0.7704, + "step": 1394 + }, + { + "epoch": 0.32, + "learning_rate": 0.00019691093388625385, + "loss": 0.7681, + "step": 1395 + }, + { + "epoch": 0.32, + "learning_rate": 0.0001969064460595034, + "loss": 0.7921, + "step": 1396 + }, + { + "epoch": 0.32, + "learning_rate": 0.00019690195502635485, + "loss": 0.7044, + "step": 1397 + }, + { + "epoch": 0.32, + "learning_rate": 0.0001968974607869568, + "loss": 0.766, + "step": 1398 + }, + { + "epoch": 0.32, + "learning_rate": 0.00019689296334145795, + "loss": 0.7272, + "step": 1399 + }, + { + "epoch": 0.32, + "learning_rate": 0.00019688846269000712, + "loss": 0.7544, + "step": 1400 + }, + { + "epoch": 0.32, + "learning_rate": 0.00019688395883275318, + "loss": 0.7985, + "step": 1401 + }, + { + "epoch": 0.32, + "learning_rate": 0.0001968794517698452, + "loss": 0.8063, + "step": 1402 + }, + { + "epoch": 0.32, + "learning_rate": 0.00019687494150143233, + "loss": 0.7324, + "step": 1403 + }, + { + "epoch": 0.33, + "learning_rate": 0.00019687042802766375, + "loss": 0.7604, + "step": 1404 + }, + { + "epoch": 0.33, + "learning_rate": 0.00019686591134868882, + "loss": 0.7516, + "step": 1405 + }, + { + "epoch": 0.33, + "learning_rate": 0.00019686139146465697, + "loss": 0.8407, + "step": 1406 + }, + { + "epoch": 0.33, + "learning_rate": 0.00019685686837571777, + "loss": 0.7913, + "step": 1407 + }, + { + "epoch": 0.33, + "learning_rate": 0.00019685234208202088, + "loss": 0.7138, + "step": 1408 + }, + { + "epoch": 0.33, + "learning_rate": 0.00019684781258371607, + "loss": 0.7809, + "step": 1409 + }, + { + "epoch": 0.33, + "learning_rate": 0.0001968432798809532, + "loss": 0.7334, + "step": 1410 + }, + { + "epoch": 0.33, + "learning_rate": 0.00019683874397388222, + "loss": 0.713, + "step": 1411 + }, + { + "epoch": 0.33, + "learning_rate": 0.00019683420486265327, + "loss": 0.7386, + "step": 1412 + }, + { + "epoch": 0.33, + "learning_rate": 0.00019682966254741652, + "loss": 0.734, + "step": 1413 + }, + { + "epoch": 0.33, + "learning_rate": 0.00019682511702832217, + "loss": 0.7865, + "step": 1414 + }, + { + "epoch": 0.33, + "learning_rate": 0.00019682056830552078, + "loss": 0.7651, + "step": 1415 + }, + { + "epoch": 0.33, + "learning_rate": 0.00019681601637916275, + "loss": 0.7903, + "step": 1416 + }, + { + "epoch": 0.33, + "learning_rate": 0.0001968114612493987, + "loss": 0.7277, + "step": 1417 + }, + { + "epoch": 0.33, + "learning_rate": 0.00019680690291637939, + "loss": 0.8142, + "step": 1418 + }, + { + "epoch": 0.33, + "learning_rate": 0.00019680234138025562, + "loss": 0.7798, + "step": 1419 + }, + { + "epoch": 0.33, + "learning_rate": 0.0001967977766411783, + "loss": 0.7833, + "step": 1420 + }, + { + "epoch": 0.33, + "learning_rate": 0.00019679320869929852, + "loss": 0.8103, + "step": 1421 + }, + { + "epoch": 0.33, + "learning_rate": 0.00019678863755476735, + "loss": 0.7925, + "step": 1422 + }, + { + "epoch": 0.33, + "learning_rate": 0.00019678406320773612, + "loss": 0.7628, + "step": 1423 + }, + { + "epoch": 0.33, + "learning_rate": 0.00019677948565835612, + "loss": 0.7531, + "step": 1424 + }, + { + "epoch": 0.33, + "learning_rate": 0.0001967749049067788, + "loss": 0.7372, + "step": 1425 + }, + { + "epoch": 0.33, + "learning_rate": 0.0001967703209531558, + "loss": 0.7487, + "step": 1426 + }, + { + "epoch": 0.33, + "learning_rate": 0.00019676573379763873, + "loss": 0.789, + "step": 1427 + }, + { + "epoch": 0.33, + "learning_rate": 0.0001967611434403794, + "loss": 0.7966, + "step": 1428 + }, + { + "epoch": 0.33, + "learning_rate": 0.00019675654988152969, + "loss": 0.7867, + "step": 1429 + }, + { + "epoch": 0.33, + "learning_rate": 0.00019675195312124152, + "loss": 0.7119, + "step": 1430 + }, + { + "epoch": 0.33, + "learning_rate": 0.0001967473531596671, + "loss": 0.7759, + "step": 1431 + }, + { + "epoch": 0.33, + "learning_rate": 0.00019674274999695858, + "loss": 0.7277, + "step": 1432 + }, + { + "epoch": 0.33, + "learning_rate": 0.00019673814363326822, + "loss": 0.7888, + "step": 1433 + }, + { + "epoch": 0.33, + "learning_rate": 0.00019673353406874852, + "loss": 0.8067, + "step": 1434 + }, + { + "epoch": 0.33, + "learning_rate": 0.00019672892130355194, + "loss": 0.7915, + "step": 1435 + }, + { + "epoch": 0.33, + "learning_rate": 0.00019672430533783112, + "loss": 0.7936, + "step": 1436 + }, + { + "epoch": 0.33, + "learning_rate": 0.0001967196861717388, + "loss": 0.7768, + "step": 1437 + }, + { + "epoch": 0.33, + "learning_rate": 0.00019671506380542782, + "loss": 0.8452, + "step": 1438 + }, + { + "epoch": 0.33, + "learning_rate": 0.0001967104382390511, + "loss": 0.6845, + "step": 1439 + }, + { + "epoch": 0.33, + "learning_rate": 0.00019670580947276167, + "loss": 0.7556, + "step": 1440 + }, + { + "epoch": 0.33, + "learning_rate": 0.00019670117750671278, + "loss": 0.7853, + "step": 1441 + }, + { + "epoch": 0.33, + "learning_rate": 0.00019669654234105756, + "loss": 0.7619, + "step": 1442 + }, + { + "epoch": 0.33, + "learning_rate": 0.0001966919039759495, + "loss": 0.8179, + "step": 1443 + }, + { + "epoch": 0.33, + "learning_rate": 0.00019668726241154198, + "loss": 0.7443, + "step": 1444 + }, + { + "epoch": 0.33, + "learning_rate": 0.00019668261764798863, + "loss": 0.7861, + "step": 1445 + }, + { + "epoch": 0.33, + "learning_rate": 0.0001966779696854431, + "loss": 0.7287, + "step": 1446 + }, + { + "epoch": 0.33, + "learning_rate": 0.00019667331852405923, + "loss": 0.7648, + "step": 1447 + }, + { + "epoch": 0.34, + "learning_rate": 0.00019666866416399086, + "loss": 0.7757, + "step": 1448 + }, + { + "epoch": 0.34, + "learning_rate": 0.00019666400660539203, + "loss": 0.8381, + "step": 1449 + }, + { + "epoch": 0.34, + "learning_rate": 0.00019665934584841682, + "loss": 0.7835, + "step": 1450 + }, + { + "epoch": 0.34, + "learning_rate": 0.00019665468189321943, + "loss": 0.7514, + "step": 1451 + }, + { + "epoch": 0.34, + "learning_rate": 0.00019665001473995425, + "loss": 0.7568, + "step": 1452 + }, + { + "epoch": 0.34, + "learning_rate": 0.00019664534438877562, + "loss": 0.7476, + "step": 1453 + }, + { + "epoch": 0.34, + "learning_rate": 0.0001966406708398381, + "loss": 0.7273, + "step": 1454 + }, + { + "epoch": 0.34, + "learning_rate": 0.00019663599409329639, + "loss": 0.7446, + "step": 1455 + }, + { + "epoch": 0.34, + "learning_rate": 0.00019663131414930513, + "loss": 0.7291, + "step": 1456 + }, + { + "epoch": 0.34, + "learning_rate": 0.00019662663100801923, + "loss": 0.6979, + "step": 1457 + }, + { + "epoch": 0.34, + "learning_rate": 0.00019662194466959362, + "loss": 0.7757, + "step": 1458 + }, + { + "epoch": 0.34, + "learning_rate": 0.0001966172551341834, + "loss": 0.759, + "step": 1459 + }, + { + "epoch": 0.34, + "learning_rate": 0.00019661256240194365, + "loss": 0.7731, + "step": 1460 + }, + { + "epoch": 0.34, + "learning_rate": 0.00019660786647302973, + "loss": 0.8103, + "step": 1461 + }, + { + "epoch": 0.34, + "learning_rate": 0.000196603167347597, + "loss": 0.8015, + "step": 1462 + }, + { + "epoch": 0.34, + "learning_rate": 0.00019659846502580088, + "loss": 0.7938, + "step": 1463 + }, + { + "epoch": 0.34, + "learning_rate": 0.00019659375950779705, + "loss": 0.7421, + "step": 1464 + }, + { + "epoch": 0.34, + "learning_rate": 0.0001965890507937411, + "loss": 0.7075, + "step": 1465 + }, + { + "epoch": 0.34, + "learning_rate": 0.0001965843388837889, + "loss": 0.7841, + "step": 1466 + }, + { + "epoch": 0.34, + "learning_rate": 0.00019657962377809634, + "loss": 0.7959, + "step": 1467 + }, + { + "epoch": 0.34, + "learning_rate": 0.00019657490547681948, + "loss": 0.6756, + "step": 1468 + }, + { + "epoch": 0.34, + "learning_rate": 0.00019657018398011434, + "loss": 0.7712, + "step": 1469 + }, + { + "epoch": 0.34, + "learning_rate": 0.00019656545928813723, + "loss": 0.7499, + "step": 1470 + }, + { + "epoch": 0.34, + "learning_rate": 0.00019656073140104444, + "loss": 0.7739, + "step": 1471 + }, + { + "epoch": 0.34, + "learning_rate": 0.00019655600031899238, + "loss": 0.7634, + "step": 1472 + }, + { + "epoch": 0.34, + "learning_rate": 0.00019655126604213762, + "loss": 0.7995, + "step": 1473 + }, + { + "epoch": 0.34, + "learning_rate": 0.0001965465285706368, + "loss": 0.796, + "step": 1474 + }, + { + "epoch": 0.34, + "learning_rate": 0.0001965417879046467, + "loss": 0.7791, + "step": 1475 + }, + { + "epoch": 0.34, + "learning_rate": 0.00019653704404432414, + "loss": 0.804, + "step": 1476 + }, + { + "epoch": 0.34, + "learning_rate": 0.0001965322969898261, + "loss": 0.7635, + "step": 1477 + }, + { + "epoch": 0.34, + "learning_rate": 0.00019652754674130959, + "loss": 0.8199, + "step": 1478 + }, + { + "epoch": 0.34, + "learning_rate": 0.0001965227932989319, + "loss": 0.7179, + "step": 1479 + }, + { + "epoch": 0.34, + "learning_rate": 0.0001965180366628502, + "loss": 0.8095, + "step": 1480 + }, + { + "epoch": 0.34, + "learning_rate": 0.00019651327683322195, + "loss": 0.743, + "step": 1481 + }, + { + "epoch": 0.34, + "learning_rate": 0.00019650851381020458, + "loss": 0.6996, + "step": 1482 + }, + { + "epoch": 0.34, + "learning_rate": 0.00019650374759395576, + "loss": 0.8009, + "step": 1483 + }, + { + "epoch": 0.34, + "learning_rate": 0.00019649897818463312, + "loss": 0.8278, + "step": 1484 + }, + { + "epoch": 0.34, + "learning_rate": 0.0001964942055823945, + "loss": 0.7486, + "step": 1485 + }, + { + "epoch": 0.34, + "learning_rate": 0.00019648942978739783, + "loss": 0.8181, + "step": 1486 + }, + { + "epoch": 0.34, + "learning_rate": 0.0001964846507998011, + "loss": 0.7577, + "step": 1487 + }, + { + "epoch": 0.34, + "learning_rate": 0.00019647986861976246, + "loss": 0.7711, + "step": 1488 + }, + { + "epoch": 0.34, + "learning_rate": 0.00019647508324744013, + "loss": 0.8412, + "step": 1489 + }, + { + "epoch": 0.34, + "learning_rate": 0.00019647029468299244, + "loss": 0.7524, + "step": 1490 + }, + { + "epoch": 0.35, + "learning_rate": 0.00019646550292657785, + "loss": 0.7719, + "step": 1491 + }, + { + "epoch": 0.35, + "learning_rate": 0.00019646070797835484, + "loss": 0.7645, + "step": 1492 + }, + { + "epoch": 0.35, + "learning_rate": 0.00019645590983848217, + "loss": 0.7755, + "step": 1493 + }, + { + "epoch": 0.35, + "learning_rate": 0.0001964511085071185, + "loss": 0.8164, + "step": 1494 + }, + { + "epoch": 0.35, + "learning_rate": 0.00019644630398442278, + "loss": 0.7778, + "step": 1495 + }, + { + "epoch": 0.35, + "learning_rate": 0.00019644149627055395, + "loss": 0.7815, + "step": 1496 + }, + { + "epoch": 0.35, + "learning_rate": 0.00019643668536567106, + "loss": 0.7865, + "step": 1497 + }, + { + "epoch": 0.35, + "learning_rate": 0.0001964318712699333, + "loss": 0.7047, + "step": 1498 + }, + { + "epoch": 0.35, + "learning_rate": 0.00019642705398349998, + "loss": 0.7686, + "step": 1499 + }, + { + "epoch": 0.35, + "learning_rate": 0.00019642223350653043, + "loss": 0.7665, + "step": 1500 + }, + { + "epoch": 0.35, + "learning_rate": 0.00019641740983918426, + "loss": 0.7934, + "step": 1501 + }, + { + "epoch": 0.35, + "learning_rate": 0.00019641258298162096, + "loss": 0.7591, + "step": 1502 + }, + { + "epoch": 0.35, + "learning_rate": 0.0001964077529340003, + "loss": 0.8176, + "step": 1503 + }, + { + "epoch": 0.35, + "learning_rate": 0.0001964029196964821, + "loss": 0.7002, + "step": 1504 + }, + { + "epoch": 0.35, + "learning_rate": 0.00019639808326922623, + "loss": 0.7692, + "step": 1505 + }, + { + "epoch": 0.35, + "learning_rate": 0.00019639324365239275, + "loss": 0.7874, + "step": 1506 + }, + { + "epoch": 0.35, + "learning_rate": 0.00019638840084614182, + "loss": 0.7607, + "step": 1507 + }, + { + "epoch": 0.35, + "learning_rate": 0.0001963835548506336, + "loss": 0.7023, + "step": 1508 + }, + { + "epoch": 0.35, + "learning_rate": 0.0001963787056660285, + "loss": 0.7638, + "step": 1509 + }, + { + "epoch": 0.35, + "learning_rate": 0.00019637385329248695, + "loss": 0.8114, + "step": 1510 + }, + { + "epoch": 0.35, + "learning_rate": 0.00019636899773016947, + "loss": 0.7821, + "step": 1511 + }, + { + "epoch": 0.35, + "learning_rate": 0.0001963641389792368, + "loss": 0.8403, + "step": 1512 + }, + { + "epoch": 0.35, + "learning_rate": 0.0001963592770398496, + "loss": 0.776, + "step": 1513 + }, + { + "epoch": 0.35, + "learning_rate": 0.0001963544119121688, + "loss": 0.7163, + "step": 1514 + }, + { + "epoch": 0.35, + "learning_rate": 0.0001963495435963554, + "loss": 0.7579, + "step": 1515 + }, + { + "epoch": 0.35, + "learning_rate": 0.0001963446720925704, + "loss": 0.7354, + "step": 1516 + }, + { + "epoch": 0.35, + "learning_rate": 0.00019633979740097502, + "loss": 0.7074, + "step": 1517 + }, + { + "epoch": 0.35, + "learning_rate": 0.0001963349195217306, + "loss": 0.7085, + "step": 1518 + }, + { + "epoch": 0.35, + "learning_rate": 0.00019633003845499848, + "loss": 0.7408, + "step": 1519 + }, + { + "epoch": 0.35, + "learning_rate": 0.0001963251542009402, + "loss": 0.7558, + "step": 1520 + }, + { + "epoch": 0.35, + "learning_rate": 0.00019632026675971736, + "loss": 0.6885, + "step": 1521 + }, + { + "epoch": 0.35, + "learning_rate": 0.00019631537613149161, + "loss": 0.7965, + "step": 1522 + }, + { + "epoch": 0.35, + "learning_rate": 0.00019631048231642489, + "loss": 0.7393, + "step": 1523 + }, + { + "epoch": 0.35, + "learning_rate": 0.00019630558531467905, + "loss": 0.7729, + "step": 1524 + }, + { + "epoch": 0.35, + "learning_rate": 0.00019630068512641608, + "loss": 0.7613, + "step": 1525 + }, + { + "epoch": 0.35, + "learning_rate": 0.0001962957817517982, + "loss": 0.7609, + "step": 1526 + }, + { + "epoch": 0.35, + "learning_rate": 0.0001962908751909876, + "loss": 0.7777, + "step": 1527 + }, + { + "epoch": 0.35, + "learning_rate": 0.00019628596544414665, + "loss": 0.7714, + "step": 1528 + }, + { + "epoch": 0.35, + "learning_rate": 0.0001962810525114378, + "loss": 0.7651, + "step": 1529 + }, + { + "epoch": 0.35, + "learning_rate": 0.0001962761363930236, + "loss": 0.777, + "step": 1530 + }, + { + "epoch": 0.35, + "learning_rate": 0.00019627121708906667, + "loss": 0.7268, + "step": 1531 + }, + { + "epoch": 0.35, + "learning_rate": 0.00019626629459972987, + "loss": 0.7542, + "step": 1532 + }, + { + "epoch": 0.35, + "learning_rate": 0.00019626136892517598, + "loss": 0.8172, + "step": 1533 + }, + { + "epoch": 0.36, + "learning_rate": 0.00019625644006556806, + "loss": 0.731, + "step": 1534 + }, + { + "epoch": 0.36, + "learning_rate": 0.00019625150802106913, + "loss": 0.7691, + "step": 1535 + }, + { + "epoch": 0.36, + "learning_rate": 0.00019624657279184243, + "loss": 0.8145, + "step": 1536 + }, + { + "epoch": 0.36, + "learning_rate": 0.0001962416343780512, + "loss": 0.7575, + "step": 1537 + }, + { + "epoch": 0.36, + "learning_rate": 0.00019623669277985887, + "loss": 0.7934, + "step": 1538 + }, + { + "epoch": 0.36, + "learning_rate": 0.00019623174799742894, + "loss": 0.8042, + "step": 1539 + }, + { + "epoch": 0.36, + "learning_rate": 0.00019622680003092503, + "loss": 0.7596, + "step": 1540 + }, + { + "epoch": 0.36, + "learning_rate": 0.00019622184888051086, + "loss": 0.7777, + "step": 1541 + }, + { + "epoch": 0.36, + "learning_rate": 0.00019621689454635025, + "loss": 0.8296, + "step": 1542 + }, + { + "epoch": 0.36, + "learning_rate": 0.0001962119370286071, + "loss": 0.7923, + "step": 1543 + }, + { + "epoch": 0.36, + "learning_rate": 0.0001962069763274455, + "loss": 0.7504, + "step": 1544 + }, + { + "epoch": 0.36, + "learning_rate": 0.00019620201244302952, + "loss": 0.6759, + "step": 1545 + }, + { + "epoch": 0.36, + "learning_rate": 0.00019619704537552344, + "loss": 0.7424, + "step": 1546 + }, + { + "epoch": 0.36, + "learning_rate": 0.0001961920751250916, + "loss": 0.7481, + "step": 1547 + }, + { + "epoch": 0.36, + "learning_rate": 0.00019618710169189843, + "loss": 0.7799, + "step": 1548 + }, + { + "epoch": 0.36, + "learning_rate": 0.00019618212507610853, + "loss": 0.7389, + "step": 1549 + }, + { + "epoch": 0.36, + "learning_rate": 0.00019617714527788652, + "loss": 0.7473, + "step": 1550 + }, + { + "epoch": 0.36, + "learning_rate": 0.00019617216229739722, + "loss": 0.7767, + "step": 1551 + }, + { + "epoch": 0.36, + "learning_rate": 0.00019616717613480552, + "loss": 0.8095, + "step": 1552 + }, + { + "epoch": 0.36, + "learning_rate": 0.0001961621867902763, + "loss": 0.8234, + "step": 1553 + }, + { + "epoch": 0.36, + "learning_rate": 0.00019615719426397475, + "loss": 0.7389, + "step": 1554 + }, + { + "epoch": 0.36, + "learning_rate": 0.00019615219855606596, + "loss": 0.7472, + "step": 1555 + }, + { + "epoch": 0.36, + "learning_rate": 0.00019614719966671532, + "loss": 0.775, + "step": 1556 + }, + { + "epoch": 0.36, + "learning_rate": 0.0001961421975960882, + "loss": 0.793, + "step": 1557 + }, + { + "epoch": 0.36, + "learning_rate": 0.00019613719234435012, + "loss": 0.7804, + "step": 1558 + }, + { + "epoch": 0.36, + "learning_rate": 0.00019613218391166664, + "loss": 0.8263, + "step": 1559 + }, + { + "epoch": 0.36, + "learning_rate": 0.00019612717229820347, + "loss": 0.7598, + "step": 1560 + }, + { + "epoch": 0.36, + "learning_rate": 0.0001961221575041265, + "loss": 0.7989, + "step": 1561 + }, + { + "epoch": 0.36, + "learning_rate": 0.00019611713952960168, + "loss": 0.7903, + "step": 1562 + }, + { + "epoch": 0.36, + "learning_rate": 0.00019611211837479493, + "loss": 0.7988, + "step": 1563 + }, + { + "epoch": 0.36, + "learning_rate": 0.00019610709403987246, + "loss": 0.751, + "step": 1564 + }, + { + "epoch": 0.36, + "learning_rate": 0.0001961020665250005, + "loss": 0.7984, + "step": 1565 + }, + { + "epoch": 0.36, + "learning_rate": 0.00019609703583034542, + "loss": 0.902, + "step": 1566 + }, + { + "epoch": 0.36, + "learning_rate": 0.00019609200195607363, + "loss": 0.7783, + "step": 1567 + }, + { + "epoch": 0.36, + "learning_rate": 0.0001960869649023517, + "loss": 0.7352, + "step": 1568 + }, + { + "epoch": 0.36, + "learning_rate": 0.00019608192466934635, + "loss": 0.7092, + "step": 1569 + }, + { + "epoch": 0.36, + "learning_rate": 0.00019607688125722426, + "loss": 0.7046, + "step": 1570 + }, + { + "epoch": 0.36, + "learning_rate": 0.00019607183466615236, + "loss": 0.7542, + "step": 1571 + }, + { + "epoch": 0.36, + "learning_rate": 0.00019606678489629762, + "loss": 0.8297, + "step": 1572 + }, + { + "epoch": 0.36, + "learning_rate": 0.00019606173194782712, + "loss": 0.7637, + "step": 1573 + }, + { + "epoch": 0.36, + "learning_rate": 0.00019605667582090806, + "loss": 0.7838, + "step": 1574 + }, + { + "epoch": 0.36, + "learning_rate": 0.00019605161651570772, + "loss": 0.8752, + "step": 1575 + }, + { + "epoch": 0.36, + "learning_rate": 0.00019604655403239352, + "loss": 0.7874, + "step": 1576 + }, + { + "epoch": 0.37, + "learning_rate": 0.0001960414883711329, + "loss": 0.7794, + "step": 1577 + }, + { + "epoch": 0.37, + "learning_rate": 0.00019603641953209357, + "loss": 0.8109, + "step": 1578 + }, + { + "epoch": 0.37, + "learning_rate": 0.0001960313475154432, + "loss": 0.7187, + "step": 1579 + }, + { + "epoch": 0.37, + "learning_rate": 0.0001960262723213496, + "loss": 0.7824, + "step": 1580 + }, + { + "epoch": 0.37, + "learning_rate": 0.0001960211939499807, + "loss": 0.7749, + "step": 1581 + }, + { + "epoch": 0.37, + "learning_rate": 0.00019601611240150453, + "loss": 0.7483, + "step": 1582 + }, + { + "epoch": 0.37, + "learning_rate": 0.00019601102767608923, + "loss": 0.8099, + "step": 1583 + }, + { + "epoch": 0.37, + "learning_rate": 0.00019600593977390306, + "loss": 0.8243, + "step": 1584 + }, + { + "epoch": 0.37, + "learning_rate": 0.00019600084869511434, + "loss": 0.8711, + "step": 1585 + }, + { + "epoch": 0.37, + "learning_rate": 0.00019599575443989152, + "loss": 0.827, + "step": 1586 + }, + { + "epoch": 0.37, + "learning_rate": 0.00019599065700840317, + "loss": 0.7581, + "step": 1587 + }, + { + "epoch": 0.37, + "learning_rate": 0.000195985556400818, + "loss": 0.7821, + "step": 1588 + }, + { + "epoch": 0.37, + "learning_rate": 0.00019598045261730467, + "loss": 0.7281, + "step": 1589 + }, + { + "epoch": 0.37, + "learning_rate": 0.0001959753456580321, + "loss": 0.7767, + "step": 1590 + }, + { + "epoch": 0.37, + "learning_rate": 0.00019597023552316928, + "loss": 0.7615, + "step": 1591 + }, + { + "epoch": 0.37, + "learning_rate": 0.00019596512221288533, + "loss": 0.8057, + "step": 1592 + }, + { + "epoch": 0.37, + "learning_rate": 0.00019596000572734933, + "loss": 0.6927, + "step": 1593 + }, + { + "epoch": 0.37, + "learning_rate": 0.00019595488606673068, + "loss": 0.7775, + "step": 1594 + }, + { + "epoch": 0.37, + "learning_rate": 0.0001959497632311987, + "loss": 0.8287, + "step": 1595 + }, + { + "epoch": 0.37, + "learning_rate": 0.00019594463722092293, + "loss": 0.7601, + "step": 1596 + }, + { + "epoch": 0.37, + "learning_rate": 0.000195939508036073, + "loss": 0.8217, + "step": 1597 + }, + { + "epoch": 0.37, + "learning_rate": 0.00019593437567681855, + "loss": 0.7744, + "step": 1598 + }, + { + "epoch": 0.37, + "learning_rate": 0.00019592924014332947, + "loss": 0.7368, + "step": 1599 + }, + { + "epoch": 0.37, + "learning_rate": 0.00019592410143577565, + "loss": 0.7555, + "step": 1600 + }, + { + "epoch": 0.37, + "learning_rate": 0.0001959189595543271, + "loss": 0.7527, + "step": 1601 + }, + { + "epoch": 0.37, + "learning_rate": 0.00019591381449915397, + "loss": 0.802, + "step": 1602 + }, + { + "epoch": 0.37, + "learning_rate": 0.00019590866627042653, + "loss": 0.8041, + "step": 1603 + }, + { + "epoch": 0.37, + "learning_rate": 0.0001959035148683151, + "loss": 0.746, + "step": 1604 + }, + { + "epoch": 0.37, + "learning_rate": 0.00019589836029299006, + "loss": 0.8344, + "step": 1605 + }, + { + "epoch": 0.37, + "learning_rate": 0.00019589320254462207, + "loss": 0.7961, + "step": 1606 + }, + { + "epoch": 0.37, + "learning_rate": 0.00019588804162338174, + "loss": 0.7532, + "step": 1607 + }, + { + "epoch": 0.37, + "learning_rate": 0.00019588287752943983, + "loss": 0.7953, + "step": 1608 + }, + { + "epoch": 0.37, + "learning_rate": 0.00019587771026296716, + "loss": 0.8054, + "step": 1609 + }, + { + "epoch": 0.37, + "learning_rate": 0.00019587253982413479, + "loss": 0.733, + "step": 1610 + }, + { + "epoch": 0.37, + "learning_rate": 0.00019586736621311378, + "loss": 0.7861, + "step": 1611 + }, + { + "epoch": 0.37, + "learning_rate": 0.00019586218943007524, + "loss": 0.7792, + "step": 1612 + }, + { + "epoch": 0.37, + "learning_rate": 0.00019585700947519054, + "loss": 0.8229, + "step": 1613 + }, + { + "epoch": 0.37, + "learning_rate": 0.00019585182634863102, + "loss": 0.7914, + "step": 1614 + }, + { + "epoch": 0.37, + "learning_rate": 0.00019584664005056822, + "loss": 0.7783, + "step": 1615 + }, + { + "epoch": 0.37, + "learning_rate": 0.0001958414505811737, + "loss": 0.7863, + "step": 1616 + }, + { + "epoch": 0.37, + "learning_rate": 0.00019583625794061918, + "loss": 0.7399, + "step": 1617 + }, + { + "epoch": 0.37, + "learning_rate": 0.0001958310621290765, + "loss": 0.7926, + "step": 1618 + }, + { + "epoch": 0.37, + "learning_rate": 0.00019582586314671754, + "loss": 0.8399, + "step": 1619 + }, + { + "epoch": 0.38, + "learning_rate": 0.00019582066099371433, + "loss": 0.7179, + "step": 1620 + }, + { + "epoch": 0.38, + "learning_rate": 0.000195815455670239, + "loss": 0.7279, + "step": 1621 + }, + { + "epoch": 0.38, + "learning_rate": 0.0001958102471764638, + "loss": 0.7404, + "step": 1622 + }, + { + "epoch": 0.38, + "learning_rate": 0.00019580503551256103, + "loss": 0.7521, + "step": 1623 + }, + { + "epoch": 0.38, + "learning_rate": 0.00019579982067870316, + "loss": 0.7334, + "step": 1624 + }, + { + "epoch": 0.38, + "learning_rate": 0.00019579460267506274, + "loss": 0.7648, + "step": 1625 + }, + { + "epoch": 0.38, + "learning_rate": 0.0001957893815018124, + "loss": 0.7046, + "step": 1626 + }, + { + "epoch": 0.38, + "learning_rate": 0.00019578415715912491, + "loss": 0.785, + "step": 1627 + }, + { + "epoch": 0.38, + "learning_rate": 0.00019577892964717312, + "loss": 0.7026, + "step": 1628 + }, + { + "epoch": 0.38, + "learning_rate": 0.00019577369896612998, + "loss": 0.729, + "step": 1629 + }, + { + "epoch": 0.38, + "learning_rate": 0.0001957684651161686, + "loss": 0.7562, + "step": 1630 + }, + { + "epoch": 0.38, + "learning_rate": 0.00019576322809746214, + "loss": 0.8113, + "step": 1631 + }, + { + "epoch": 0.38, + "learning_rate": 0.0001957579879101839, + "loss": 0.8194, + "step": 1632 + }, + { + "epoch": 0.38, + "learning_rate": 0.00019575274455450718, + "loss": 0.7517, + "step": 1633 + }, + { + "epoch": 0.38, + "learning_rate": 0.00019574749803060557, + "loss": 0.7662, + "step": 1634 + }, + { + "epoch": 0.38, + "learning_rate": 0.00019574224833865262, + "loss": 0.7564, + "step": 1635 + }, + { + "epoch": 0.38, + "learning_rate": 0.00019573699547882205, + "loss": 0.7371, + "step": 1636 + }, + { + "epoch": 0.38, + "learning_rate": 0.00019573173945128765, + "loss": 0.7225, + "step": 1637 + }, + { + "epoch": 0.38, + "learning_rate": 0.0001957264802562233, + "loss": 0.7889, + "step": 1638 + }, + { + "epoch": 0.38, + "learning_rate": 0.00019572121789380304, + "loss": 0.7763, + "step": 1639 + }, + { + "epoch": 0.38, + "learning_rate": 0.00019571595236420102, + "loss": 0.7841, + "step": 1640 + }, + { + "epoch": 0.38, + "learning_rate": 0.00019571068366759143, + "loss": 0.7431, + "step": 1641 + }, + { + "epoch": 0.38, + "learning_rate": 0.0001957054118041486, + "loss": 0.7655, + "step": 1642 + }, + { + "epoch": 0.38, + "learning_rate": 0.00019570013677404696, + "loss": 0.8047, + "step": 1643 + }, + { + "epoch": 0.38, + "learning_rate": 0.00019569485857746105, + "loss": 0.7545, + "step": 1644 + }, + { + "epoch": 0.38, + "learning_rate": 0.00019568957721456554, + "loss": 0.7608, + "step": 1645 + }, + { + "epoch": 0.38, + "learning_rate": 0.00019568429268553513, + "loss": 0.7028, + "step": 1646 + }, + { + "epoch": 0.38, + "learning_rate": 0.00019567900499054472, + "loss": 0.7774, + "step": 1647 + }, + { + "epoch": 0.38, + "learning_rate": 0.00019567371412976923, + "loss": 0.7023, + "step": 1648 + }, + { + "epoch": 0.38, + "learning_rate": 0.00019566842010338372, + "loss": 0.7728, + "step": 1649 + }, + { + "epoch": 0.38, + "learning_rate": 0.00019566312291156341, + "loss": 0.7937, + "step": 1650 + }, + { + "epoch": 0.38, + "learning_rate": 0.00019565782255448352, + "loss": 0.8152, + "step": 1651 + }, + { + "epoch": 0.38, + "learning_rate": 0.00019565251903231942, + "loss": 0.7915, + "step": 1652 + }, + { + "epoch": 0.38, + "learning_rate": 0.00019564721234524663, + "loss": 0.8577, + "step": 1653 + }, + { + "epoch": 0.38, + "learning_rate": 0.0001956419024934407, + "loss": 0.7754, + "step": 1654 + }, + { + "epoch": 0.38, + "learning_rate": 0.00019563658947707735, + "loss": 0.709, + "step": 1655 + }, + { + "epoch": 0.38, + "learning_rate": 0.00019563127329633235, + "loss": 0.751, + "step": 1656 + }, + { + "epoch": 0.38, + "learning_rate": 0.00019562595395138162, + "loss": 0.7174, + "step": 1657 + }, + { + "epoch": 0.38, + "learning_rate": 0.00019562063144240113, + "loss": 0.8105, + "step": 1658 + }, + { + "epoch": 0.38, + "learning_rate": 0.00019561530576956703, + "loss": 0.672, + "step": 1659 + }, + { + "epoch": 0.38, + "learning_rate": 0.00019560997693305552, + "loss": 0.8472, + "step": 1660 + }, + { + "epoch": 0.38, + "learning_rate": 0.00019560464493304292, + "loss": 0.7774, + "step": 1661 + }, + { + "epoch": 0.38, + "learning_rate": 0.00019559930976970562, + "loss": 0.7144, + "step": 1662 + }, + { + "epoch": 0.38, + "learning_rate": 0.00019559397144322017, + "loss": 0.756, + "step": 1663 + }, + { + "epoch": 0.39, + "learning_rate": 0.00019558862995376326, + "loss": 0.7105, + "step": 1664 + }, + { + "epoch": 0.39, + "learning_rate": 0.00019558328530151152, + "loss": 0.7397, + "step": 1665 + }, + { + "epoch": 0.39, + "learning_rate": 0.00019557793748664187, + "loss": 0.8143, + "step": 1666 + }, + { + "epoch": 0.39, + "learning_rate": 0.00019557258650933123, + "loss": 0.7539, + "step": 1667 + }, + { + "epoch": 0.39, + "learning_rate": 0.00019556723236975664, + "loss": 0.7197, + "step": 1668 + }, + { + "epoch": 0.39, + "learning_rate": 0.00019556187506809526, + "loss": 0.7958, + "step": 1669 + }, + { + "epoch": 0.39, + "learning_rate": 0.00019555651460452438, + "loss": 0.9077, + "step": 1670 + }, + { + "epoch": 0.39, + "learning_rate": 0.00019555115097922134, + "loss": 0.7741, + "step": 1671 + }, + { + "epoch": 0.39, + "learning_rate": 0.0001955457841923636, + "loss": 0.8122, + "step": 1672 + }, + { + "epoch": 0.39, + "learning_rate": 0.00019554041424412873, + "loss": 0.7628, + "step": 1673 + }, + { + "epoch": 0.39, + "learning_rate": 0.00019553504113469446, + "loss": 0.7435, + "step": 1674 + }, + { + "epoch": 0.39, + "learning_rate": 0.00019552966486423854, + "loss": 0.8133, + "step": 1675 + }, + { + "epoch": 0.39, + "learning_rate": 0.00019552428543293882, + "loss": 0.785, + "step": 1676 + }, + { + "epoch": 0.39, + "learning_rate": 0.00019551890284097334, + "loss": 0.7094, + "step": 1677 + }, + { + "epoch": 0.39, + "learning_rate": 0.0001955135170885202, + "loss": 0.7229, + "step": 1678 + }, + { + "epoch": 0.39, + "learning_rate": 0.00019550812817575755, + "loss": 0.8139, + "step": 1679 + }, + { + "epoch": 0.39, + "learning_rate": 0.00019550273610286376, + "loss": 0.7605, + "step": 1680 + }, + { + "epoch": 0.39, + "learning_rate": 0.0001954973408700172, + "loss": 0.7961, + "step": 1681 + }, + { + "epoch": 0.39, + "learning_rate": 0.0001954919424773964, + "loss": 0.7558, + "step": 1682 + }, + { + "epoch": 0.39, + "learning_rate": 0.00019548654092518, + "loss": 0.7816, + "step": 1683 + }, + { + "epoch": 0.39, + "learning_rate": 0.00019548113621354666, + "loss": 0.7855, + "step": 1684 + }, + { + "epoch": 0.39, + "learning_rate": 0.00019547572834267526, + "loss": 0.803, + "step": 1685 + }, + { + "epoch": 0.39, + "learning_rate": 0.00019547031731274473, + "loss": 0.7996, + "step": 1686 + }, + { + "epoch": 0.39, + "learning_rate": 0.0001954649031239341, + "loss": 0.8445, + "step": 1687 + }, + { + "epoch": 0.39, + "learning_rate": 0.00019545948577642252, + "loss": 0.7658, + "step": 1688 + }, + { + "epoch": 0.39, + "learning_rate": 0.00019545406527038923, + "loss": 0.7842, + "step": 1689 + }, + { + "epoch": 0.39, + "learning_rate": 0.00019544864160601357, + "loss": 0.7812, + "step": 1690 + }, + { + "epoch": 0.39, + "learning_rate": 0.000195443214783475, + "loss": 0.7294, + "step": 1691 + }, + { + "epoch": 0.39, + "learning_rate": 0.0001954377848029531, + "loss": 0.7576, + "step": 1692 + }, + { + "epoch": 0.39, + "learning_rate": 0.00019543235166462753, + "loss": 0.7897, + "step": 1693 + }, + { + "epoch": 0.39, + "learning_rate": 0.00019542691536867803, + "loss": 0.7448, + "step": 1694 + }, + { + "epoch": 0.39, + "learning_rate": 0.00019542147591528451, + "loss": 0.8915, + "step": 1695 + }, + { + "epoch": 0.39, + "learning_rate": 0.00019541603330462692, + "loss": 0.7487, + "step": 1696 + }, + { + "epoch": 0.39, + "learning_rate": 0.00019541058753688538, + "loss": 0.7334, + "step": 1697 + }, + { + "epoch": 0.39, + "learning_rate": 0.00019540513861224004, + "loss": 0.7862, + "step": 1698 + }, + { + "epoch": 0.39, + "learning_rate": 0.0001953996865308712, + "loss": 0.7786, + "step": 1699 + }, + { + "epoch": 0.39, + "learning_rate": 0.00019539423129295925, + "loss": 0.8008, + "step": 1700 + }, + { + "epoch": 0.39, + "learning_rate": 0.00019538877289868473, + "loss": 0.6999, + "step": 1701 + }, + { + "epoch": 0.39, + "learning_rate": 0.00019538331134822821, + "loss": 0.6779, + "step": 1702 + }, + { + "epoch": 0.39, + "learning_rate": 0.0001953778466417704, + "loss": 0.7289, + "step": 1703 + }, + { + "epoch": 0.39, + "learning_rate": 0.0001953723787794921, + "loss": 0.7585, + "step": 1704 + }, + { + "epoch": 0.39, + "learning_rate": 0.00019536690776157428, + "loss": 0.7273, + "step": 1705 + }, + { + "epoch": 0.39, + "learning_rate": 0.0001953614335881979, + "loss": 0.7949, + "step": 1706 + }, + { + "epoch": 0.4, + "learning_rate": 0.00019535595625954413, + "loss": 0.8446, + "step": 1707 + }, + { + "epoch": 0.4, + "learning_rate": 0.0001953504757757942, + "loss": 0.7728, + "step": 1708 + }, + { + "epoch": 0.4, + "learning_rate": 0.00019534499213712942, + "loss": 0.7796, + "step": 1709 + }, + { + "epoch": 0.4, + "learning_rate": 0.00019533950534373124, + "loss": 0.7596, + "step": 1710 + }, + { + "epoch": 0.4, + "learning_rate": 0.00019533401539578122, + "loss": 0.7714, + "step": 1711 + }, + { + "epoch": 0.4, + "learning_rate": 0.00019532852229346101, + "loss": 0.7595, + "step": 1712 + }, + { + "epoch": 0.4, + "learning_rate": 0.00019532302603695234, + "loss": 0.7641, + "step": 1713 + }, + { + "epoch": 0.4, + "learning_rate": 0.0001953175266264371, + "loss": 0.8068, + "step": 1714 + }, + { + "epoch": 0.4, + "learning_rate": 0.00019531202406209718, + "loss": 0.8001, + "step": 1715 + }, + { + "epoch": 0.4, + "learning_rate": 0.00019530651834411474, + "loss": 0.8243, + "step": 1716 + }, + { + "epoch": 0.4, + "learning_rate": 0.0001953010094726719, + "loss": 0.8574, + "step": 1717 + }, + { + "epoch": 0.4, + "learning_rate": 0.00019529549744795097, + "loss": 0.7073, + "step": 1718 + }, + { + "epoch": 0.4, + "learning_rate": 0.00019528998227013426, + "loss": 0.8127, + "step": 1719 + }, + { + "epoch": 0.4, + "learning_rate": 0.0001952844639394043, + "loss": 0.819, + "step": 1720 + }, + { + "epoch": 0.4, + "learning_rate": 0.00019527894245594373, + "loss": 0.7172, + "step": 1721 + }, + { + "epoch": 0.4, + "learning_rate": 0.00019527341781993513, + "loss": 0.755, + "step": 1722 + }, + { + "epoch": 0.4, + "learning_rate": 0.0001952678900315614, + "loss": 0.886, + "step": 1723 + }, + { + "epoch": 0.4, + "learning_rate": 0.00019526235909100533, + "loss": 0.7199, + "step": 1724 + }, + { + "epoch": 0.4, + "learning_rate": 0.00019525682499845005, + "loss": 0.7603, + "step": 1725 + }, + { + "epoch": 0.4, + "learning_rate": 0.00019525128775407859, + "loss": 0.7335, + "step": 1726 + }, + { + "epoch": 0.4, + "learning_rate": 0.00019524574735807417, + "loss": 0.8546, + "step": 1727 + }, + { + "epoch": 0.4, + "learning_rate": 0.00019524020381062016, + "loss": 0.7944, + "step": 1728 + }, + { + "epoch": 0.4, + "learning_rate": 0.0001952346571118999, + "loss": 0.7953, + "step": 1729 + }, + { + "epoch": 0.4, + "learning_rate": 0.000195229107262097, + "loss": 0.7037, + "step": 1730 + }, + { + "epoch": 0.4, + "learning_rate": 0.00019522355426139504, + "loss": 0.7717, + "step": 1731 + }, + { + "epoch": 0.4, + "learning_rate": 0.00019521799810997775, + "loss": 0.741, + "step": 1732 + }, + { + "epoch": 0.4, + "learning_rate": 0.00019521243880802903, + "loss": 0.8449, + "step": 1733 + }, + { + "epoch": 0.4, + "learning_rate": 0.00019520687635573273, + "loss": 0.7784, + "step": 1734 + }, + { + "epoch": 0.4, + "learning_rate": 0.00019520131075327298, + "loss": 0.7183, + "step": 1735 + }, + { + "epoch": 0.4, + "learning_rate": 0.0001951957420008339, + "loss": 0.7054, + "step": 1736 + }, + { + "epoch": 0.4, + "learning_rate": 0.00019519017009859976, + "loss": 0.7845, + "step": 1737 + }, + { + "epoch": 0.4, + "learning_rate": 0.0001951845950467549, + "loss": 0.7316, + "step": 1738 + }, + { + "epoch": 0.4, + "learning_rate": 0.0001951790168454838, + "loss": 0.7709, + "step": 1739 + }, + { + "epoch": 0.4, + "learning_rate": 0.00019517343549497101, + "loss": 0.6963, + "step": 1740 + }, + { + "epoch": 0.4, + "learning_rate": 0.00019516785099540123, + "loss": 0.7248, + "step": 1741 + }, + { + "epoch": 0.4, + "learning_rate": 0.0001951622633469592, + "loss": 0.7471, + "step": 1742 + }, + { + "epoch": 0.4, + "learning_rate": 0.00019515667254982983, + "loss": 0.7915, + "step": 1743 + }, + { + "epoch": 0.4, + "learning_rate": 0.00019515107860419812, + "loss": 0.8656, + "step": 1744 + }, + { + "epoch": 0.4, + "learning_rate": 0.00019514548151024913, + "loss": 0.7498, + "step": 1745 + }, + { + "epoch": 0.4, + "learning_rate": 0.00019513988126816805, + "loss": 0.8191, + "step": 1746 + }, + { + "epoch": 0.4, + "learning_rate": 0.0001951342778781402, + "loss": 0.7255, + "step": 1747 + }, + { + "epoch": 0.4, + "learning_rate": 0.000195128671340351, + "loss": 0.797, + "step": 1748 + }, + { + "epoch": 0.4, + "learning_rate": 0.0001951230616549859, + "loss": 0.7883, + "step": 1749 + }, + { + "epoch": 0.41, + "learning_rate": 0.00019511744882223057, + "loss": 0.7808, + "step": 1750 + }, + { + "epoch": 0.41, + "learning_rate": 0.00019511183284227072, + "loss": 0.8009, + "step": 1751 + }, + { + "epoch": 0.41, + "learning_rate": 0.00019510621371529208, + "loss": 0.7986, + "step": 1752 + }, + { + "epoch": 0.41, + "learning_rate": 0.0001951005914414807, + "loss": 0.7581, + "step": 1753 + }, + { + "epoch": 0.41, + "learning_rate": 0.00019509496602102252, + "loss": 0.7189, + "step": 1754 + }, + { + "epoch": 0.41, + "learning_rate": 0.0001950893374541037, + "loss": 0.7676, + "step": 1755 + }, + { + "epoch": 0.41, + "learning_rate": 0.0001950837057409105, + "loss": 0.7957, + "step": 1756 + }, + { + "epoch": 0.41, + "learning_rate": 0.0001950780708816292, + "loss": 0.8188, + "step": 1757 + }, + { + "epoch": 0.41, + "learning_rate": 0.0001950724328764463, + "loss": 0.773, + "step": 1758 + }, + { + "epoch": 0.41, + "learning_rate": 0.00019506679172554833, + "loss": 0.7583, + "step": 1759 + }, + { + "epoch": 0.41, + "learning_rate": 0.00019506114742912192, + "loss": 0.7478, + "step": 1760 + }, + { + "epoch": 0.41, + "learning_rate": 0.00019505549998735387, + "loss": 0.758, + "step": 1761 + }, + { + "epoch": 0.41, + "learning_rate": 0.000195049849400431, + "loss": 0.783, + "step": 1762 + }, + { + "epoch": 0.41, + "learning_rate": 0.0001950441956685403, + "loss": 0.7958, + "step": 1763 + }, + { + "epoch": 0.41, + "learning_rate": 0.0001950385387918688, + "loss": 0.7608, + "step": 1764 + }, + { + "epoch": 0.41, + "learning_rate": 0.00019503287877060371, + "loss": 0.7565, + "step": 1765 + }, + { + "epoch": 0.41, + "learning_rate": 0.00019502721560493233, + "loss": 0.7286, + "step": 1766 + }, + { + "epoch": 0.41, + "learning_rate": 0.00019502154929504196, + "loss": 0.7857, + "step": 1767 + }, + { + "epoch": 0.41, + "learning_rate": 0.00019501587984112014, + "loss": 0.8058, + "step": 1768 + }, + { + "epoch": 0.41, + "learning_rate": 0.0001950102072433545, + "loss": 0.7651, + "step": 1769 + }, + { + "epoch": 0.41, + "learning_rate": 0.0001950045315019326, + "loss": 0.7046, + "step": 1770 + }, + { + "epoch": 0.41, + "learning_rate": 0.00019499885261704236, + "loss": 0.8094, + "step": 1771 + }, + { + "epoch": 0.41, + "learning_rate": 0.00019499317058887164, + "loss": 0.789, + "step": 1772 + }, + { + "epoch": 0.41, + "learning_rate": 0.00019498748541760846, + "loss": 0.7328, + "step": 1773 + }, + { + "epoch": 0.41, + "learning_rate": 0.00019498179710344087, + "loss": 0.7687, + "step": 1774 + }, + { + "epoch": 0.41, + "learning_rate": 0.00019497610564655715, + "loss": 0.76, + "step": 1775 + }, + { + "epoch": 0.41, + "learning_rate": 0.00019497041104714558, + "loss": 0.7873, + "step": 1776 + }, + { + "epoch": 0.41, + "learning_rate": 0.0001949647133053946, + "loss": 0.7961, + "step": 1777 + }, + { + "epoch": 0.41, + "learning_rate": 0.0001949590124214927, + "loss": 0.736, + "step": 1778 + }, + { + "epoch": 0.41, + "learning_rate": 0.00019495330839562852, + "loss": 0.732, + "step": 1779 + }, + { + "epoch": 0.41, + "learning_rate": 0.00019494760122799084, + "loss": 0.6641, + "step": 1780 + }, + { + "epoch": 0.41, + "learning_rate": 0.00019494189091876846, + "loss": 0.7347, + "step": 1781 + }, + { + "epoch": 0.41, + "learning_rate": 0.00019493617746815033, + "loss": 0.7522, + "step": 1782 + }, + { + "epoch": 0.41, + "learning_rate": 0.00019493046087632547, + "loss": 0.7122, + "step": 1783 + }, + { + "epoch": 0.41, + "learning_rate": 0.00019492474114348306, + "loss": 0.7657, + "step": 1784 + }, + { + "epoch": 0.41, + "learning_rate": 0.0001949190182698123, + "loss": 0.7532, + "step": 1785 + }, + { + "epoch": 0.41, + "learning_rate": 0.00019491329225550263, + "loss": 0.7689, + "step": 1786 + }, + { + "epoch": 0.41, + "learning_rate": 0.0001949075631007434, + "loss": 0.7496, + "step": 1787 + }, + { + "epoch": 0.41, + "learning_rate": 0.0001949018308057243, + "loss": 0.7484, + "step": 1788 + }, + { + "epoch": 0.41, + "learning_rate": 0.0001948960953706349, + "loss": 0.8441, + "step": 1789 + }, + { + "epoch": 0.41, + "learning_rate": 0.00019489035679566503, + "loss": 0.7565, + "step": 1790 + }, + { + "epoch": 0.41, + "learning_rate": 0.0001948846150810045, + "loss": 0.7346, + "step": 1791 + }, + { + "epoch": 0.41, + "learning_rate": 0.00019487887022684336, + "loss": 0.7743, + "step": 1792 + }, + { + "epoch": 0.42, + "learning_rate": 0.00019487312223337165, + "loss": 0.809, + "step": 1793 + }, + { + "epoch": 0.42, + "learning_rate": 0.00019486737110077957, + "loss": 0.6915, + "step": 1794 + }, + { + "epoch": 0.42, + "learning_rate": 0.0001948616168292574, + "loss": 0.7398, + "step": 1795 + }, + { + "epoch": 0.42, + "learning_rate": 0.00019485585941899557, + "loss": 0.7116, + "step": 1796 + }, + { + "epoch": 0.42, + "learning_rate": 0.00019485009887018455, + "loss": 0.7715, + "step": 1797 + }, + { + "epoch": 0.42, + "learning_rate": 0.00019484433518301495, + "loss": 0.682, + "step": 1798 + }, + { + "epoch": 0.42, + "learning_rate": 0.00019483856835767746, + "loss": 0.7979, + "step": 1799 + }, + { + "epoch": 0.42, + "learning_rate": 0.00019483279839436292, + "loss": 0.7332, + "step": 1800 + }, + { + "epoch": 0.42, + "learning_rate": 0.0001948270252932622, + "loss": 0.7869, + "step": 1801 + }, + { + "epoch": 0.42, + "learning_rate": 0.00019482124905456636, + "loss": 0.7338, + "step": 1802 + }, + { + "epoch": 0.42, + "learning_rate": 0.00019481546967846654, + "loss": 0.7493, + "step": 1803 + }, + { + "epoch": 0.42, + "learning_rate": 0.00019480968716515388, + "loss": 0.7844, + "step": 1804 + }, + { + "epoch": 0.42, + "learning_rate": 0.00019480390151481978, + "loss": 0.8204, + "step": 1805 + }, + { + "epoch": 0.42, + "learning_rate": 0.00019479811272765567, + "loss": 0.7114, + "step": 1806 + }, + { + "epoch": 0.42, + "learning_rate": 0.00019479232080385305, + "loss": 0.7618, + "step": 1807 + }, + { + "epoch": 0.42, + "learning_rate": 0.0001947865257436036, + "loss": 0.7874, + "step": 1808 + }, + { + "epoch": 0.42, + "learning_rate": 0.000194780727547099, + "loss": 0.7767, + "step": 1809 + }, + { + "epoch": 0.42, + "learning_rate": 0.00019477492621453122, + "loss": 0.7185, + "step": 1810 + }, + { + "epoch": 0.42, + "learning_rate": 0.0001947691217460921, + "loss": 0.8103, + "step": 1811 + }, + { + "epoch": 0.42, + "learning_rate": 0.00019476331414197372, + "loss": 0.7964, + "step": 1812 + }, + { + "epoch": 0.42, + "learning_rate": 0.00019475750340236826, + "loss": 0.806, + "step": 1813 + }, + { + "epoch": 0.42, + "learning_rate": 0.000194751689527468, + "loss": 0.7937, + "step": 1814 + }, + { + "epoch": 0.42, + "learning_rate": 0.00019474587251746525, + "loss": 0.776, + "step": 1815 + }, + { + "epoch": 0.42, + "learning_rate": 0.00019474005237255253, + "loss": 0.7791, + "step": 1816 + }, + { + "epoch": 0.42, + "learning_rate": 0.0001947342290929224, + "loss": 0.7471, + "step": 1817 + }, + { + "epoch": 0.42, + "learning_rate": 0.00019472840267876752, + "loss": 0.7158, + "step": 1818 + }, + { + "epoch": 0.42, + "learning_rate": 0.0001947225731302807, + "loss": 0.7588, + "step": 1819 + }, + { + "epoch": 0.42, + "learning_rate": 0.00019471674044765482, + "loss": 0.7377, + "step": 1820 + }, + { + "epoch": 0.42, + "learning_rate": 0.00019471090463108284, + "loss": 0.7786, + "step": 1821 + }, + { + "epoch": 0.42, + "learning_rate": 0.0001947050656807579, + "loss": 0.7258, + "step": 1822 + }, + { + "epoch": 0.42, + "learning_rate": 0.00019469922359687317, + "loss": 0.8091, + "step": 1823 + }, + { + "epoch": 0.42, + "learning_rate": 0.00019469337837962192, + "loss": 0.7539, + "step": 1824 + }, + { + "epoch": 0.42, + "learning_rate": 0.00019468753002919762, + "loss": 0.7894, + "step": 1825 + }, + { + "epoch": 0.42, + "learning_rate": 0.0001946816785457937, + "loss": 0.7702, + "step": 1826 + }, + { + "epoch": 0.42, + "learning_rate": 0.00019467582392960385, + "loss": 0.7889, + "step": 1827 + }, + { + "epoch": 0.42, + "learning_rate": 0.00019466996618082175, + "loss": 0.8297, + "step": 1828 + }, + { + "epoch": 0.42, + "learning_rate": 0.00019466410529964123, + "loss": 0.7386, + "step": 1829 + }, + { + "epoch": 0.42, + "learning_rate": 0.00019465824128625617, + "loss": 0.7576, + "step": 1830 + }, + { + "epoch": 0.42, + "learning_rate": 0.00019465237414086067, + "loss": 0.7027, + "step": 1831 + }, + { + "epoch": 0.42, + "learning_rate": 0.00019464650386364878, + "loss": 0.722, + "step": 1832 + }, + { + "epoch": 0.42, + "learning_rate": 0.00019464063045481478, + "loss": 0.7718, + "step": 1833 + }, + { + "epoch": 0.42, + "learning_rate": 0.000194634753914553, + "loss": 0.7246, + "step": 1834 + }, + { + "epoch": 0.42, + "learning_rate": 0.00019462887424305785, + "loss": 0.7818, + "step": 1835 + }, + { + "epoch": 0.43, + "learning_rate": 0.00019462299144052392, + "loss": 0.7216, + "step": 1836 + }, + { + "epoch": 0.43, + "learning_rate": 0.00019461710550714584, + "loss": 0.6824, + "step": 1837 + }, + { + "epoch": 0.43, + "learning_rate": 0.00019461121644311836, + "loss": 0.7187, + "step": 1838 + }, + { + "epoch": 0.43, + "learning_rate": 0.0001946053242486363, + "loss": 0.7697, + "step": 1839 + }, + { + "epoch": 0.43, + "learning_rate": 0.00019459942892389472, + "loss": 0.7455, + "step": 1840 + }, + { + "epoch": 0.43, + "learning_rate": 0.00019459353046908856, + "loss": 0.7832, + "step": 1841 + }, + { + "epoch": 0.43, + "learning_rate": 0.00019458762888441308, + "loss": 0.8186, + "step": 1842 + }, + { + "epoch": 0.43, + "learning_rate": 0.00019458172417006347, + "loss": 0.7325, + "step": 1843 + }, + { + "epoch": 0.43, + "learning_rate": 0.00019457581632623518, + "loss": 0.7603, + "step": 1844 + }, + { + "epoch": 0.43, + "learning_rate": 0.00019456990535312362, + "loss": 0.7564, + "step": 1845 + }, + { + "epoch": 0.43, + "learning_rate": 0.00019456399125092441, + "loss": 0.8493, + "step": 1846 + }, + { + "epoch": 0.43, + "learning_rate": 0.0001945580740198332, + "loss": 0.7713, + "step": 1847 + }, + { + "epoch": 0.43, + "learning_rate": 0.00019455215366004583, + "loss": 0.7712, + "step": 1848 + }, + { + "epoch": 0.43, + "learning_rate": 0.00019454623017175812, + "loss": 0.7411, + "step": 1849 + }, + { + "epoch": 0.43, + "learning_rate": 0.00019454030355516616, + "loss": 0.8119, + "step": 1850 + }, + { + "epoch": 0.43, + "learning_rate": 0.00019453437381046593, + "loss": 0.7821, + "step": 1851 + }, + { + "epoch": 0.43, + "learning_rate": 0.00019452844093785369, + "loss": 0.7662, + "step": 1852 + }, + { + "epoch": 0.43, + "learning_rate": 0.0001945225049375258, + "loss": 0.7594, + "step": 1853 + }, + { + "epoch": 0.43, + "learning_rate": 0.00019451656580967856, + "loss": 0.8347, + "step": 1854 + }, + { + "epoch": 0.43, + "learning_rate": 0.00019451062355450854, + "loss": 0.7688, + "step": 1855 + }, + { + "epoch": 0.43, + "learning_rate": 0.00019450467817221236, + "loss": 0.7225, + "step": 1856 + }, + { + "epoch": 0.43, + "learning_rate": 0.00019449872966298672, + "loss": 0.7406, + "step": 1857 + }, + { + "epoch": 0.43, + "learning_rate": 0.00019449277802702848, + "loss": 0.7524, + "step": 1858 + }, + { + "epoch": 0.43, + "learning_rate": 0.0001944868232645345, + "loss": 0.7591, + "step": 1859 + }, + { + "epoch": 0.43, + "learning_rate": 0.00019448086537570184, + "loss": 0.7908, + "step": 1860 + }, + { + "epoch": 0.43, + "learning_rate": 0.00019447490436072764, + "loss": 0.7549, + "step": 1861 + }, + { + "epoch": 0.43, + "learning_rate": 0.00019446894021980912, + "loss": 0.7645, + "step": 1862 + }, + { + "epoch": 0.43, + "learning_rate": 0.00019446297295314365, + "loss": 0.7007, + "step": 1863 + }, + { + "epoch": 0.43, + "learning_rate": 0.0001944570025609286, + "loss": 0.801, + "step": 1864 + }, + { + "epoch": 0.43, + "learning_rate": 0.0001944510290433616, + "loss": 0.7754, + "step": 1865 + }, + { + "epoch": 0.43, + "learning_rate": 0.00019444505240064032, + "loss": 0.7865, + "step": 1866 + }, + { + "epoch": 0.43, + "learning_rate": 0.00019443907263296236, + "loss": 0.7202, + "step": 1867 + }, + { + "epoch": 0.43, + "learning_rate": 0.0001944330897405257, + "loss": 0.8356, + "step": 1868 + }, + { + "epoch": 0.43, + "learning_rate": 0.00019442710372352833, + "loss": 0.7117, + "step": 1869 + }, + { + "epoch": 0.43, + "learning_rate": 0.0001944211145821682, + "loss": 0.6704, + "step": 1870 + }, + { + "epoch": 0.43, + "learning_rate": 0.00019441512231664355, + "loss": 0.7469, + "step": 1871 + }, + { + "epoch": 0.43, + "learning_rate": 0.00019440912692715264, + "loss": 0.74, + "step": 1872 + }, + { + "epoch": 0.43, + "learning_rate": 0.0001944031284138938, + "loss": 0.7533, + "step": 1873 + }, + { + "epoch": 0.43, + "learning_rate": 0.0001943971267770656, + "loss": 0.7733, + "step": 1874 + }, + { + "epoch": 0.43, + "learning_rate": 0.0001943911220168665, + "loss": 0.7892, + "step": 1875 + }, + { + "epoch": 0.43, + "learning_rate": 0.00019438511413349526, + "loss": 0.776, + "step": 1876 + }, + { + "epoch": 0.43, + "learning_rate": 0.00019437910312715065, + "loss": 0.769, + "step": 1877 + }, + { + "epoch": 0.43, + "learning_rate": 0.00019437308899803154, + "loss": 0.7193, + "step": 1878 + }, + { + "epoch": 0.43, + "learning_rate": 0.00019436707174633696, + "loss": 0.8029, + "step": 1879 + }, + { + "epoch": 0.44, + "learning_rate": 0.00019436105137226602, + "loss": 0.7893, + "step": 1880 + }, + { + "epoch": 0.44, + "learning_rate": 0.00019435502787601782, + "loss": 0.7411, + "step": 1881 + }, + { + "epoch": 0.44, + "learning_rate": 0.00019434900125779177, + "loss": 0.7993, + "step": 1882 + }, + { + "epoch": 0.44, + "learning_rate": 0.00019434297151778724, + "loss": 0.7161, + "step": 1883 + }, + { + "epoch": 0.44, + "learning_rate": 0.00019433693865620374, + "loss": 0.7569, + "step": 1884 + }, + { + "epoch": 0.44, + "learning_rate": 0.00019433090267324086, + "loss": 0.7453, + "step": 1885 + }, + { + "epoch": 0.44, + "learning_rate": 0.00019432486356909832, + "loss": 0.781, + "step": 1886 + }, + { + "epoch": 0.44, + "learning_rate": 0.00019431882134397598, + "loss": 0.7439, + "step": 1887 + }, + { + "epoch": 0.44, + "learning_rate": 0.00019431277599807372, + "loss": 0.7805, + "step": 1888 + }, + { + "epoch": 0.44, + "learning_rate": 0.00019430672753159155, + "loss": 0.8329, + "step": 1889 + }, + { + "epoch": 0.44, + "learning_rate": 0.0001943006759447297, + "loss": 0.7688, + "step": 1890 + }, + { + "epoch": 0.44, + "learning_rate": 0.0001942946212376883, + "loss": 0.7473, + "step": 1891 + }, + { + "epoch": 0.44, + "learning_rate": 0.0001942885634106677, + "loss": 0.764, + "step": 1892 + }, + { + "epoch": 0.44, + "learning_rate": 0.00019428250246386836, + "loss": 0.7311, + "step": 1893 + }, + { + "epoch": 0.44, + "learning_rate": 0.00019427643839749084, + "loss": 0.7322, + "step": 1894 + }, + { + "epoch": 0.44, + "learning_rate": 0.00019427037121173574, + "loss": 0.688, + "step": 1895 + }, + { + "epoch": 0.44, + "learning_rate": 0.00019426430090680388, + "loss": 0.7618, + "step": 1896 + }, + { + "epoch": 0.44, + "learning_rate": 0.00019425822748289601, + "loss": 0.8215, + "step": 1897 + }, + { + "epoch": 0.44, + "learning_rate": 0.00019425215094021316, + "loss": 0.7758, + "step": 1898 + }, + { + "epoch": 0.44, + "learning_rate": 0.00019424607127895637, + "loss": 0.7278, + "step": 1899 + }, + { + "epoch": 0.44, + "learning_rate": 0.0001942399884993268, + "loss": 0.7715, + "step": 1900 + }, + { + "epoch": 0.44, + "learning_rate": 0.0001942339026015257, + "loss": 0.774, + "step": 1901 + }, + { + "epoch": 0.44, + "learning_rate": 0.00019422781358575443, + "loss": 0.7269, + "step": 1902 + }, + { + "epoch": 0.44, + "learning_rate": 0.00019422172145221452, + "loss": 0.7616, + "step": 1903 + }, + { + "epoch": 0.44, + "learning_rate": 0.00019421562620110748, + "loss": 0.752, + "step": 1904 + }, + { + "epoch": 0.44, + "learning_rate": 0.00019420952783263503, + "loss": 0.8105, + "step": 1905 + }, + { + "epoch": 0.44, + "learning_rate": 0.0001942034263469989, + "loss": 0.7085, + "step": 1906 + }, + { + "epoch": 0.44, + "learning_rate": 0.00019419732174440104, + "loss": 0.8261, + "step": 1907 + }, + { + "epoch": 0.44, + "learning_rate": 0.0001941912140250434, + "loss": 0.7672, + "step": 1908 + }, + { + "epoch": 0.44, + "learning_rate": 0.00019418510318912807, + "loss": 0.7537, + "step": 1909 + }, + { + "epoch": 0.44, + "learning_rate": 0.0001941789892368572, + "loss": 0.8291, + "step": 1910 + }, + { + "epoch": 0.44, + "learning_rate": 0.00019417287216843317, + "loss": 0.8126, + "step": 1911 + }, + { + "epoch": 0.44, + "learning_rate": 0.00019416675198405834, + "loss": 0.754, + "step": 1912 + }, + { + "epoch": 0.44, + "learning_rate": 0.00019416062868393518, + "loss": 0.7134, + "step": 1913 + }, + { + "epoch": 0.44, + "learning_rate": 0.00019415450226826635, + "loss": 0.7043, + "step": 1914 + }, + { + "epoch": 0.44, + "learning_rate": 0.00019414837273725453, + "loss": 0.7559, + "step": 1915 + }, + { + "epoch": 0.44, + "learning_rate": 0.0001941422400911025, + "loss": 0.7155, + "step": 1916 + }, + { + "epoch": 0.44, + "learning_rate": 0.00019413610433001327, + "loss": 0.7174, + "step": 1917 + }, + { + "epoch": 0.44, + "learning_rate": 0.00019412996545418976, + "loss": 0.6592, + "step": 1918 + }, + { + "epoch": 0.44, + "learning_rate": 0.00019412382346383514, + "loss": 0.8389, + "step": 1919 + }, + { + "epoch": 0.44, + "learning_rate": 0.0001941176783591526, + "loss": 0.768, + "step": 1920 + }, + { + "epoch": 0.44, + "learning_rate": 0.0001941115301403455, + "loss": 0.7568, + "step": 1921 + }, + { + "epoch": 0.44, + "learning_rate": 0.00019410537880761724, + "loss": 0.7781, + "step": 1922 + }, + { + "epoch": 0.45, + "learning_rate": 0.0001940992243611714, + "loss": 0.7672, + "step": 1923 + }, + { + "epoch": 0.45, + "learning_rate": 0.00019409306680121156, + "loss": 0.7251, + "step": 1924 + }, + { + "epoch": 0.45, + "learning_rate": 0.00019408690612794148, + "loss": 0.8235, + "step": 1925 + }, + { + "epoch": 0.45, + "learning_rate": 0.00019408074234156502, + "loss": 0.7173, + "step": 1926 + }, + { + "epoch": 0.45, + "learning_rate": 0.00019407457544228608, + "loss": 0.7873, + "step": 1927 + }, + { + "epoch": 0.45, + "learning_rate": 0.00019406840543030878, + "loss": 0.785, + "step": 1928 + }, + { + "epoch": 0.45, + "learning_rate": 0.0001940622323058372, + "loss": 0.8044, + "step": 1929 + }, + { + "epoch": 0.45, + "learning_rate": 0.0001940560560690756, + "loss": 0.7276, + "step": 1930 + }, + { + "epoch": 0.45, + "learning_rate": 0.00019404987672022844, + "loss": 0.7877, + "step": 1931 + }, + { + "epoch": 0.45, + "learning_rate": 0.00019404369425950004, + "loss": 0.7743, + "step": 1932 + }, + { + "epoch": 0.45, + "learning_rate": 0.000194037508687095, + "loss": 0.7035, + "step": 1933 + }, + { + "epoch": 0.45, + "learning_rate": 0.00019403132000321804, + "loss": 0.6944, + "step": 1934 + }, + { + "epoch": 0.45, + "learning_rate": 0.0001940251282080739, + "loss": 0.7359, + "step": 1935 + }, + { + "epoch": 0.45, + "learning_rate": 0.00019401893330186741, + "loss": 0.8501, + "step": 1936 + }, + { + "epoch": 0.45, + "learning_rate": 0.00019401273528480358, + "loss": 0.6684, + "step": 1937 + }, + { + "epoch": 0.45, + "learning_rate": 0.00019400653415708752, + "loss": 0.7933, + "step": 1938 + }, + { + "epoch": 0.45, + "learning_rate": 0.00019400032991892437, + "loss": 0.7854, + "step": 1939 + }, + { + "epoch": 0.45, + "learning_rate": 0.00019399412257051942, + "loss": 0.7334, + "step": 1940 + }, + { + "epoch": 0.45, + "learning_rate": 0.00019398791211207804, + "loss": 0.7497, + "step": 1941 + }, + { + "epoch": 0.45, + "learning_rate": 0.00019398169854380575, + "loss": 0.7515, + "step": 1942 + }, + { + "epoch": 0.45, + "learning_rate": 0.00019397548186590814, + "loss": 0.837, + "step": 1943 + }, + { + "epoch": 0.45, + "learning_rate": 0.00019396926207859084, + "loss": 0.7454, + "step": 1944 + }, + { + "epoch": 0.45, + "learning_rate": 0.00019396303918205974, + "loss": 0.7085, + "step": 1945 + }, + { + "epoch": 0.45, + "learning_rate": 0.00019395681317652068, + "loss": 0.7766, + "step": 1946 + }, + { + "epoch": 0.45, + "learning_rate": 0.00019395058406217972, + "loss": 0.7443, + "step": 1947 + }, + { + "epoch": 0.45, + "learning_rate": 0.0001939443518392429, + "loss": 0.8331, + "step": 1948 + }, + { + "epoch": 0.45, + "learning_rate": 0.00019393811650791646, + "loss": 0.7972, + "step": 1949 + }, + { + "epoch": 0.45, + "learning_rate": 0.00019393187806840672, + "loss": 0.7421, + "step": 1950 + }, + { + "epoch": 0.45, + "learning_rate": 0.00019392563652092005, + "loss": 0.7552, + "step": 1951 + }, + { + "epoch": 0.45, + "learning_rate": 0.00019391939186566304, + "loss": 0.7762, + "step": 1952 + }, + { + "epoch": 0.45, + "learning_rate": 0.0001939131441028423, + "loss": 0.7062, + "step": 1953 + }, + { + "epoch": 0.45, + "learning_rate": 0.00019390689323266447, + "loss": 0.7616, + "step": 1954 + }, + { + "epoch": 0.45, + "learning_rate": 0.00019390063925533646, + "loss": 0.7368, + "step": 1955 + }, + { + "epoch": 0.45, + "learning_rate": 0.00019389438217106518, + "loss": 0.8178, + "step": 1956 + }, + { + "epoch": 0.45, + "learning_rate": 0.00019388812198005764, + "loss": 0.8426, + "step": 1957 + }, + { + "epoch": 0.45, + "learning_rate": 0.00019388185868252097, + "loss": 0.6997, + "step": 1958 + }, + { + "epoch": 0.45, + "learning_rate": 0.00019387559227866246, + "loss": 0.7197, + "step": 1959 + }, + { + "epoch": 0.45, + "learning_rate": 0.00019386932276868938, + "loss": 0.7188, + "step": 1960 + }, + { + "epoch": 0.45, + "learning_rate": 0.00019386305015280924, + "loss": 0.7916, + "step": 1961 + }, + { + "epoch": 0.45, + "learning_rate": 0.00019385677443122955, + "loss": 0.7566, + "step": 1962 + }, + { + "epoch": 0.45, + "learning_rate": 0.00019385049560415794, + "loss": 0.7596, + "step": 1963 + }, + { + "epoch": 0.45, + "learning_rate": 0.00019384421367180216, + "loss": 0.7263, + "step": 1964 + }, + { + "epoch": 0.45, + "learning_rate": 0.00019383792863437015, + "loss": 0.7871, + "step": 1965 + }, + { + "epoch": 0.46, + "learning_rate": 0.00019383164049206975, + "loss": 0.7495, + "step": 1966 + }, + { + "epoch": 0.46, + "learning_rate": 0.0001938253492451091, + "loss": 0.7421, + "step": 1967 + }, + { + "epoch": 0.46, + "learning_rate": 0.00019381905489369633, + "loss": 0.6849, + "step": 1968 + }, + { + "epoch": 0.46, + "learning_rate": 0.0001938127574380397, + "loss": 0.7664, + "step": 1969 + }, + { + "epoch": 0.46, + "learning_rate": 0.0001938064568783476, + "loss": 0.7494, + "step": 1970 + }, + { + "epoch": 0.46, + "learning_rate": 0.00019380015321482846, + "loss": 0.73, + "step": 1971 + }, + { + "epoch": 0.46, + "learning_rate": 0.00019379384644769093, + "loss": 0.766, + "step": 1972 + }, + { + "epoch": 0.46, + "learning_rate": 0.0001937875365771436, + "loss": 0.7808, + "step": 1973 + }, + { + "epoch": 0.46, + "learning_rate": 0.0001937812236033953, + "loss": 0.7946, + "step": 1974 + }, + { + "epoch": 0.46, + "learning_rate": 0.00019377490752665486, + "loss": 0.742, + "step": 1975 + }, + { + "epoch": 0.46, + "learning_rate": 0.00019376858834713133, + "loss": 0.7873, + "step": 1976 + }, + { + "epoch": 0.46, + "learning_rate": 0.00019376226606503374, + "loss": 0.8364, + "step": 1977 + }, + { + "epoch": 0.46, + "learning_rate": 0.00019375594068057132, + "loss": 0.7833, + "step": 1978 + }, + { + "epoch": 0.46, + "learning_rate": 0.00019374961219395336, + "loss": 0.7525, + "step": 1979 + }, + { + "epoch": 0.46, + "learning_rate": 0.0001937432806053892, + "loss": 0.8181, + "step": 1980 + }, + { + "epoch": 0.46, + "learning_rate": 0.0001937369459150884, + "loss": 0.7175, + "step": 1981 + }, + { + "epoch": 0.46, + "learning_rate": 0.00019373060812326052, + "loss": 0.7171, + "step": 1982 + }, + { + "epoch": 0.46, + "learning_rate": 0.0001937242672301153, + "loss": 0.8029, + "step": 1983 + }, + { + "epoch": 0.46, + "learning_rate": 0.0001937179232358625, + "loss": 0.688, + "step": 1984 + }, + { + "epoch": 0.46, + "learning_rate": 0.00019371157614071207, + "loss": 0.7855, + "step": 1985 + }, + { + "epoch": 0.46, + "learning_rate": 0.000193705225944874, + "loss": 0.7559, + "step": 1986 + }, + { + "epoch": 0.46, + "learning_rate": 0.00019369887264855838, + "loss": 0.763, + "step": 1987 + }, + { + "epoch": 0.46, + "learning_rate": 0.00019369251625197547, + "loss": 0.789, + "step": 1988 + }, + { + "epoch": 0.46, + "learning_rate": 0.00019368615675533553, + "loss": 0.7814, + "step": 1989 + }, + { + "epoch": 0.46, + "learning_rate": 0.00019367979415884902, + "loss": 0.7426, + "step": 1990 + }, + { + "epoch": 0.46, + "learning_rate": 0.00019367342846272648, + "loss": 0.7541, + "step": 1991 + }, + { + "epoch": 0.46, + "learning_rate": 0.00019366705966717852, + "loss": 0.7699, + "step": 1992 + }, + { + "epoch": 0.46, + "learning_rate": 0.00019366068777241583, + "loss": 0.8155, + "step": 1993 + }, + { + "epoch": 0.46, + "learning_rate": 0.0001936543127786493, + "loss": 0.7707, + "step": 1994 + }, + { + "epoch": 0.46, + "learning_rate": 0.0001936479346860898, + "loss": 0.7539, + "step": 1995 + }, + { + "epoch": 0.46, + "learning_rate": 0.00019364155349494843, + "loss": 0.7001, + "step": 1996 + }, + { + "epoch": 0.46, + "learning_rate": 0.00019363516920543628, + "loss": 0.7233, + "step": 1997 + }, + { + "epoch": 0.46, + "learning_rate": 0.00019362878181776463, + "loss": 0.7713, + "step": 1998 + }, + { + "epoch": 0.46, + "learning_rate": 0.0001936223913321448, + "loss": 0.8308, + "step": 1999 + }, + { + "epoch": 0.46, + "learning_rate": 0.00019361599774878822, + "loss": 0.7633, + "step": 2000 + }, + { + "epoch": 0.46, + "learning_rate": 0.00019360960106790643, + "loss": 0.7324, + "step": 2001 + }, + { + "epoch": 0.46, + "learning_rate": 0.00019360320128971117, + "loss": 0.7036, + "step": 2002 + }, + { + "epoch": 0.46, + "learning_rate": 0.0001935967984144141, + "loss": 0.7737, + "step": 2003 + }, + { + "epoch": 0.46, + "learning_rate": 0.0001935903924422271, + "loss": 0.7084, + "step": 2004 + }, + { + "epoch": 0.46, + "learning_rate": 0.00019358398337336212, + "loss": 0.7164, + "step": 2005 + }, + { + "epoch": 0.46, + "learning_rate": 0.00019357757120803124, + "loss": 0.8324, + "step": 2006 + }, + { + "epoch": 0.46, + "learning_rate": 0.00019357115594644664, + "loss": 0.7957, + "step": 2007 + }, + { + "epoch": 0.46, + "learning_rate": 0.00019356473758882052, + "loss": 0.743, + "step": 2008 + }, + { + "epoch": 0.47, + "learning_rate": 0.00019355831613536531, + "loss": 0.7896, + "step": 2009 + }, + { + "epoch": 0.47, + "learning_rate": 0.00019355189158629346, + "loss": 0.7239, + "step": 2010 + }, + { + "epoch": 0.47, + "learning_rate": 0.00019354546394181754, + "loss": 0.7183, + "step": 2011 + }, + { + "epoch": 0.47, + "learning_rate": 0.00019353903320215024, + "loss": 0.7735, + "step": 2012 + }, + { + "epoch": 0.47, + "learning_rate": 0.0001935325993675043, + "loss": 0.7668, + "step": 2013 + }, + { + "epoch": 0.47, + "learning_rate": 0.00019352616243809262, + "loss": 0.6915, + "step": 2014 + }, + { + "epoch": 0.47, + "learning_rate": 0.0001935197224141282, + "loss": 0.7451, + "step": 2015 + }, + { + "epoch": 0.47, + "learning_rate": 0.00019351327929582413, + "loss": 0.706, + "step": 2016 + }, + { + "epoch": 0.47, + "learning_rate": 0.00019350683308339356, + "loss": 0.6836, + "step": 2017 + }, + { + "epoch": 0.47, + "learning_rate": 0.00019350038377704979, + "loss": 0.7895, + "step": 2018 + }, + { + "epoch": 0.47, + "learning_rate": 0.00019349393137700624, + "loss": 0.8115, + "step": 2019 + }, + { + "epoch": 0.47, + "learning_rate": 0.00019348747588347637, + "loss": 0.7315, + "step": 2020 + }, + { + "epoch": 0.47, + "learning_rate": 0.00019348101729667382, + "loss": 0.7418, + "step": 2021 + }, + { + "epoch": 0.47, + "learning_rate": 0.00019347455561681223, + "loss": 0.6854, + "step": 2022 + }, + { + "epoch": 0.47, + "learning_rate": 0.00019346809084410543, + "loss": 0.7243, + "step": 2023 + }, + { + "epoch": 0.47, + "learning_rate": 0.0001934616229787673, + "loss": 0.7819, + "step": 2024 + }, + { + "epoch": 0.47, + "learning_rate": 0.0001934551520210119, + "loss": 0.7386, + "step": 2025 + }, + { + "epoch": 0.47, + "learning_rate": 0.00019344867797105333, + "loss": 0.8092, + "step": 2026 + }, + { + "epoch": 0.47, + "learning_rate": 0.00019344220082910573, + "loss": 0.7271, + "step": 2027 + }, + { + "epoch": 0.47, + "learning_rate": 0.0001934357205953835, + "loss": 0.7233, + "step": 2028 + }, + { + "epoch": 0.47, + "learning_rate": 0.00019342923727010098, + "loss": 0.7853, + "step": 2029 + }, + { + "epoch": 0.47, + "learning_rate": 0.00019342275085347277, + "loss": 0.692, + "step": 2030 + }, + { + "epoch": 0.47, + "learning_rate": 0.00019341626134571343, + "loss": 0.7935, + "step": 2031 + }, + { + "epoch": 0.47, + "learning_rate": 0.00019340976874703767, + "loss": 0.7261, + "step": 2032 + }, + { + "epoch": 0.47, + "learning_rate": 0.00019340327305766036, + "loss": 0.7036, + "step": 2033 + }, + { + "epoch": 0.47, + "learning_rate": 0.0001933967742777964, + "loss": 0.8032, + "step": 2034 + }, + { + "epoch": 0.47, + "learning_rate": 0.00019339027240766084, + "loss": 0.7902, + "step": 2035 + }, + { + "epoch": 0.47, + "learning_rate": 0.0001933837674474688, + "loss": 0.7534, + "step": 2036 + }, + { + "epoch": 0.47, + "learning_rate": 0.00019337725939743545, + "loss": 0.688, + "step": 2037 + }, + { + "epoch": 0.47, + "learning_rate": 0.00019337074825777624, + "loss": 0.7907, + "step": 2038 + }, + { + "epoch": 0.47, + "learning_rate": 0.00019336423402870653, + "loss": 0.7551, + "step": 2039 + }, + { + "epoch": 0.47, + "learning_rate": 0.0001933577167104419, + "loss": 0.7951, + "step": 2040 + }, + { + "epoch": 0.47, + "learning_rate": 0.00019335119630319798, + "loss": 0.8153, + "step": 2041 + }, + { + "epoch": 0.47, + "learning_rate": 0.00019334467280719052, + "loss": 0.7308, + "step": 2042 + }, + { + "epoch": 0.47, + "learning_rate": 0.0001933381462226353, + "loss": 0.7116, + "step": 2043 + }, + { + "epoch": 0.47, + "learning_rate": 0.00019333161654974836, + "loss": 0.7594, + "step": 2044 + }, + { + "epoch": 0.47, + "learning_rate": 0.0001933250837887457, + "loss": 0.7979, + "step": 2045 + }, + { + "epoch": 0.47, + "learning_rate": 0.00019331854793984353, + "loss": 0.671, + "step": 2046 + }, + { + "epoch": 0.47, + "learning_rate": 0.00019331200900325803, + "loss": 0.7667, + "step": 2047 + }, + { + "epoch": 0.47, + "learning_rate": 0.0001933054669792056, + "loss": 0.6835, + "step": 2048 + }, + { + "epoch": 0.47, + "learning_rate": 0.0001932989218679027, + "loss": 0.7776, + "step": 2049 + }, + { + "epoch": 0.47, + "learning_rate": 0.00019329237366956588, + "loss": 0.7975, + "step": 2050 + }, + { + "epoch": 0.47, + "learning_rate": 0.0001932858223844118, + "loss": 0.7134, + "step": 2051 + }, + { + "epoch": 0.48, + "learning_rate": 0.00019327926801265724, + "loss": 0.7856, + "step": 2052 + }, + { + "epoch": 0.48, + "learning_rate": 0.00019327271055451906, + "loss": 0.763, + "step": 2053 + }, + { + "epoch": 0.48, + "learning_rate": 0.00019326615001021423, + "loss": 0.7528, + "step": 2054 + }, + { + "epoch": 0.48, + "learning_rate": 0.00019325958637995982, + "loss": 0.7262, + "step": 2055 + }, + { + "epoch": 0.48, + "learning_rate": 0.000193253019663973, + "loss": 0.759, + "step": 2056 + }, + { + "epoch": 0.48, + "learning_rate": 0.00019324644986247106, + "loss": 0.7985, + "step": 2057 + }, + { + "epoch": 0.48, + "learning_rate": 0.0001932398769756714, + "loss": 0.7587, + "step": 2058 + }, + { + "epoch": 0.48, + "learning_rate": 0.00019323330100379146, + "loss": 0.8345, + "step": 2059 + }, + { + "epoch": 0.48, + "learning_rate": 0.0001932267219470488, + "loss": 0.7535, + "step": 2060 + }, + { + "epoch": 0.48, + "learning_rate": 0.00019322013980566119, + "loss": 0.7978, + "step": 2061 + }, + { + "epoch": 0.48, + "learning_rate": 0.00019321355457984637, + "loss": 0.7847, + "step": 2062 + }, + { + "epoch": 0.48, + "learning_rate": 0.0001932069662698222, + "loss": 0.673, + "step": 2063 + }, + { + "epoch": 0.48, + "learning_rate": 0.0001932003748758067, + "loss": 0.8052, + "step": 2064 + }, + { + "epoch": 0.48, + "learning_rate": 0.000193193780398018, + "loss": 0.7994, + "step": 2065 + }, + { + "epoch": 0.48, + "learning_rate": 0.0001931871828366742, + "loss": 0.7493, + "step": 2066 + }, + { + "epoch": 0.48, + "learning_rate": 0.00019318058219199371, + "loss": 0.7509, + "step": 2067 + }, + { + "epoch": 0.48, + "learning_rate": 0.00019317397846419486, + "loss": 0.7777, + "step": 2068 + }, + { + "epoch": 0.48, + "learning_rate": 0.00019316737165349618, + "loss": 0.7508, + "step": 2069 + }, + { + "epoch": 0.48, + "learning_rate": 0.0001931607617601162, + "loss": 0.7936, + "step": 2070 + }, + { + "epoch": 0.48, + "learning_rate": 0.00019315414878427373, + "loss": 0.782, + "step": 2071 + }, + { + "epoch": 0.48, + "learning_rate": 0.00019314753272618753, + "loss": 0.7982, + "step": 2072 + }, + { + "epoch": 0.48, + "learning_rate": 0.00019314091358607648, + "loss": 0.8014, + "step": 2073 + }, + { + "epoch": 0.48, + "learning_rate": 0.00019313429136415966, + "loss": 0.7414, + "step": 2074 + }, + { + "epoch": 0.48, + "learning_rate": 0.00019312766606065614, + "loss": 0.8067, + "step": 2075 + }, + { + "epoch": 0.48, + "learning_rate": 0.00019312103767578514, + "loss": 0.711, + "step": 2076 + }, + { + "epoch": 0.48, + "learning_rate": 0.00019311440620976597, + "loss": 0.7587, + "step": 2077 + }, + { + "epoch": 0.48, + "learning_rate": 0.00019310777166281807, + "loss": 0.817, + "step": 2078 + }, + { + "epoch": 0.48, + "learning_rate": 0.0001931011340351609, + "loss": 0.736, + "step": 2079 + }, + { + "epoch": 0.48, + "learning_rate": 0.00019309449332701419, + "loss": 0.7546, + "step": 2080 + }, + { + "epoch": 0.48, + "learning_rate": 0.00019308784953859758, + "loss": 0.7788, + "step": 2081 + }, + { + "epoch": 0.48, + "learning_rate": 0.00019308120267013093, + "loss": 0.7324, + "step": 2082 + }, + { + "epoch": 0.48, + "learning_rate": 0.00019307455272183415, + "loss": 0.7182, + "step": 2083 + }, + { + "epoch": 0.48, + "learning_rate": 0.00019306789969392727, + "loss": 0.7348, + "step": 2084 + }, + { + "epoch": 0.48, + "learning_rate": 0.00019306124358663045, + "loss": 0.8303, + "step": 2085 + }, + { + "epoch": 0.48, + "learning_rate": 0.00019305458440016388, + "loss": 0.7994, + "step": 2086 + }, + { + "epoch": 0.48, + "learning_rate": 0.00019304792213474797, + "loss": 0.7528, + "step": 2087 + }, + { + "epoch": 0.48, + "learning_rate": 0.0001930412567906031, + "loss": 0.8006, + "step": 2088 + }, + { + "epoch": 0.48, + "learning_rate": 0.00019303458836794982, + "loss": 0.7818, + "step": 2089 + }, + { + "epoch": 0.48, + "learning_rate": 0.00019302791686700877, + "loss": 0.6958, + "step": 2090 + }, + { + "epoch": 0.48, + "learning_rate": 0.0001930212422880007, + "loss": 0.7721, + "step": 2091 + }, + { + "epoch": 0.48, + "learning_rate": 0.00019301456463114645, + "loss": 0.8096, + "step": 2092 + }, + { + "epoch": 0.48, + "learning_rate": 0.00019300788389666696, + "loss": 0.7174, + "step": 2093 + }, + { + "epoch": 0.48, + "learning_rate": 0.0001930012000847833, + "loss": 0.7328, + "step": 2094 + }, + { + "epoch": 0.48, + "learning_rate": 0.00019299451319571662, + "loss": 0.757, + "step": 2095 + }, + { + "epoch": 0.49, + "learning_rate": 0.00019298782322968815, + "loss": 0.7235, + "step": 2096 + }, + { + "epoch": 0.49, + "learning_rate": 0.00019298113018691927, + "loss": 0.784, + "step": 2097 + }, + { + "epoch": 0.49, + "learning_rate": 0.00019297443406763142, + "loss": 0.7264, + "step": 2098 + }, + { + "epoch": 0.49, + "learning_rate": 0.0001929677348720462, + "loss": 0.6451, + "step": 2099 + }, + { + "epoch": 0.49, + "learning_rate": 0.00019296103260038518, + "loss": 0.7448, + "step": 2100 + }, + { + "epoch": 0.49, + "learning_rate": 0.00019295432725287016, + "loss": 0.7604, + "step": 2101 + }, + { + "epoch": 0.49, + "learning_rate": 0.00019294761882972307, + "loss": 0.7533, + "step": 2102 + }, + { + "epoch": 0.49, + "learning_rate": 0.0001929409073311658, + "loss": 0.7541, + "step": 2103 + }, + { + "epoch": 0.49, + "learning_rate": 0.00019293419275742045, + "loss": 0.7701, + "step": 2104 + }, + { + "epoch": 0.49, + "learning_rate": 0.0001929274751087092, + "loss": 0.7758, + "step": 2105 + }, + { + "epoch": 0.49, + "learning_rate": 0.00019292075438525429, + "loss": 0.7853, + "step": 2106 + }, + { + "epoch": 0.49, + "learning_rate": 0.00019291403058727807, + "loss": 0.7657, + "step": 2107 + }, + { + "epoch": 0.49, + "learning_rate": 0.0001929073037150031, + "loss": 0.7762, + "step": 2108 + }, + { + "epoch": 0.49, + "learning_rate": 0.00019290057376865187, + "loss": 0.7164, + "step": 2109 + }, + { + "epoch": 0.49, + "learning_rate": 0.00019289384074844707, + "loss": 0.7845, + "step": 2110 + }, + { + "epoch": 0.49, + "learning_rate": 0.00019288710465461154, + "loss": 0.7078, + "step": 2111 + }, + { + "epoch": 0.49, + "learning_rate": 0.0001928803654873681, + "loss": 0.6915, + "step": 2112 + }, + { + "epoch": 0.49, + "learning_rate": 0.00019287362324693977, + "loss": 0.8249, + "step": 2113 + }, + { + "epoch": 0.49, + "learning_rate": 0.0001928668779335496, + "loss": 0.7537, + "step": 2114 + }, + { + "epoch": 0.49, + "learning_rate": 0.0001928601295474208, + "loss": 0.7784, + "step": 2115 + }, + { + "epoch": 0.49, + "learning_rate": 0.00019285337808877667, + "loss": 0.7826, + "step": 2116 + }, + { + "epoch": 0.49, + "learning_rate": 0.00019284662355784055, + "loss": 0.7294, + "step": 2117 + }, + { + "epoch": 0.49, + "learning_rate": 0.00019283986595483596, + "loss": 0.7721, + "step": 2118 + }, + { + "epoch": 0.49, + "learning_rate": 0.00019283310527998653, + "loss": 0.7514, + "step": 2119 + }, + { + "epoch": 0.49, + "learning_rate": 0.0001928263415335159, + "loss": 0.7677, + "step": 2120 + }, + { + "epoch": 0.49, + "learning_rate": 0.00019281957471564787, + "loss": 0.737, + "step": 2121 + }, + { + "epoch": 0.49, + "learning_rate": 0.00019281280482660637, + "loss": 0.8081, + "step": 2122 + }, + { + "epoch": 0.49, + "learning_rate": 0.0001928060318666154, + "loss": 0.7637, + "step": 2123 + }, + { + "epoch": 0.49, + "learning_rate": 0.000192799255835899, + "loss": 0.751, + "step": 2124 + }, + { + "epoch": 0.49, + "learning_rate": 0.00019279247673468142, + "loss": 0.7555, + "step": 2125 + }, + { + "epoch": 0.49, + "learning_rate": 0.00019278569456318697, + "loss": 0.7022, + "step": 2126 + }, + { + "epoch": 0.49, + "learning_rate": 0.00019277890932164007, + "loss": 0.7912, + "step": 2127 + }, + { + "epoch": 0.49, + "learning_rate": 0.00019277212101026517, + "loss": 0.7762, + "step": 2128 + }, + { + "epoch": 0.49, + "learning_rate": 0.0001927653296292869, + "loss": 0.7241, + "step": 2129 + }, + { + "epoch": 0.49, + "learning_rate": 0.00019275853517893002, + "loss": 0.7781, + "step": 2130 + }, + { + "epoch": 0.49, + "learning_rate": 0.00019275173765941926, + "loss": 0.7979, + "step": 2131 + }, + { + "epoch": 0.49, + "learning_rate": 0.0001927449370709796, + "loss": 0.7815, + "step": 2132 + }, + { + "epoch": 0.49, + "learning_rate": 0.00019273813341383602, + "loss": 0.6827, + "step": 2133 + }, + { + "epoch": 0.49, + "learning_rate": 0.00019273132668821364, + "loss": 0.6898, + "step": 2134 + }, + { + "epoch": 0.49, + "learning_rate": 0.00019272451689433768, + "loss": 0.8116, + "step": 2135 + }, + { + "epoch": 0.49, + "learning_rate": 0.00019271770403243347, + "loss": 0.7178, + "step": 2136 + }, + { + "epoch": 0.49, + "learning_rate": 0.00019271088810272644, + "loss": 0.8355, + "step": 2137 + }, + { + "epoch": 0.49, + "learning_rate": 0.00019270406910544208, + "loss": 0.6964, + "step": 2138 + }, + { + "epoch": 0.5, + "learning_rate": 0.00019269724704080605, + "loss": 0.7233, + "step": 2139 + }, + { + "epoch": 0.5, + "learning_rate": 0.00019269042190904405, + "loss": 0.7822, + "step": 2140 + }, + { + "epoch": 0.5, + "learning_rate": 0.00019268359371038188, + "loss": 0.769, + "step": 2141 + }, + { + "epoch": 0.5, + "learning_rate": 0.00019267676244504554, + "loss": 0.7162, + "step": 2142 + }, + { + "epoch": 0.5, + "learning_rate": 0.00019266992811326104, + "loss": 0.7578, + "step": 2143 + }, + { + "epoch": 0.5, + "learning_rate": 0.00019266309071525446, + "loss": 0.7394, + "step": 2144 + }, + { + "epoch": 0.5, + "learning_rate": 0.00019265625025125207, + "loss": 0.7886, + "step": 2145 + }, + { + "epoch": 0.5, + "learning_rate": 0.00019264940672148018, + "loss": 0.728, + "step": 2146 + }, + { + "epoch": 0.5, + "learning_rate": 0.00019264256012616526, + "loss": 0.7666, + "step": 2147 + }, + { + "epoch": 0.5, + "learning_rate": 0.00019263571046553387, + "loss": 0.7831, + "step": 2148 + }, + { + "epoch": 0.5, + "learning_rate": 0.00019262885773981258, + "loss": 0.7339, + "step": 2149 + }, + { + "epoch": 0.5, + "learning_rate": 0.00019262200194922815, + "loss": 0.6881, + "step": 2150 + }, + { + "epoch": 0.5, + "learning_rate": 0.00019261514309400747, + "loss": 0.7254, + "step": 2151 + }, + { + "epoch": 0.5, + "learning_rate": 0.0001926082811743774, + "loss": 0.7853, + "step": 2152 + }, + { + "epoch": 0.5, + "learning_rate": 0.00019260141619056507, + "loss": 0.7058, + "step": 2153 + }, + { + "epoch": 0.5, + "learning_rate": 0.00019259454814279757, + "loss": 0.7257, + "step": 2154 + }, + { + "epoch": 0.5, + "learning_rate": 0.00019258767703130217, + "loss": 0.7365, + "step": 2155 + }, + { + "epoch": 0.5, + "learning_rate": 0.00019258080285630618, + "loss": 0.7491, + "step": 2156 + }, + { + "epoch": 0.5, + "learning_rate": 0.00019257392561803713, + "loss": 0.7358, + "step": 2157 + }, + { + "epoch": 0.5, + "learning_rate": 0.0001925670453167225, + "loss": 0.7916, + "step": 2158 + }, + { + "epoch": 0.5, + "learning_rate": 0.00019256016195258994, + "loss": 0.8115, + "step": 2159 + }, + { + "epoch": 0.5, + "learning_rate": 0.00019255327552586727, + "loss": 0.7446, + "step": 2160 + }, + { + "epoch": 0.5, + "learning_rate": 0.00019254638603678225, + "loss": 0.8191, + "step": 2161 + }, + { + "epoch": 0.5, + "learning_rate": 0.00019253949348556291, + "loss": 0.7536, + "step": 2162 + }, + { + "epoch": 0.5, + "learning_rate": 0.00019253259787243733, + "loss": 0.8051, + "step": 2163 + }, + { + "epoch": 0.5, + "learning_rate": 0.00019252569919763358, + "loss": 0.7769, + "step": 2164 + }, + { + "epoch": 0.5, + "learning_rate": 0.00019251879746138003, + "loss": 0.7386, + "step": 2165 + }, + { + "epoch": 0.5, + "learning_rate": 0.0001925118926639049, + "loss": 0.7741, + "step": 2166 + }, + { + "epoch": 0.5, + "learning_rate": 0.00019250498480543677, + "loss": 0.7853, + "step": 2167 + }, + { + "epoch": 0.5, + "learning_rate": 0.00019249807388620416, + "loss": 0.7535, + "step": 2168 + }, + { + "epoch": 0.5, + "learning_rate": 0.00019249115990643573, + "loss": 0.7195, + "step": 2169 + }, + { + "epoch": 0.5, + "learning_rate": 0.00019248424286636028, + "loss": 0.76, + "step": 2170 + }, + { + "epoch": 0.5, + "learning_rate": 0.00019247732276620662, + "loss": 0.7589, + "step": 2171 + }, + { + "epoch": 0.5, + "learning_rate": 0.0001924703996062038, + "loss": 0.7908, + "step": 2172 + }, + { + "epoch": 0.5, + "learning_rate": 0.0001924634733865808, + "loss": 0.6949, + "step": 2173 + }, + { + "epoch": 0.5, + "learning_rate": 0.00019245654410756687, + "loss": 0.7253, + "step": 2174 + }, + { + "epoch": 0.5, + "learning_rate": 0.00019244961176939125, + "loss": 0.756, + "step": 2175 + }, + { + "epoch": 0.5, + "learning_rate": 0.0001924426763722833, + "loss": 0.8124, + "step": 2176 + }, + { + "epoch": 0.5, + "learning_rate": 0.00019243573791647252, + "loss": 0.7686, + "step": 2177 + }, + { + "epoch": 0.5, + "learning_rate": 0.00019242879640218844, + "loss": 0.7652, + "step": 2178 + }, + { + "epoch": 0.5, + "learning_rate": 0.0001924218518296608, + "loss": 0.7634, + "step": 2179 + }, + { + "epoch": 0.5, + "learning_rate": 0.00019241490419911935, + "loss": 0.7455, + "step": 2180 + }, + { + "epoch": 0.5, + "learning_rate": 0.000192407953510794, + "loss": 0.8487, + "step": 2181 + }, + { + "epoch": 0.51, + "learning_rate": 0.00019240099976491467, + "loss": 0.7851, + "step": 2182 + }, + { + "epoch": 0.51, + "learning_rate": 0.00019239404296171147, + "loss": 0.7273, + "step": 2183 + }, + { + "epoch": 0.51, + "learning_rate": 0.00019238708310141464, + "loss": 0.782, + "step": 2184 + }, + { + "epoch": 0.51, + "learning_rate": 0.00019238012018425436, + "loss": 0.7976, + "step": 2185 + }, + { + "epoch": 0.51, + "learning_rate": 0.00019237315421046113, + "loss": 0.787, + "step": 2186 + }, + { + "epoch": 0.51, + "learning_rate": 0.00019236618518026535, + "loss": 0.7123, + "step": 2187 + }, + { + "epoch": 0.51, + "learning_rate": 0.0001923592130938976, + "loss": 0.7239, + "step": 2188 + }, + { + "epoch": 0.51, + "learning_rate": 0.00019235223795158865, + "loss": 0.759, + "step": 2189 + }, + { + "epoch": 0.51, + "learning_rate": 0.00019234525975356923, + "loss": 0.7564, + "step": 2190 + }, + { + "epoch": 0.51, + "learning_rate": 0.00019233827850007027, + "loss": 0.7822, + "step": 2191 + }, + { + "epoch": 0.51, + "learning_rate": 0.0001923312941913227, + "loss": 0.7534, + "step": 2192 + }, + { + "epoch": 0.51, + "learning_rate": 0.00019232430682755768, + "loss": 0.7913, + "step": 2193 + }, + { + "epoch": 0.51, + "learning_rate": 0.0001923173164090064, + "loss": 0.7205, + "step": 2194 + }, + { + "epoch": 0.51, + "learning_rate": 0.0001923103229359001, + "loss": 0.7419, + "step": 2195 + }, + { + "epoch": 0.51, + "learning_rate": 0.00019230332640847024, + "loss": 0.7516, + "step": 2196 + }, + { + "epoch": 0.51, + "learning_rate": 0.00019229632682694824, + "loss": 0.7619, + "step": 2197 + }, + { + "epoch": 0.51, + "learning_rate": 0.0001922893241915658, + "loss": 0.7473, + "step": 2198 + }, + { + "epoch": 0.51, + "learning_rate": 0.00019228231850255457, + "loss": 0.7675, + "step": 2199 + }, + { + "epoch": 0.51, + "learning_rate": 0.00019227530976014632, + "loss": 0.7426, + "step": 2200 + }, + { + "epoch": 0.51, + "learning_rate": 0.000192268297964573, + "loss": 0.7183, + "step": 2201 + }, + { + "epoch": 0.51, + "learning_rate": 0.00019226128311606658, + "loss": 0.7752, + "step": 2202 + }, + { + "epoch": 0.51, + "learning_rate": 0.0001922542652148592, + "loss": 0.7392, + "step": 2203 + }, + { + "epoch": 0.51, + "learning_rate": 0.00019224724426118305, + "loss": 0.7665, + "step": 2204 + }, + { + "epoch": 0.51, + "learning_rate": 0.00019224022025527042, + "loss": 0.7765, + "step": 2205 + }, + { + "epoch": 0.51, + "learning_rate": 0.00019223319319735375, + "loss": 0.8217, + "step": 2206 + }, + { + "epoch": 0.51, + "learning_rate": 0.00019222616308766548, + "loss": 0.7567, + "step": 2207 + }, + { + "epoch": 0.51, + "learning_rate": 0.0001922191299264383, + "loss": 0.7887, + "step": 2208 + }, + { + "epoch": 0.51, + "learning_rate": 0.00019221209371390486, + "loss": 0.7312, + "step": 2209 + }, + { + "epoch": 0.51, + "learning_rate": 0.000192205054450298, + "loss": 0.7801, + "step": 2210 + }, + { + "epoch": 0.51, + "learning_rate": 0.00019219801213585068, + "loss": 0.8212, + "step": 2211 + }, + { + "epoch": 0.51, + "learning_rate": 0.0001921909667707958, + "loss": 0.756, + "step": 2212 + }, + { + "epoch": 0.51, + "learning_rate": 0.0001921839183553666, + "loss": 0.7883, + "step": 2213 + }, + { + "epoch": 0.51, + "learning_rate": 0.0001921768668897962, + "loss": 0.7603, + "step": 2214 + }, + { + "epoch": 0.51, + "learning_rate": 0.00019216981237431794, + "loss": 0.8297, + "step": 2215 + }, + { + "epoch": 0.51, + "learning_rate": 0.00019216275480916527, + "loss": 0.783, + "step": 2216 + }, + { + "epoch": 0.51, + "learning_rate": 0.00019215569419457165, + "loss": 0.7772, + "step": 2217 + }, + { + "epoch": 0.51, + "learning_rate": 0.00019214863053077075, + "loss": 0.793, + "step": 2218 + }, + { + "epoch": 0.51, + "learning_rate": 0.00019214156381799623, + "loss": 0.7583, + "step": 2219 + }, + { + "epoch": 0.51, + "learning_rate": 0.00019213449405648198, + "loss": 0.727, + "step": 2220 + }, + { + "epoch": 0.51, + "learning_rate": 0.0001921274212464619, + "loss": 0.7294, + "step": 2221 + }, + { + "epoch": 0.51, + "learning_rate": 0.00019212034538817002, + "loss": 0.7949, + "step": 2222 + }, + { + "epoch": 0.51, + "learning_rate": 0.00019211326648184041, + "loss": 0.7377, + "step": 2223 + }, + { + "epoch": 0.51, + "learning_rate": 0.00019210618452770732, + "loss": 0.7435, + "step": 2224 + }, + { + "epoch": 0.52, + "learning_rate": 0.0001920990995260051, + "loss": 0.7814, + "step": 2225 + }, + { + "epoch": 0.52, + "learning_rate": 0.0001920920114769682, + "loss": 0.7492, + "step": 2226 + }, + { + "epoch": 0.52, + "learning_rate": 0.0001920849203808311, + "loss": 0.7944, + "step": 2227 + }, + { + "epoch": 0.52, + "learning_rate": 0.00019207782623782837, + "loss": 0.7605, + "step": 2228 + }, + { + "epoch": 0.52, + "learning_rate": 0.00019207072904819486, + "loss": 0.7819, + "step": 2229 + }, + { + "epoch": 0.52, + "learning_rate": 0.00019206362881216528, + "loss": 0.7213, + "step": 2230 + }, + { + "epoch": 0.52, + "learning_rate": 0.00019205652552997467, + "loss": 0.7116, + "step": 2231 + }, + { + "epoch": 0.52, + "learning_rate": 0.000192049419201858, + "loss": 0.7665, + "step": 2232 + }, + { + "epoch": 0.52, + "learning_rate": 0.0001920423098280504, + "loss": 0.6887, + "step": 2233 + }, + { + "epoch": 0.52, + "learning_rate": 0.00019203519740878714, + "loss": 0.7742, + "step": 2234 + }, + { + "epoch": 0.52, + "learning_rate": 0.0001920280819443035, + "loss": 0.7006, + "step": 2235 + }, + { + "epoch": 0.52, + "learning_rate": 0.00019202096343483494, + "loss": 0.7199, + "step": 2236 + }, + { + "epoch": 0.52, + "learning_rate": 0.000192013841880617, + "loss": 0.7312, + "step": 2237 + }, + { + "epoch": 0.52, + "learning_rate": 0.0001920067172818853, + "loss": 0.7473, + "step": 2238 + }, + { + "epoch": 0.52, + "learning_rate": 0.00019199958963887558, + "loss": 0.7768, + "step": 2239 + }, + { + "epoch": 0.52, + "learning_rate": 0.0001919924589518237, + "loss": 0.7282, + "step": 2240 + }, + { + "epoch": 0.52, + "learning_rate": 0.00019198532522096554, + "loss": 0.7692, + "step": 2241 + }, + { + "epoch": 0.52, + "learning_rate": 0.00019197818844653722, + "loss": 0.736, + "step": 2242 + }, + { + "epoch": 0.52, + "learning_rate": 0.0001919710486287748, + "loss": 0.7537, + "step": 2243 + }, + { + "epoch": 0.52, + "learning_rate": 0.00019196390576791458, + "loss": 0.7522, + "step": 2244 + }, + { + "epoch": 0.52, + "learning_rate": 0.00019195675986419282, + "loss": 0.7511, + "step": 2245 + }, + { + "epoch": 0.52, + "learning_rate": 0.00019194961091784606, + "loss": 0.7056, + "step": 2246 + }, + { + "epoch": 0.52, + "learning_rate": 0.0001919424589291108, + "loss": 0.7357, + "step": 2247 + }, + { + "epoch": 0.52, + "learning_rate": 0.00019193530389822363, + "loss": 0.6892, + "step": 2248 + }, + { + "epoch": 0.52, + "learning_rate": 0.00019192814582542138, + "loss": 0.7082, + "step": 2249 + }, + { + "epoch": 0.52, + "learning_rate": 0.00019192098471094085, + "loss": 0.7805, + "step": 2250 + }, + { + "epoch": 0.52, + "learning_rate": 0.00019191382055501897, + "loss": 0.8256, + "step": 2251 + }, + { + "epoch": 0.52, + "learning_rate": 0.0001919066533578928, + "loss": 0.8279, + "step": 2252 + }, + { + "epoch": 0.52, + "learning_rate": 0.00019189948311979947, + "loss": 0.7982, + "step": 2253 + }, + { + "epoch": 0.52, + "learning_rate": 0.00019189230984097627, + "loss": 0.7885, + "step": 2254 + }, + { + "epoch": 0.52, + "learning_rate": 0.00019188513352166048, + "loss": 0.852, + "step": 2255 + }, + { + "epoch": 0.52, + "learning_rate": 0.0001918779541620896, + "loss": 0.7729, + "step": 2256 + }, + { + "epoch": 0.52, + "learning_rate": 0.0001918707717625012, + "loss": 0.8001, + "step": 2257 + }, + { + "epoch": 0.52, + "learning_rate": 0.00019186358632313283, + "loss": 0.7827, + "step": 2258 + }, + { + "epoch": 0.52, + "learning_rate": 0.00019185639784422232, + "loss": 0.7546, + "step": 2259 + }, + { + "epoch": 0.52, + "learning_rate": 0.0001918492063260075, + "loss": 0.7255, + "step": 2260 + }, + { + "epoch": 0.52, + "learning_rate": 0.00019184201176872633, + "loss": 0.7879, + "step": 2261 + }, + { + "epoch": 0.52, + "learning_rate": 0.00019183481417261682, + "loss": 0.7443, + "step": 2262 + }, + { + "epoch": 0.52, + "learning_rate": 0.00019182761353791716, + "loss": 0.7194, + "step": 2263 + }, + { + "epoch": 0.52, + "learning_rate": 0.00019182040986486562, + "loss": 0.7487, + "step": 2264 + }, + { + "epoch": 0.52, + "learning_rate": 0.00019181320315370047, + "loss": 0.7407, + "step": 2265 + }, + { + "epoch": 0.52, + "learning_rate": 0.00019180599340466023, + "loss": 0.8548, + "step": 2266 + }, + { + "epoch": 0.52, + "learning_rate": 0.00019179878061798347, + "loss": 0.7372, + "step": 2267 + }, + { + "epoch": 0.53, + "learning_rate": 0.00019179156479390877, + "loss": 0.7863, + "step": 2268 + }, + { + "epoch": 0.53, + "learning_rate": 0.00019178434593267497, + "loss": 0.7862, + "step": 2269 + }, + { + "epoch": 0.53, + "learning_rate": 0.00019177712403452084, + "loss": 0.7879, + "step": 2270 + }, + { + "epoch": 0.53, + "learning_rate": 0.00019176989909968535, + "loss": 0.7201, + "step": 2271 + }, + { + "epoch": 0.53, + "learning_rate": 0.00019176267112840765, + "loss": 0.7382, + "step": 2272 + }, + { + "epoch": 0.53, + "learning_rate": 0.00019175544012092676, + "loss": 0.7865, + "step": 2273 + }, + { + "epoch": 0.53, + "learning_rate": 0.00019174820607748206, + "loss": 0.8055, + "step": 2274 + }, + { + "epoch": 0.53, + "learning_rate": 0.0001917409689983128, + "loss": 0.7665, + "step": 2275 + }, + { + "epoch": 0.53, + "learning_rate": 0.0001917337288836585, + "loss": 0.7348, + "step": 2276 + }, + { + "epoch": 0.53, + "learning_rate": 0.00019172648573375872, + "loss": 0.8653, + "step": 2277 + }, + { + "epoch": 0.53, + "learning_rate": 0.00019171923954885308, + "loss": 0.7339, + "step": 2278 + }, + { + "epoch": 0.53, + "learning_rate": 0.00019171199032918138, + "loss": 0.7236, + "step": 2279 + }, + { + "epoch": 0.53, + "learning_rate": 0.00019170473807498346, + "loss": 0.7209, + "step": 2280 + }, + { + "epoch": 0.53, + "learning_rate": 0.00019169748278649926, + "loss": 0.7514, + "step": 2281 + }, + { + "epoch": 0.53, + "learning_rate": 0.00019169022446396887, + "loss": 0.8477, + "step": 2282 + }, + { + "epoch": 0.53, + "learning_rate": 0.00019168296310763245, + "loss": 0.7555, + "step": 2283 + }, + { + "epoch": 0.53, + "learning_rate": 0.00019167569871773022, + "loss": 0.7448, + "step": 2284 + }, + { + "epoch": 0.53, + "learning_rate": 0.00019166843129450262, + "loss": 0.8127, + "step": 2285 + }, + { + "epoch": 0.53, + "learning_rate": 0.00019166116083819002, + "loss": 0.7805, + "step": 2286 + }, + { + "epoch": 0.53, + "learning_rate": 0.00019165388734903305, + "loss": 0.7508, + "step": 2287 + }, + { + "epoch": 0.53, + "learning_rate": 0.00019164661082727232, + "loss": 0.7373, + "step": 2288 + }, + { + "epoch": 0.53, + "learning_rate": 0.00019163933127314863, + "loss": 0.8195, + "step": 2289 + }, + { + "epoch": 0.53, + "learning_rate": 0.0001916320486869028, + "loss": 0.7166, + "step": 2290 + }, + { + "epoch": 0.53, + "learning_rate": 0.00019162476306877588, + "loss": 0.7388, + "step": 2291 + }, + { + "epoch": 0.53, + "learning_rate": 0.00019161747441900887, + "loss": 0.6967, + "step": 2292 + }, + { + "epoch": 0.53, + "learning_rate": 0.00019161018273784291, + "loss": 0.7057, + "step": 2293 + }, + { + "epoch": 0.53, + "learning_rate": 0.0001916028880255193, + "loss": 0.7019, + "step": 2294 + }, + { + "epoch": 0.53, + "learning_rate": 0.0001915955902822794, + "loss": 0.7718, + "step": 2295 + }, + { + "epoch": 0.53, + "learning_rate": 0.00019158828950836468, + "loss": 0.8137, + "step": 2296 + }, + { + "epoch": 0.53, + "learning_rate": 0.0001915809857040167, + "loss": 0.7566, + "step": 2297 + }, + { + "epoch": 0.53, + "learning_rate": 0.00019157367886947713, + "loss": 0.7996, + "step": 2298 + }, + { + "epoch": 0.53, + "learning_rate": 0.00019156636900498773, + "loss": 0.7415, + "step": 2299 + }, + { + "epoch": 0.53, + "learning_rate": 0.00019155905611079033, + "loss": 0.7282, + "step": 2300 + }, + { + "epoch": 0.53, + "learning_rate": 0.00019155174018712696, + "loss": 0.7059, + "step": 2301 + }, + { + "epoch": 0.53, + "learning_rate": 0.00019154442123423966, + "loss": 0.7441, + "step": 2302 + }, + { + "epoch": 0.53, + "learning_rate": 0.00019153709925237057, + "loss": 0.7801, + "step": 2303 + }, + { + "epoch": 0.53, + "learning_rate": 0.00019152977424176203, + "loss": 0.7954, + "step": 2304 + }, + { + "epoch": 0.53, + "learning_rate": 0.0001915224462026563, + "loss": 0.8333, + "step": 2305 + }, + { + "epoch": 0.53, + "learning_rate": 0.0001915151151352959, + "loss": 0.7804, + "step": 2306 + }, + { + "epoch": 0.53, + "learning_rate": 0.0001915077810399234, + "loss": 0.737, + "step": 2307 + }, + { + "epoch": 0.53, + "learning_rate": 0.00019150044391678153, + "loss": 0.7335, + "step": 2308 + }, + { + "epoch": 0.53, + "learning_rate": 0.00019149310376611296, + "loss": 0.7493, + "step": 2309 + }, + { + "epoch": 0.53, + "learning_rate": 0.0001914857605881606, + "loss": 0.745, + "step": 2310 + }, + { + "epoch": 0.53, + "learning_rate": 0.00019147841438316737, + "loss": 0.7707, + "step": 2311 + }, + { + "epoch": 0.54, + "learning_rate": 0.0001914710651513764, + "loss": 0.6553, + "step": 2312 + }, + { + "epoch": 0.54, + "learning_rate": 0.00019146371289303086, + "loss": 0.8071, + "step": 2313 + }, + { + "epoch": 0.54, + "learning_rate": 0.00019145635760837396, + "loss": 0.7671, + "step": 2314 + }, + { + "epoch": 0.54, + "learning_rate": 0.00019144899929764912, + "loss": 0.7874, + "step": 2315 + }, + { + "epoch": 0.54, + "learning_rate": 0.00019144163796109979, + "loss": 0.7484, + "step": 2316 + }, + { + "epoch": 0.54, + "learning_rate": 0.00019143427359896955, + "loss": 0.7571, + "step": 2317 + }, + { + "epoch": 0.54, + "learning_rate": 0.00019142690621150204, + "loss": 0.6778, + "step": 2318 + }, + { + "epoch": 0.54, + "learning_rate": 0.0001914195357989411, + "loss": 0.7623, + "step": 2319 + }, + { + "epoch": 0.54, + "learning_rate": 0.00019141216236153051, + "loss": 0.7815, + "step": 2320 + }, + { + "epoch": 0.54, + "learning_rate": 0.00019140478589951427, + "loss": 0.7511, + "step": 2321 + }, + { + "epoch": 0.54, + "learning_rate": 0.00019139740641313643, + "loss": 0.7453, + "step": 2322 + }, + { + "epoch": 0.54, + "learning_rate": 0.00019139002390264123, + "loss": 0.8154, + "step": 2323 + }, + { + "epoch": 0.54, + "learning_rate": 0.00019138263836827288, + "loss": 0.8023, + "step": 2324 + }, + { + "epoch": 0.54, + "learning_rate": 0.00019137524981027574, + "loss": 0.7938, + "step": 2325 + }, + { + "epoch": 0.54, + "learning_rate": 0.00019136785822889435, + "loss": 0.7646, + "step": 2326 + }, + { + "epoch": 0.54, + "learning_rate": 0.0001913604636243732, + "loss": 0.693, + "step": 2327 + }, + { + "epoch": 0.54, + "learning_rate": 0.00019135306599695699, + "loss": 0.7811, + "step": 2328 + }, + { + "epoch": 0.54, + "learning_rate": 0.0001913456653468905, + "loss": 0.7547, + "step": 2329 + }, + { + "epoch": 0.54, + "learning_rate": 0.0001913382616744186, + "loss": 0.7319, + "step": 2330 + }, + { + "epoch": 0.54, + "learning_rate": 0.00019133085497978624, + "loss": 0.7032, + "step": 2331 + }, + { + "epoch": 0.54, + "learning_rate": 0.00019132344526323852, + "loss": 0.6973, + "step": 2332 + }, + { + "epoch": 0.54, + "learning_rate": 0.00019131603252502054, + "loss": 0.8191, + "step": 2333 + }, + { + "epoch": 0.54, + "learning_rate": 0.00019130861676537765, + "loss": 0.8009, + "step": 2334 + }, + { + "epoch": 0.54, + "learning_rate": 0.00019130119798455519, + "loss": 0.7361, + "step": 2335 + }, + { + "epoch": 0.54, + "learning_rate": 0.00019129377618279863, + "loss": 0.7228, + "step": 2336 + }, + { + "epoch": 0.54, + "learning_rate": 0.0001912863513603535, + "loss": 0.7693, + "step": 2337 + }, + { + "epoch": 0.54, + "learning_rate": 0.00019127892351746554, + "loss": 0.8103, + "step": 2338 + }, + { + "epoch": 0.54, + "learning_rate": 0.00019127149265438053, + "loss": 0.7327, + "step": 2339 + }, + { + "epoch": 0.54, + "learning_rate": 0.0001912640587713442, + "loss": 0.7053, + "step": 2340 + }, + { + "epoch": 0.54, + "learning_rate": 0.00019125662186860267, + "loss": 0.7589, + "step": 2341 + }, + { + "epoch": 0.54, + "learning_rate": 0.00019124918194640193, + "loss": 0.7932, + "step": 2342 + }, + { + "epoch": 0.54, + "learning_rate": 0.00019124173900498818, + "loss": 0.7631, + "step": 2343 + }, + { + "epoch": 0.54, + "learning_rate": 0.00019123429304460768, + "loss": 0.6995, + "step": 2344 + }, + { + "epoch": 0.54, + "learning_rate": 0.00019122684406550682, + "loss": 0.7718, + "step": 2345 + }, + { + "epoch": 0.54, + "learning_rate": 0.00019121939206793203, + "loss": 0.7468, + "step": 2346 + }, + { + "epoch": 0.54, + "learning_rate": 0.00019121193705212987, + "loss": 0.7886, + "step": 2347 + }, + { + "epoch": 0.54, + "learning_rate": 0.00019120447901834706, + "loss": 0.7687, + "step": 2348 + }, + { + "epoch": 0.54, + "learning_rate": 0.00019119701796683033, + "loss": 0.8191, + "step": 2349 + }, + { + "epoch": 0.54, + "learning_rate": 0.00019118955389782656, + "loss": 0.7652, + "step": 2350 + }, + { + "epoch": 0.54, + "learning_rate": 0.00019118208681158274, + "loss": 0.8142, + "step": 2351 + }, + { + "epoch": 0.54, + "learning_rate": 0.0001911746167083459, + "loss": 0.8113, + "step": 2352 + }, + { + "epoch": 0.54, + "learning_rate": 0.0001911671435883632, + "loss": 0.8219, + "step": 2353 + }, + { + "epoch": 0.54, + "learning_rate": 0.00019115966745188198, + "loss": 0.7383, + "step": 2354 + }, + { + "epoch": 0.55, + "learning_rate": 0.0001911521882991495, + "loss": 0.7295, + "step": 2355 + }, + { + "epoch": 0.55, + "learning_rate": 0.00019114470613041334, + "loss": 0.718, + "step": 2356 + }, + { + "epoch": 0.55, + "learning_rate": 0.00019113722094592098, + "loss": 0.7677, + "step": 2357 + }, + { + "epoch": 0.55, + "learning_rate": 0.00019112973274592015, + "loss": 0.7173, + "step": 2358 + }, + { + "epoch": 0.55, + "learning_rate": 0.00019112224153065854, + "loss": 0.7791, + "step": 2359 + }, + { + "epoch": 0.55, + "learning_rate": 0.00019111474730038408, + "loss": 0.8479, + "step": 2360 + }, + { + "epoch": 0.55, + "learning_rate": 0.00019110725005534471, + "loss": 0.7677, + "step": 2361 + }, + { + "epoch": 0.55, + "learning_rate": 0.0001910997497957885, + "loss": 0.743, + "step": 2362 + }, + { + "epoch": 0.55, + "learning_rate": 0.00019109224652196362, + "loss": 0.7474, + "step": 2363 + }, + { + "epoch": 0.55, + "learning_rate": 0.00019108474023411836, + "loss": 0.7801, + "step": 2364 + }, + { + "epoch": 0.55, + "learning_rate": 0.00019107723093250104, + "loss": 0.7444, + "step": 2365 + }, + { + "epoch": 0.55, + "learning_rate": 0.00019106971861736013, + "loss": 0.8053, + "step": 2366 + }, + { + "epoch": 0.55, + "learning_rate": 0.0001910622032889442, + "loss": 0.7483, + "step": 2367 + }, + { + "epoch": 0.55, + "learning_rate": 0.0001910546849475019, + "loss": 0.7369, + "step": 2368 + }, + { + "epoch": 0.55, + "learning_rate": 0.00019104716359328206, + "loss": 0.8312, + "step": 2369 + }, + { + "epoch": 0.55, + "learning_rate": 0.00019103963922653344, + "loss": 0.6973, + "step": 2370 + }, + { + "epoch": 0.55, + "learning_rate": 0.0001910321118475051, + "loss": 0.7458, + "step": 2371 + }, + { + "epoch": 0.55, + "learning_rate": 0.00019102458145644607, + "loss": 0.7585, + "step": 2372 + }, + { + "epoch": 0.55, + "learning_rate": 0.0001910170480536055, + "loss": 0.7915, + "step": 2373 + }, + { + "epoch": 0.55, + "learning_rate": 0.00019100951163923265, + "loss": 0.7015, + "step": 2374 + }, + { + "epoch": 0.55, + "learning_rate": 0.00019100197221357687, + "loss": 0.7131, + "step": 2375 + }, + { + "epoch": 0.55, + "learning_rate": 0.00019099442977688766, + "loss": 0.7658, + "step": 2376 + }, + { + "epoch": 0.55, + "learning_rate": 0.00019098688432941455, + "loss": 0.8613, + "step": 2377 + }, + { + "epoch": 0.55, + "learning_rate": 0.00019097933587140719, + "loss": 0.7963, + "step": 2378 + }, + { + "epoch": 0.55, + "learning_rate": 0.0001909717844031154, + "loss": 0.6986, + "step": 2379 + }, + { + "epoch": 0.55, + "learning_rate": 0.00019096422992478896, + "loss": 0.7365, + "step": 2380 + }, + { + "epoch": 0.55, + "learning_rate": 0.0001909566724366779, + "loss": 0.6878, + "step": 2381 + }, + { + "epoch": 0.55, + "learning_rate": 0.00019094911193903223, + "loss": 0.7182, + "step": 2382 + }, + { + "epoch": 0.55, + "learning_rate": 0.00019094154843210211, + "loss": 0.7745, + "step": 2383 + }, + { + "epoch": 0.55, + "learning_rate": 0.00019093398191613785, + "loss": 0.8337, + "step": 2384 + }, + { + "epoch": 0.55, + "learning_rate": 0.00019092641239138976, + "loss": 0.7175, + "step": 2385 + }, + { + "epoch": 0.55, + "learning_rate": 0.00019091883985810832, + "loss": 0.7781, + "step": 2386 + }, + { + "epoch": 0.55, + "learning_rate": 0.00019091126431654404, + "loss": 0.8033, + "step": 2387 + }, + { + "epoch": 0.55, + "learning_rate": 0.00019090368576694765, + "loss": 0.7343, + "step": 2388 + }, + { + "epoch": 0.55, + "learning_rate": 0.00019089610420956987, + "loss": 0.7443, + "step": 2389 + }, + { + "epoch": 0.55, + "learning_rate": 0.00019088851964466154, + "loss": 0.7311, + "step": 2390 + }, + { + "epoch": 0.55, + "learning_rate": 0.0001908809320724736, + "loss": 0.7452, + "step": 2391 + }, + { + "epoch": 0.55, + "learning_rate": 0.00019087334149325716, + "loss": 0.7995, + "step": 2392 + }, + { + "epoch": 0.55, + "learning_rate": 0.00019086574790726335, + "loss": 0.7438, + "step": 2393 + }, + { + "epoch": 0.55, + "learning_rate": 0.00019085815131474342, + "loss": 0.7235, + "step": 2394 + }, + { + "epoch": 0.55, + "learning_rate": 0.00019085055171594867, + "loss": 0.7909, + "step": 2395 + }, + { + "epoch": 0.55, + "learning_rate": 0.00019084294911113068, + "loss": 0.7281, + "step": 2396 + }, + { + "epoch": 0.55, + "learning_rate": 0.00019083534350054088, + "loss": 0.7308, + "step": 2397 + }, + { + "epoch": 0.56, + "learning_rate": 0.00019082773488443097, + "loss": 0.7143, + "step": 2398 + }, + { + "epoch": 0.56, + "learning_rate": 0.0001908201232630527, + "loss": 0.7765, + "step": 2399 + }, + { + "epoch": 0.56, + "learning_rate": 0.00019081250863665794, + "loss": 0.699, + "step": 2400 + }, + { + "epoch": 0.56, + "learning_rate": 0.0001908048910054986, + "loss": 0.7443, + "step": 2401 + }, + { + "epoch": 0.56, + "learning_rate": 0.00019079727036982674, + "loss": 0.7548, + "step": 2402 + }, + { + "epoch": 0.56, + "learning_rate": 0.0001907896467298945, + "loss": 0.7587, + "step": 2403 + }, + { + "epoch": 0.56, + "learning_rate": 0.00019078202008595416, + "loss": 0.7678, + "step": 2404 + }, + { + "epoch": 0.56, + "learning_rate": 0.00019077439043825803, + "loss": 0.7737, + "step": 2405 + }, + { + "epoch": 0.56, + "learning_rate": 0.00019076675778705862, + "loss": 0.8824, + "step": 2406 + }, + { + "epoch": 0.56, + "learning_rate": 0.00019075912213260837, + "loss": 0.7944, + "step": 2407 + }, + { + "epoch": 0.56, + "learning_rate": 0.00019075148347516004, + "loss": 0.84, + "step": 2408 + }, + { + "epoch": 0.56, + "learning_rate": 0.00019074384181496629, + "loss": 0.6901, + "step": 2409 + }, + { + "epoch": 0.56, + "learning_rate": 0.00019073619715228, + "loss": 0.7929, + "step": 2410 + }, + { + "epoch": 0.56, + "learning_rate": 0.00019072854948735412, + "loss": 0.7474, + "step": 2411 + }, + { + "epoch": 0.56, + "learning_rate": 0.00019072089882044168, + "loss": 0.7162, + "step": 2412 + }, + { + "epoch": 0.56, + "learning_rate": 0.0001907132451517958, + "loss": 0.7121, + "step": 2413 + }, + { + "epoch": 0.56, + "learning_rate": 0.00019070558848166975, + "loss": 0.7202, + "step": 2414 + }, + { + "epoch": 0.56, + "learning_rate": 0.00019069792881031686, + "loss": 0.7032, + "step": 2415 + }, + { + "epoch": 0.56, + "learning_rate": 0.00019069026613799058, + "loss": 0.7421, + "step": 2416 + }, + { + "epoch": 0.56, + "learning_rate": 0.00019068260046494446, + "loss": 0.8043, + "step": 2417 + }, + { + "epoch": 0.56, + "learning_rate": 0.0001906749317914321, + "loss": 0.7595, + "step": 2418 + }, + { + "epoch": 0.56, + "learning_rate": 0.00019066726011770726, + "loss": 0.6811, + "step": 2419 + }, + { + "epoch": 0.56, + "learning_rate": 0.0001906595854440238, + "loss": 0.7462, + "step": 2420 + }, + { + "epoch": 0.56, + "learning_rate": 0.00019065190777063559, + "loss": 0.7309, + "step": 2421 + }, + { + "epoch": 0.56, + "learning_rate": 0.00019064422709779674, + "loss": 0.6998, + "step": 2422 + }, + { + "epoch": 0.56, + "learning_rate": 0.00019063654342576136, + "loss": 0.739, + "step": 2423 + }, + { + "epoch": 0.56, + "learning_rate": 0.00019062885675478363, + "loss": 0.7373, + "step": 2424 + }, + { + "epoch": 0.56, + "learning_rate": 0.000190621167085118, + "loss": 0.7603, + "step": 2425 + }, + { + "epoch": 0.56, + "learning_rate": 0.00019061347441701876, + "loss": 0.7331, + "step": 2426 + }, + { + "epoch": 0.56, + "learning_rate": 0.00019060577875074056, + "loss": 0.7231, + "step": 2427 + }, + { + "epoch": 0.56, + "learning_rate": 0.00019059808008653795, + "loss": 0.6796, + "step": 2428 + }, + { + "epoch": 0.56, + "learning_rate": 0.00019059037842466574, + "loss": 0.7769, + "step": 2429 + }, + { + "epoch": 0.56, + "learning_rate": 0.00019058267376537868, + "loss": 0.8257, + "step": 2430 + }, + { + "epoch": 0.56, + "learning_rate": 0.00019057496610893173, + "loss": 0.7393, + "step": 2431 + }, + { + "epoch": 0.56, + "learning_rate": 0.00019056725545557994, + "loss": 0.8024, + "step": 2432 + }, + { + "epoch": 0.56, + "learning_rate": 0.00019055954180557843, + "loss": 0.7174, + "step": 2433 + }, + { + "epoch": 0.56, + "learning_rate": 0.00019055182515918237, + "loss": 0.7713, + "step": 2434 + }, + { + "epoch": 0.56, + "learning_rate": 0.00019054410551664713, + "loss": 0.7718, + "step": 2435 + }, + { + "epoch": 0.56, + "learning_rate": 0.00019053638287822817, + "loss": 0.721, + "step": 2436 + }, + { + "epoch": 0.56, + "learning_rate": 0.00019052865724418093, + "loss": 0.7431, + "step": 2437 + }, + { + "epoch": 0.56, + "learning_rate": 0.0001905209286147611, + "loss": 0.7984, + "step": 2438 + }, + { + "epoch": 0.56, + "learning_rate": 0.00019051319699022438, + "loss": 0.7895, + "step": 2439 + }, + { + "epoch": 0.56, + "learning_rate": 0.0001905054623708266, + "loss": 0.788, + "step": 2440 + }, + { + "epoch": 0.57, + "learning_rate": 0.00019049772475682368, + "loss": 0.7227, + "step": 2441 + }, + { + "epoch": 0.57, + "learning_rate": 0.00019048998414847158, + "loss": 0.802, + "step": 2442 + }, + { + "epoch": 0.57, + "learning_rate": 0.0001904822405460265, + "loss": 0.7092, + "step": 2443 + }, + { + "epoch": 0.57, + "learning_rate": 0.00019047449394974462, + "loss": 0.7984, + "step": 2444 + }, + { + "epoch": 0.57, + "learning_rate": 0.00019046674435988225, + "loss": 0.6561, + "step": 2445 + }, + { + "epoch": 0.57, + "learning_rate": 0.00019045899177669583, + "loss": 0.7057, + "step": 2446 + }, + { + "epoch": 0.57, + "learning_rate": 0.00019045123620044184, + "loss": 0.7175, + "step": 2447 + }, + { + "epoch": 0.57, + "learning_rate": 0.0001904434776313769, + "loss": 0.8014, + "step": 2448 + }, + { + "epoch": 0.57, + "learning_rate": 0.00019043571606975777, + "loss": 0.7279, + "step": 2449 + }, + { + "epoch": 0.57, + "learning_rate": 0.00019042795151584117, + "loss": 0.7454, + "step": 2450 + }, + { + "epoch": 0.57, + "learning_rate": 0.00019042018396988412, + "loss": 0.7585, + "step": 2451 + }, + { + "epoch": 0.57, + "learning_rate": 0.00019041241343214352, + "loss": 0.6977, + "step": 2452 + }, + { + "epoch": 0.57, + "learning_rate": 0.0001904046399028766, + "loss": 0.7108, + "step": 2453 + }, + { + "epoch": 0.57, + "learning_rate": 0.00019039686338234043, + "loss": 0.7403, + "step": 2454 + }, + { + "epoch": 0.57, + "learning_rate": 0.00019038908387079243, + "loss": 0.7666, + "step": 2455 + }, + { + "epoch": 0.57, + "learning_rate": 0.00019038130136848994, + "loss": 0.7684, + "step": 2456 + }, + { + "epoch": 0.57, + "learning_rate": 0.0001903735158756905, + "loss": 0.7632, + "step": 2457 + }, + { + "epoch": 0.57, + "learning_rate": 0.0001903657273926517, + "loss": 0.8169, + "step": 2458 + }, + { + "epoch": 0.57, + "learning_rate": 0.0001903579359196312, + "loss": 0.763, + "step": 2459 + }, + { + "epoch": 0.57, + "learning_rate": 0.00019035014145688689, + "loss": 0.8173, + "step": 2460 + }, + { + "epoch": 0.57, + "learning_rate": 0.00019034234400467658, + "loss": 0.7208, + "step": 2461 + }, + { + "epoch": 0.57, + "learning_rate": 0.00019033454356325833, + "loss": 0.7139, + "step": 2462 + }, + { + "epoch": 0.57, + "learning_rate": 0.00019032674013289022, + "loss": 0.7676, + "step": 2463 + }, + { + "epoch": 0.57, + "learning_rate": 0.00019031893371383043, + "loss": 0.8065, + "step": 2464 + }, + { + "epoch": 0.57, + "learning_rate": 0.00019031112430633725, + "loss": 0.7545, + "step": 2465 + }, + { + "epoch": 0.57, + "learning_rate": 0.00019030331191066912, + "loss": 0.775, + "step": 2466 + }, + { + "epoch": 0.57, + "learning_rate": 0.0001902954965270845, + "loss": 0.7265, + "step": 2467 + }, + { + "epoch": 0.57, + "learning_rate": 0.00019028767815584198, + "loss": 0.7474, + "step": 2468 + }, + { + "epoch": 0.57, + "learning_rate": 0.00019027985679720026, + "loss": 0.8291, + "step": 2469 + }, + { + "epoch": 0.57, + "learning_rate": 0.00019027203245141814, + "loss": 0.7736, + "step": 2470 + }, + { + "epoch": 0.57, + "learning_rate": 0.00019026420511875447, + "loss": 0.7389, + "step": 2471 + }, + { + "epoch": 0.57, + "learning_rate": 0.0001902563747994683, + "loss": 0.7331, + "step": 2472 + }, + { + "epoch": 0.57, + "learning_rate": 0.00019024854149381865, + "loss": 0.728, + "step": 2473 + }, + { + "epoch": 0.57, + "learning_rate": 0.0001902407052020647, + "loss": 0.788, + "step": 2474 + }, + { + "epoch": 0.57, + "learning_rate": 0.00019023286592446584, + "loss": 0.718, + "step": 2475 + }, + { + "epoch": 0.57, + "learning_rate": 0.00019022502366128135, + "loss": 0.7425, + "step": 2476 + }, + { + "epoch": 0.57, + "learning_rate": 0.00019021717841277073, + "loss": 0.7905, + "step": 2477 + }, + { + "epoch": 0.57, + "learning_rate": 0.00019020933017919363, + "loss": 0.8064, + "step": 2478 + }, + { + "epoch": 0.57, + "learning_rate": 0.00019020147896080963, + "loss": 0.7675, + "step": 2479 + }, + { + "epoch": 0.57, + "learning_rate": 0.00019019362475787853, + "loss": 0.7664, + "step": 2480 + }, + { + "epoch": 0.57, + "learning_rate": 0.00019018576757066024, + "loss": 0.7771, + "step": 2481 + }, + { + "epoch": 0.57, + "learning_rate": 0.00019017790739941473, + "loss": 0.7625, + "step": 2482 + }, + { + "epoch": 0.57, + "learning_rate": 0.00019017004424440206, + "loss": 0.7666, + "step": 2483 + }, + { + "epoch": 0.58, + "learning_rate": 0.0001901621781058824, + "loss": 0.7864, + "step": 2484 + }, + { + "epoch": 0.58, + "learning_rate": 0.00019015430898411604, + "loss": 0.8038, + "step": 2485 + }, + { + "epoch": 0.58, + "learning_rate": 0.00019014643687936334, + "loss": 0.7475, + "step": 2486 + }, + { + "epoch": 0.58, + "learning_rate": 0.00019013856179188476, + "loss": 0.737, + "step": 2487 + }, + { + "epoch": 0.58, + "learning_rate": 0.00019013068372194085, + "loss": 0.7445, + "step": 2488 + }, + { + "epoch": 0.58, + "learning_rate": 0.00019012280266979235, + "loss": 0.7338, + "step": 2489 + }, + { + "epoch": 0.58, + "learning_rate": 0.00019011491863569994, + "loss": 0.7741, + "step": 2490 + }, + { + "epoch": 0.58, + "learning_rate": 0.00019010703161992455, + "loss": 0.7624, + "step": 2491 + }, + { + "epoch": 0.58, + "learning_rate": 0.00019009914162272708, + "loss": 0.7272, + "step": 2492 + }, + { + "epoch": 0.58, + "learning_rate": 0.00019009124864436864, + "loss": 0.6755, + "step": 2493 + }, + { + "epoch": 0.58, + "learning_rate": 0.00019008335268511038, + "loss": 0.7529, + "step": 2494 + }, + { + "epoch": 0.58, + "learning_rate": 0.00019007545374521355, + "loss": 0.7038, + "step": 2495 + }, + { + "epoch": 0.58, + "learning_rate": 0.00019006755182493948, + "loss": 0.6884, + "step": 2496 + }, + { + "epoch": 0.58, + "learning_rate": 0.00019005964692454965, + "loss": 0.7965, + "step": 2497 + }, + { + "epoch": 0.58, + "learning_rate": 0.00019005173904430565, + "loss": 0.7526, + "step": 2498 + }, + { + "epoch": 0.58, + "learning_rate": 0.00019004382818446907, + "loss": 0.7459, + "step": 2499 + }, + { + "epoch": 0.58, + "learning_rate": 0.0001900359143453017, + "loss": 0.8198, + "step": 2500 + }, + { + "epoch": 0.58, + "learning_rate": 0.00019002799752706537, + "loss": 0.7092, + "step": 2501 + }, + { + "epoch": 0.58, + "learning_rate": 0.00019002007773002207, + "loss": 0.805, + "step": 2502 + }, + { + "epoch": 0.58, + "learning_rate": 0.00019001215495443376, + "loss": 0.7484, + "step": 2503 + }, + { + "epoch": 0.58, + "learning_rate": 0.00019000422920056266, + "loss": 0.8038, + "step": 2504 + }, + { + "epoch": 0.58, + "learning_rate": 0.000189996300468671, + "loss": 0.7678, + "step": 2505 + }, + { + "epoch": 0.58, + "learning_rate": 0.0001899883687590211, + "loss": 0.7814, + "step": 2506 + }, + { + "epoch": 0.58, + "learning_rate": 0.00018998043407187547, + "loss": 0.7282, + "step": 2507 + }, + { + "epoch": 0.58, + "learning_rate": 0.00018997249640749655, + "loss": 0.7544, + "step": 2508 + }, + { + "epoch": 0.58, + "learning_rate": 0.000189964555766147, + "loss": 0.7147, + "step": 2509 + }, + { + "epoch": 0.58, + "learning_rate": 0.00018995661214808965, + "loss": 0.7847, + "step": 2510 + }, + { + "epoch": 0.58, + "learning_rate": 0.0001899486655535872, + "loss": 0.7867, + "step": 2511 + }, + { + "epoch": 0.58, + "learning_rate": 0.00018994071598290267, + "loss": 0.8004, + "step": 2512 + }, + { + "epoch": 0.58, + "learning_rate": 0.00018993276343629912, + "loss": 0.745, + "step": 2513 + }, + { + "epoch": 0.58, + "learning_rate": 0.00018992480791403958, + "loss": 0.713, + "step": 2514 + }, + { + "epoch": 0.58, + "learning_rate": 0.00018991684941638731, + "loss": 0.763, + "step": 2515 + }, + { + "epoch": 0.58, + "learning_rate": 0.00018990888794360569, + "loss": 0.757, + "step": 2516 + }, + { + "epoch": 0.58, + "learning_rate": 0.00018990092349595813, + "loss": 0.7844, + "step": 2517 + }, + { + "epoch": 0.58, + "learning_rate": 0.0001898929560737081, + "loss": 0.7843, + "step": 2518 + }, + { + "epoch": 0.58, + "learning_rate": 0.00018988498567711924, + "loss": 0.7436, + "step": 2519 + }, + { + "epoch": 0.58, + "learning_rate": 0.00018987701230645533, + "loss": 0.8052, + "step": 2520 + }, + { + "epoch": 0.58, + "learning_rate": 0.00018986903596198015, + "loss": 0.7334, + "step": 2521 + }, + { + "epoch": 0.58, + "learning_rate": 0.00018986105664395764, + "loss": 0.7118, + "step": 2522 + }, + { + "epoch": 0.58, + "learning_rate": 0.00018985307435265175, + "loss": 0.7917, + "step": 2523 + }, + { + "epoch": 0.58, + "learning_rate": 0.00018984508908832662, + "loss": 0.7577, + "step": 2524 + }, + { + "epoch": 0.58, + "learning_rate": 0.00018983710085124656, + "loss": 0.7676, + "step": 2525 + }, + { + "epoch": 0.58, + "learning_rate": 0.00018982910964167573, + "loss": 0.7483, + "step": 2526 + }, + { + "epoch": 0.58, + "learning_rate": 0.00018982111545987868, + "loss": 0.6609, + "step": 2527 + }, + { + "epoch": 0.59, + "learning_rate": 0.00018981311830611978, + "loss": 0.7922, + "step": 2528 + }, + { + "epoch": 0.59, + "learning_rate": 0.00018980511818066374, + "loss": 0.7023, + "step": 2529 + }, + { + "epoch": 0.59, + "learning_rate": 0.00018979711508377523, + "loss": 0.7735, + "step": 2530 + }, + { + "epoch": 0.59, + "learning_rate": 0.00018978910901571905, + "loss": 0.7325, + "step": 2531 + }, + { + "epoch": 0.59, + "learning_rate": 0.0001897810999767601, + "loss": 0.7682, + "step": 2532 + }, + { + "epoch": 0.59, + "learning_rate": 0.0001897730879671634, + "loss": 0.7681, + "step": 2533 + }, + { + "epoch": 0.59, + "learning_rate": 0.00018976507298719403, + "loss": 0.6692, + "step": 2534 + }, + { + "epoch": 0.59, + "learning_rate": 0.00018975705503711718, + "loss": 0.786, + "step": 2535 + }, + { + "epoch": 0.59, + "learning_rate": 0.00018974903411719815, + "loss": 0.7863, + "step": 2536 + }, + { + "epoch": 0.59, + "learning_rate": 0.00018974101022770236, + "loss": 0.8318, + "step": 2537 + }, + { + "epoch": 0.59, + "learning_rate": 0.00018973298336889526, + "loss": 0.7272, + "step": 2538 + }, + { + "epoch": 0.59, + "learning_rate": 0.00018972495354104246, + "loss": 0.7238, + "step": 2539 + }, + { + "epoch": 0.59, + "learning_rate": 0.00018971692074440963, + "loss": 0.8522, + "step": 2540 + }, + { + "epoch": 0.59, + "learning_rate": 0.00018970888497926258, + "loss": 0.7707, + "step": 2541 + }, + { + "epoch": 0.59, + "learning_rate": 0.0001897008462458672, + "loss": 0.7302, + "step": 2542 + }, + { + "epoch": 0.59, + "learning_rate": 0.00018969280454448945, + "loss": 0.6911, + "step": 2543 + }, + { + "epoch": 0.59, + "learning_rate": 0.00018968475987539538, + "loss": 0.7006, + "step": 2544 + }, + { + "epoch": 0.59, + "learning_rate": 0.00018967671223885124, + "loss": 0.7568, + "step": 2545 + }, + { + "epoch": 0.59, + "learning_rate": 0.00018966866163512328, + "loss": 0.7492, + "step": 2546 + }, + { + "epoch": 0.59, + "learning_rate": 0.00018966060806447784, + "loss": 0.6714, + "step": 2547 + }, + { + "epoch": 0.59, + "learning_rate": 0.00018965255152718143, + "loss": 0.7477, + "step": 2548 + }, + { + "epoch": 0.59, + "learning_rate": 0.0001896444920235006, + "loss": 0.7838, + "step": 2549 + }, + { + "epoch": 0.59, + "learning_rate": 0.00018963642955370201, + "loss": 0.7357, + "step": 2550 + }, + { + "epoch": 0.59, + "learning_rate": 0.00018962836411805248, + "loss": 0.7361, + "step": 2551 + }, + { + "epoch": 0.59, + "learning_rate": 0.00018962029571681886, + "loss": 0.6886, + "step": 2552 + }, + { + "epoch": 0.59, + "learning_rate": 0.00018961222435026805, + "loss": 0.7902, + "step": 2553 + }, + { + "epoch": 0.59, + "learning_rate": 0.00018960415001866718, + "loss": 0.7951, + "step": 2554 + }, + { + "epoch": 0.59, + "learning_rate": 0.0001895960727222834, + "loss": 0.7456, + "step": 2555 + }, + { + "epoch": 0.59, + "learning_rate": 0.00018958799246138392, + "loss": 0.7392, + "step": 2556 + }, + { + "epoch": 0.59, + "learning_rate": 0.00018957990923623616, + "loss": 0.7071, + "step": 2557 + }, + { + "epoch": 0.59, + "learning_rate": 0.00018957182304710755, + "loss": 0.7418, + "step": 2558 + }, + { + "epoch": 0.59, + "learning_rate": 0.00018956373389426563, + "loss": 0.7706, + "step": 2559 + }, + { + "epoch": 0.59, + "learning_rate": 0.00018955564177797806, + "loss": 0.7977, + "step": 2560 + }, + { + "epoch": 0.59, + "learning_rate": 0.00018954754669851258, + "loss": 0.7703, + "step": 2561 + }, + { + "epoch": 0.59, + "learning_rate": 0.00018953944865613705, + "loss": 0.7982, + "step": 2562 + }, + { + "epoch": 0.59, + "learning_rate": 0.0001895313476511194, + "loss": 0.8012, + "step": 2563 + }, + { + "epoch": 0.59, + "learning_rate": 0.00018952324368372772, + "loss": 0.7852, + "step": 2564 + }, + { + "epoch": 0.59, + "learning_rate": 0.00018951513675423006, + "loss": 0.7586, + "step": 2565 + }, + { + "epoch": 0.59, + "learning_rate": 0.00018950702686289472, + "loss": 0.7283, + "step": 2566 + }, + { + "epoch": 0.59, + "learning_rate": 0.00018949891400999006, + "loss": 0.7123, + "step": 2567 + }, + { + "epoch": 0.59, + "learning_rate": 0.00018949079819578443, + "loss": 0.7864, + "step": 2568 + }, + { + "epoch": 0.59, + "learning_rate": 0.00018948267942054647, + "loss": 0.766, + "step": 2569 + }, + { + "epoch": 0.59, + "learning_rate": 0.0001894745576845447, + "loss": 0.676, + "step": 2570 + }, + { + "epoch": 0.6, + "learning_rate": 0.00018946643298804793, + "loss": 0.7828, + "step": 2571 + }, + { + "epoch": 0.6, + "learning_rate": 0.000189458305331325, + "loss": 0.7739, + "step": 2572 + }, + { + "epoch": 0.6, + "learning_rate": 0.0001894501747146447, + "loss": 0.7157, + "step": 2573 + }, + { + "epoch": 0.6, + "learning_rate": 0.00018944204113827622, + "loss": 0.7659, + "step": 2574 + }, + { + "epoch": 0.6, + "learning_rate": 0.00018943390460248857, + "loss": 0.7387, + "step": 2575 + }, + { + "epoch": 0.6, + "learning_rate": 0.000189425765107551, + "loss": 0.7584, + "step": 2576 + }, + { + "epoch": 0.6, + "learning_rate": 0.00018941762265373287, + "loss": 0.7472, + "step": 2577 + }, + { + "epoch": 0.6, + "learning_rate": 0.00018940947724130353, + "loss": 0.8175, + "step": 2578 + }, + { + "epoch": 0.6, + "learning_rate": 0.0001894013288705325, + "loss": 0.7549, + "step": 2579 + }, + { + "epoch": 0.6, + "learning_rate": 0.0001893931775416894, + "loss": 0.7817, + "step": 2580 + }, + { + "epoch": 0.6, + "learning_rate": 0.000189385023255044, + "loss": 0.7011, + "step": 2581 + }, + { + "epoch": 0.6, + "learning_rate": 0.000189376866010866, + "loss": 0.7521, + "step": 2582 + }, + { + "epoch": 0.6, + "learning_rate": 0.00018936870580942534, + "loss": 0.7106, + "step": 2583 + }, + { + "epoch": 0.6, + "learning_rate": 0.00018936054265099208, + "loss": 0.7246, + "step": 2584 + }, + { + "epoch": 0.6, + "learning_rate": 0.00018935237653583623, + "loss": 0.7282, + "step": 2585 + }, + { + "epoch": 0.6, + "learning_rate": 0.00018934420746422804, + "loss": 0.7372, + "step": 2586 + }, + { + "epoch": 0.6, + "learning_rate": 0.00018933603543643782, + "loss": 0.755, + "step": 2587 + }, + { + "epoch": 0.6, + "learning_rate": 0.00018932786045273593, + "loss": 0.8123, + "step": 2588 + }, + { + "epoch": 0.6, + "learning_rate": 0.00018931968251339283, + "loss": 0.7333, + "step": 2589 + }, + { + "epoch": 0.6, + "learning_rate": 0.00018931150161867916, + "loss": 0.8004, + "step": 2590 + }, + { + "epoch": 0.6, + "learning_rate": 0.0001893033177688656, + "loss": 0.7135, + "step": 2591 + }, + { + "epoch": 0.6, + "learning_rate": 0.00018929513096422294, + "loss": 0.7942, + "step": 2592 + }, + { + "epoch": 0.6, + "learning_rate": 0.000189286941205022, + "loss": 0.7741, + "step": 2593 + }, + { + "epoch": 0.6, + "learning_rate": 0.00018927874849153383, + "loss": 0.6858, + "step": 2594 + }, + { + "epoch": 0.6, + "learning_rate": 0.0001892705528240295, + "loss": 0.743, + "step": 2595 + }, + { + "epoch": 0.6, + "learning_rate": 0.00018926235420278013, + "loss": 0.7844, + "step": 2596 + }, + { + "epoch": 0.6, + "learning_rate": 0.00018925415262805706, + "loss": 0.7123, + "step": 2597 + }, + { + "epoch": 0.6, + "learning_rate": 0.0001892459481001316, + "loss": 0.7506, + "step": 2598 + }, + { + "epoch": 0.6, + "learning_rate": 0.00018923774061927525, + "loss": 0.7026, + "step": 2599 + }, + { + "epoch": 0.6, + "learning_rate": 0.00018922953018575962, + "loss": 0.7524, + "step": 2600 + }, + { + "epoch": 0.6, + "learning_rate": 0.00018922131679985626, + "loss": 0.8288, + "step": 2601 + }, + { + "epoch": 0.6, + "learning_rate": 0.00018921310046183702, + "loss": 0.7124, + "step": 2602 + }, + { + "epoch": 0.6, + "learning_rate": 0.00018920488117197377, + "loss": 0.7601, + "step": 2603 + }, + { + "epoch": 0.6, + "learning_rate": 0.00018919665893053837, + "loss": 0.7146, + "step": 2604 + }, + { + "epoch": 0.6, + "learning_rate": 0.00018918843373780296, + "loss": 0.741, + "step": 2605 + }, + { + "epoch": 0.6, + "learning_rate": 0.00018918020559403968, + "loss": 0.7418, + "step": 2606 + }, + { + "epoch": 0.6, + "learning_rate": 0.00018917197449952076, + "loss": 0.6797, + "step": 2607 + }, + { + "epoch": 0.6, + "learning_rate": 0.00018916374045451853, + "loss": 0.7946, + "step": 2608 + }, + { + "epoch": 0.6, + "learning_rate": 0.0001891555034593055, + "loss": 0.7425, + "step": 2609 + }, + { + "epoch": 0.6, + "learning_rate": 0.00018914726351415415, + "loss": 0.7735, + "step": 2610 + }, + { + "epoch": 0.6, + "learning_rate": 0.00018913902061933714, + "loss": 0.7372, + "step": 2611 + }, + { + "epoch": 0.6, + "learning_rate": 0.0001891307747751272, + "loss": 0.7474, + "step": 2612 + }, + { + "epoch": 0.6, + "learning_rate": 0.00018912252598179716, + "loss": 0.7932, + "step": 2613 + }, + { + "epoch": 0.61, + "learning_rate": 0.00018911427423961996, + "loss": 0.7636, + "step": 2614 + }, + { + "epoch": 0.61, + "learning_rate": 0.00018910601954886866, + "loss": 0.7519, + "step": 2615 + }, + { + "epoch": 0.61, + "learning_rate": 0.00018909776190981635, + "loss": 0.689, + "step": 2616 + }, + { + "epoch": 0.61, + "learning_rate": 0.00018908950132273623, + "loss": 0.7899, + "step": 2617 + }, + { + "epoch": 0.61, + "learning_rate": 0.0001890812377879017, + "loss": 0.8011, + "step": 2618 + }, + { + "epoch": 0.61, + "learning_rate": 0.00018907297130558615, + "loss": 0.7624, + "step": 2619 + }, + { + "epoch": 0.61, + "learning_rate": 0.00018906470187606307, + "loss": 0.7185, + "step": 2620 + }, + { + "epoch": 0.61, + "learning_rate": 0.0001890564294996061, + "loss": 0.7556, + "step": 2621 + }, + { + "epoch": 0.61, + "learning_rate": 0.00018904815417648894, + "loss": 0.7336, + "step": 2622 + }, + { + "epoch": 0.61, + "learning_rate": 0.0001890398759069854, + "loss": 0.7651, + "step": 2623 + }, + { + "epoch": 0.61, + "learning_rate": 0.0001890315946913694, + "loss": 0.7936, + "step": 2624 + }, + { + "epoch": 0.61, + "learning_rate": 0.0001890233105299149, + "loss": 0.7291, + "step": 2625 + }, + { + "epoch": 0.61, + "learning_rate": 0.0001890150234228961, + "loss": 0.7487, + "step": 2626 + }, + { + "epoch": 0.61, + "learning_rate": 0.00018900673337058713, + "loss": 0.7595, + "step": 2627 + }, + { + "epoch": 0.61, + "learning_rate": 0.00018899844037326225, + "loss": 0.7143, + "step": 2628 + }, + { + "epoch": 0.61, + "learning_rate": 0.00018899014443119594, + "loss": 0.712, + "step": 2629 + }, + { + "epoch": 0.61, + "learning_rate": 0.00018898184554466265, + "loss": 0.7248, + "step": 2630 + }, + { + "epoch": 0.61, + "learning_rate": 0.000188973543713937, + "loss": 0.7641, + "step": 2631 + }, + { + "epoch": 0.61, + "learning_rate": 0.00018896523893929362, + "loss": 0.7873, + "step": 2632 + }, + { + "epoch": 0.61, + "learning_rate": 0.00018895693122100734, + "loss": 0.7312, + "step": 2633 + }, + { + "epoch": 0.61, + "learning_rate": 0.00018894862055935302, + "loss": 0.7487, + "step": 2634 + }, + { + "epoch": 0.61, + "learning_rate": 0.00018894030695460569, + "loss": 0.7508, + "step": 2635 + }, + { + "epoch": 0.61, + "learning_rate": 0.00018893199040704035, + "loss": 0.7489, + "step": 2636 + }, + { + "epoch": 0.61, + "learning_rate": 0.0001889236709169322, + "loss": 0.7764, + "step": 2637 + }, + { + "epoch": 0.61, + "learning_rate": 0.00018891534848455654, + "loss": 0.7735, + "step": 2638 + }, + { + "epoch": 0.61, + "learning_rate": 0.00018890702311018874, + "loss": 0.6876, + "step": 2639 + }, + { + "epoch": 0.61, + "learning_rate": 0.00018889869479410423, + "loss": 0.6765, + "step": 2640 + }, + { + "epoch": 0.61, + "learning_rate": 0.0001888903635365786, + "loss": 0.7361, + "step": 2641 + }, + { + "epoch": 0.61, + "learning_rate": 0.00018888202933788748, + "loss": 0.7048, + "step": 2642 + }, + { + "epoch": 0.61, + "learning_rate": 0.00018887369219830667, + "loss": 0.7486, + "step": 2643 + }, + { + "epoch": 0.61, + "learning_rate": 0.00018886535211811201, + "loss": 0.7527, + "step": 2644 + }, + { + "epoch": 0.61, + "learning_rate": 0.00018885700909757944, + "loss": 0.8114, + "step": 2645 + }, + { + "epoch": 0.61, + "learning_rate": 0.00018884866313698503, + "loss": 0.7701, + "step": 2646 + }, + { + "epoch": 0.61, + "learning_rate": 0.0001888403142366049, + "loss": 0.7794, + "step": 2647 + }, + { + "epoch": 0.61, + "learning_rate": 0.00018883196239671533, + "loss": 0.7411, + "step": 2648 + }, + { + "epoch": 0.61, + "learning_rate": 0.00018882360761759262, + "loss": 0.7772, + "step": 2649 + }, + { + "epoch": 0.61, + "learning_rate": 0.00018881524989951327, + "loss": 0.7166, + "step": 2650 + }, + { + "epoch": 0.61, + "learning_rate": 0.00018880688924275378, + "loss": 0.713, + "step": 2651 + }, + { + "epoch": 0.61, + "learning_rate": 0.00018879852564759075, + "loss": 0.7208, + "step": 2652 + }, + { + "epoch": 0.61, + "learning_rate": 0.00018879015911430096, + "loss": 0.7113, + "step": 2653 + }, + { + "epoch": 0.61, + "learning_rate": 0.00018878178964316122, + "loss": 0.7147, + "step": 2654 + }, + { + "epoch": 0.61, + "learning_rate": 0.00018877341723444845, + "loss": 0.7841, + "step": 2655 + }, + { + "epoch": 0.61, + "learning_rate": 0.0001887650418884397, + "loss": 0.7719, + "step": 2656 + }, + { + "epoch": 0.62, + "learning_rate": 0.00018875666360541207, + "loss": 0.7652, + "step": 2657 + }, + { + "epoch": 0.62, + "learning_rate": 0.00018874828238564276, + "loss": 0.7548, + "step": 2658 + }, + { + "epoch": 0.62, + "learning_rate": 0.00018873989822940912, + "loss": 0.7877, + "step": 2659 + }, + { + "epoch": 0.62, + "learning_rate": 0.00018873151113698854, + "loss": 0.7877, + "step": 2660 + }, + { + "epoch": 0.62, + "learning_rate": 0.00018872312110865852, + "loss": 0.8047, + "step": 2661 + }, + { + "epoch": 0.62, + "learning_rate": 0.00018871472814469667, + "loss": 0.7319, + "step": 2662 + }, + { + "epoch": 0.62, + "learning_rate": 0.00018870633224538072, + "loss": 0.7585, + "step": 2663 + }, + { + "epoch": 0.62, + "learning_rate": 0.00018869793341098843, + "loss": 0.7639, + "step": 2664 + }, + { + "epoch": 0.62, + "learning_rate": 0.00018868953164179777, + "loss": 0.7774, + "step": 2665 + }, + { + "epoch": 0.62, + "learning_rate": 0.00018868112693808665, + "loss": 0.7694, + "step": 2666 + }, + { + "epoch": 0.62, + "learning_rate": 0.00018867271930013315, + "loss": 0.7248, + "step": 2667 + }, + { + "epoch": 0.62, + "learning_rate": 0.00018866430872821555, + "loss": 0.7212, + "step": 2668 + }, + { + "epoch": 0.62, + "learning_rate": 0.00018865589522261208, + "loss": 0.669, + "step": 2669 + }, + { + "epoch": 0.62, + "learning_rate": 0.0001886474787836011, + "loss": 0.796, + "step": 2670 + }, + { + "epoch": 0.62, + "learning_rate": 0.00018863905941146115, + "loss": 0.728, + "step": 2671 + }, + { + "epoch": 0.62, + "learning_rate": 0.00018863063710647073, + "loss": 0.78, + "step": 2672 + }, + { + "epoch": 0.62, + "learning_rate": 0.0001886222118689086, + "loss": 0.7339, + "step": 2673 + }, + { + "epoch": 0.62, + "learning_rate": 0.00018861378369905348, + "loss": 0.7539, + "step": 2674 + }, + { + "epoch": 0.62, + "learning_rate": 0.00018860535259718423, + "loss": 0.7712, + "step": 2675 + }, + { + "epoch": 0.62, + "learning_rate": 0.00018859691856357985, + "loss": 0.7518, + "step": 2676 + }, + { + "epoch": 0.62, + "learning_rate": 0.00018858848159851937, + "loss": 0.7932, + "step": 2677 + }, + { + "epoch": 0.62, + "learning_rate": 0.00018858004170228197, + "loss": 0.763, + "step": 2678 + }, + { + "epoch": 0.62, + "learning_rate": 0.00018857159887514686, + "loss": 0.7795, + "step": 2679 + }, + { + "epoch": 0.62, + "learning_rate": 0.00018856315311739348, + "loss": 0.7504, + "step": 2680 + }, + { + "epoch": 0.62, + "learning_rate": 0.0001885547044293012, + "loss": 0.754, + "step": 2681 + }, + { + "epoch": 0.62, + "learning_rate": 0.00018854625281114959, + "loss": 0.7377, + "step": 2682 + }, + { + "epoch": 0.62, + "learning_rate": 0.0001885377982632183, + "loss": 0.7383, + "step": 2683 + }, + { + "epoch": 0.62, + "learning_rate": 0.0001885293407857871, + "loss": 0.8184, + "step": 2684 + }, + { + "epoch": 0.62, + "learning_rate": 0.00018852088037913577, + "loss": 0.7913, + "step": 2685 + }, + { + "epoch": 0.62, + "learning_rate": 0.00018851241704354426, + "loss": 0.7113, + "step": 2686 + }, + { + "epoch": 0.62, + "learning_rate": 0.00018850395077929258, + "loss": 0.7362, + "step": 2687 + }, + { + "epoch": 0.62, + "learning_rate": 0.00018849548158666096, + "loss": 0.7868, + "step": 2688 + }, + { + "epoch": 0.62, + "learning_rate": 0.00018848700946592952, + "loss": 0.8233, + "step": 2689 + }, + { + "epoch": 0.62, + "learning_rate": 0.0001884785344173786, + "loss": 0.74, + "step": 2690 + }, + { + "epoch": 0.62, + "learning_rate": 0.00018847005644128867, + "loss": 0.7204, + "step": 2691 + }, + { + "epoch": 0.62, + "learning_rate": 0.0001884615755379402, + "loss": 0.7417, + "step": 2692 + }, + { + "epoch": 0.62, + "learning_rate": 0.0001884530917076138, + "loss": 0.7777, + "step": 2693 + }, + { + "epoch": 0.62, + "learning_rate": 0.00018844460495059023, + "loss": 0.745, + "step": 2694 + }, + { + "epoch": 0.62, + "learning_rate": 0.0001884361152671502, + "loss": 0.8231, + "step": 2695 + }, + { + "epoch": 0.62, + "learning_rate": 0.0001884276226575747, + "loss": 0.7828, + "step": 2696 + }, + { + "epoch": 0.62, + "learning_rate": 0.0001884191271221447, + "loss": 0.7596, + "step": 2697 + }, + { + "epoch": 0.62, + "learning_rate": 0.00018841062866114129, + "loss": 0.7764, + "step": 2698 + }, + { + "epoch": 0.62, + "learning_rate": 0.0001884021272748457, + "loss": 0.7552, + "step": 2699 + }, + { + "epoch": 0.63, + "learning_rate": 0.00018839362296353915, + "loss": 0.8005, + "step": 2700 + }, + { + "epoch": 0.63, + "learning_rate": 0.00018838511572750308, + "loss": 0.7692, + "step": 2701 + }, + { + "epoch": 0.63, + "learning_rate": 0.00018837660556701897, + "loss": 0.741, + "step": 2702 + }, + { + "epoch": 0.63, + "learning_rate": 0.0001883680924823684, + "loss": 0.7961, + "step": 2703 + }, + { + "epoch": 0.63, + "learning_rate": 0.00018835957647383303, + "loss": 0.7073, + "step": 2704 + }, + { + "epoch": 0.63, + "learning_rate": 0.00018835105754169465, + "loss": 0.7342, + "step": 2705 + }, + { + "epoch": 0.63, + "learning_rate": 0.0001883425356862351, + "loss": 0.7689, + "step": 2706 + }, + { + "epoch": 0.63, + "learning_rate": 0.00018833401090773637, + "loss": 0.7154, + "step": 2707 + }, + { + "epoch": 0.63, + "learning_rate": 0.00018832548320648056, + "loss": 0.7578, + "step": 2708 + }, + { + "epoch": 0.63, + "learning_rate": 0.00018831695258274977, + "loss": 0.7419, + "step": 2709 + }, + { + "epoch": 0.63, + "learning_rate": 0.00018830841903682627, + "loss": 0.7751, + "step": 2710 + }, + { + "epoch": 0.63, + "learning_rate": 0.00018829988256899248, + "loss": 0.6645, + "step": 2711 + }, + { + "epoch": 0.63, + "learning_rate": 0.00018829134317953074, + "loss": 0.6571, + "step": 2712 + }, + { + "epoch": 0.63, + "learning_rate": 0.0001882828008687237, + "loss": 0.724, + "step": 2713 + }, + { + "epoch": 0.63, + "learning_rate": 0.00018827425563685394, + "loss": 0.8128, + "step": 2714 + }, + { + "epoch": 0.63, + "learning_rate": 0.0001882657074842042, + "loss": 0.7267, + "step": 2715 + }, + { + "epoch": 0.63, + "learning_rate": 0.0001882571564110574, + "loss": 0.8606, + "step": 2716 + }, + { + "epoch": 0.63, + "learning_rate": 0.00018824860241769637, + "loss": 0.7618, + "step": 2717 + }, + { + "epoch": 0.63, + "learning_rate": 0.0001882400455044042, + "loss": 0.7381, + "step": 2718 + }, + { + "epoch": 0.63, + "learning_rate": 0.00018823148567146398, + "loss": 0.7735, + "step": 2719 + }, + { + "epoch": 0.63, + "learning_rate": 0.00018822292291915897, + "loss": 0.7271, + "step": 2720 + }, + { + "epoch": 0.63, + "learning_rate": 0.00018821435724777249, + "loss": 0.7416, + "step": 2721 + }, + { + "epoch": 0.63, + "learning_rate": 0.0001882057886575879, + "loss": 0.7122, + "step": 2722 + }, + { + "epoch": 0.63, + "learning_rate": 0.00018819721714888877, + "loss": 0.7775, + "step": 2723 + }, + { + "epoch": 0.63, + "learning_rate": 0.00018818864272195877, + "loss": 0.7698, + "step": 2724 + }, + { + "epoch": 0.63, + "learning_rate": 0.00018818006537708147, + "loss": 0.7071, + "step": 2725 + }, + { + "epoch": 0.63, + "learning_rate": 0.0001881714851145407, + "loss": 0.7937, + "step": 2726 + }, + { + "epoch": 0.63, + "learning_rate": 0.00018816290193462046, + "loss": 0.7988, + "step": 2727 + }, + { + "epoch": 0.63, + "learning_rate": 0.0001881543158376047, + "loss": 0.8001, + "step": 2728 + }, + { + "epoch": 0.63, + "learning_rate": 0.00018814572682377746, + "loss": 0.8088, + "step": 2729 + }, + { + "epoch": 0.63, + "learning_rate": 0.00018813713489342293, + "loss": 0.7383, + "step": 2730 + }, + { + "epoch": 0.63, + "learning_rate": 0.00018812854004682548, + "loss": 0.7564, + "step": 2731 + }, + { + "epoch": 0.63, + "learning_rate": 0.00018811994228426943, + "loss": 0.7642, + "step": 2732 + }, + { + "epoch": 0.63, + "learning_rate": 0.0001881113416060393, + "loss": 0.7347, + "step": 2733 + }, + { + "epoch": 0.63, + "learning_rate": 0.00018810273801241962, + "loss": 0.6914, + "step": 2734 + }, + { + "epoch": 0.63, + "learning_rate": 0.0001880941315036951, + "loss": 0.7381, + "step": 2735 + }, + { + "epoch": 0.63, + "learning_rate": 0.00018808552208015046, + "loss": 0.6663, + "step": 2736 + }, + { + "epoch": 0.63, + "learning_rate": 0.00018807690974207062, + "loss": 0.7588, + "step": 2737 + }, + { + "epoch": 0.63, + "learning_rate": 0.00018806829448974048, + "loss": 0.7599, + "step": 2738 + }, + { + "epoch": 0.63, + "learning_rate": 0.00018805967632344516, + "loss": 0.7848, + "step": 2739 + }, + { + "epoch": 0.63, + "learning_rate": 0.00018805105524346977, + "loss": 0.7564, + "step": 2740 + }, + { + "epoch": 0.63, + "learning_rate": 0.00018804243125009962, + "loss": 0.7647, + "step": 2741 + }, + { + "epoch": 0.63, + "learning_rate": 0.00018803380434362, + "loss": 0.7325, + "step": 2742 + }, + { + "epoch": 0.63, + "learning_rate": 0.00018802517452431633, + "loss": 0.6924, + "step": 2743 + }, + { + "epoch": 0.64, + "learning_rate": 0.00018801654179247424, + "loss": 0.7536, + "step": 2744 + }, + { + "epoch": 0.64, + "learning_rate": 0.00018800790614837928, + "loss": 0.735, + "step": 2745 + }, + { + "epoch": 0.64, + "learning_rate": 0.00018799926759231723, + "loss": 0.7452, + "step": 2746 + }, + { + "epoch": 0.64, + "learning_rate": 0.0001879906261245739, + "loss": 0.7681, + "step": 2747 + }, + { + "epoch": 0.64, + "learning_rate": 0.00018798198174543523, + "loss": 0.7386, + "step": 2748 + }, + { + "epoch": 0.64, + "learning_rate": 0.00018797333445518723, + "loss": 0.7284, + "step": 2749 + }, + { + "epoch": 0.64, + "learning_rate": 0.000187964684254116, + "loss": 0.7354, + "step": 2750 + }, + { + "epoch": 0.64, + "learning_rate": 0.00018795603114250782, + "loss": 0.8015, + "step": 2751 + }, + { + "epoch": 0.64, + "learning_rate": 0.0001879473751206489, + "loss": 0.7339, + "step": 2752 + }, + { + "epoch": 0.64, + "learning_rate": 0.00018793871618882574, + "loss": 0.7591, + "step": 2753 + }, + { + "epoch": 0.64, + "learning_rate": 0.00018793005434732482, + "loss": 0.8118, + "step": 2754 + }, + { + "epoch": 0.64, + "learning_rate": 0.0001879213895964327, + "loss": 0.7239, + "step": 2755 + }, + { + "epoch": 0.64, + "learning_rate": 0.00018791272193643605, + "loss": 0.7769, + "step": 2756 + }, + { + "epoch": 0.64, + "learning_rate": 0.00018790405136762177, + "loss": 0.7714, + "step": 2757 + }, + { + "epoch": 0.64, + "learning_rate": 0.00018789537789027668, + "loss": 0.7994, + "step": 2758 + }, + { + "epoch": 0.64, + "learning_rate": 0.00018788670150468776, + "loss": 0.8188, + "step": 2759 + }, + { + "epoch": 0.64, + "learning_rate": 0.0001878780222111421, + "loss": 0.7202, + "step": 2760 + }, + { + "epoch": 0.64, + "learning_rate": 0.00018786934000992688, + "loss": 0.7731, + "step": 2761 + }, + { + "epoch": 0.64, + "learning_rate": 0.0001878606549013294, + "loss": 0.7205, + "step": 2762 + }, + { + "epoch": 0.64, + "learning_rate": 0.00018785196688563699, + "loss": 0.7614, + "step": 2763 + }, + { + "epoch": 0.64, + "learning_rate": 0.0001878432759631371, + "loss": 0.7378, + "step": 2764 + }, + { + "epoch": 0.64, + "learning_rate": 0.00018783458213411734, + "loss": 0.6818, + "step": 2765 + }, + { + "epoch": 0.64, + "learning_rate": 0.00018782588539886533, + "loss": 0.7761, + "step": 2766 + }, + { + "epoch": 0.64, + "learning_rate": 0.00018781718575766886, + "loss": 0.7645, + "step": 2767 + }, + { + "epoch": 0.64, + "learning_rate": 0.00018780848321081574, + "loss": 0.7923, + "step": 2768 + }, + { + "epoch": 0.64, + "learning_rate": 0.00018779977775859396, + "loss": 0.7385, + "step": 2769 + }, + { + "epoch": 0.64, + "learning_rate": 0.00018779106940129152, + "loss": 0.744, + "step": 2770 + }, + { + "epoch": 0.64, + "learning_rate": 0.0001877823581391966, + "loss": 0.719, + "step": 2771 + }, + { + "epoch": 0.64, + "learning_rate": 0.0001877736439725974, + "loss": 0.7687, + "step": 2772 + }, + { + "epoch": 0.64, + "learning_rate": 0.00018776492690178226, + "loss": 0.7632, + "step": 2773 + }, + { + "epoch": 0.64, + "learning_rate": 0.0001877562069270396, + "loss": 0.722, + "step": 2774 + }, + { + "epoch": 0.64, + "learning_rate": 0.00018774748404865794, + "loss": 0.7738, + "step": 2775 + }, + { + "epoch": 0.64, + "learning_rate": 0.00018773875826692593, + "loss": 0.782, + "step": 2776 + }, + { + "epoch": 0.64, + "learning_rate": 0.00018773002958213226, + "loss": 0.8252, + "step": 2777 + }, + { + "epoch": 0.64, + "learning_rate": 0.00018772129799456572, + "loss": 0.7105, + "step": 2778 + }, + { + "epoch": 0.64, + "learning_rate": 0.0001877125635045153, + "loss": 0.7154, + "step": 2779 + }, + { + "epoch": 0.64, + "learning_rate": 0.00018770382611226987, + "loss": 0.7025, + "step": 2780 + }, + { + "epoch": 0.64, + "learning_rate": 0.00018769508581811868, + "loss": 0.7894, + "step": 2781 + }, + { + "epoch": 0.64, + "learning_rate": 0.0001876863426223508, + "loss": 0.6928, + "step": 2782 + }, + { + "epoch": 0.64, + "learning_rate": 0.0001876775965252556, + "loss": 0.811, + "step": 2783 + }, + { + "epoch": 0.64, + "learning_rate": 0.0001876688475271224, + "loss": 0.7152, + "step": 2784 + }, + { + "epoch": 0.64, + "learning_rate": 0.00018766009562824073, + "loss": 0.73, + "step": 2785 + }, + { + "epoch": 0.64, + "learning_rate": 0.0001876513408289002, + "loss": 0.7592, + "step": 2786 + }, + { + "epoch": 0.65, + "learning_rate": 0.0001876425831293904, + "loss": 0.7717, + "step": 2787 + }, + { + "epoch": 0.65, + "learning_rate": 0.00018763382253000118, + "loss": 0.7122, + "step": 2788 + }, + { + "epoch": 0.65, + "learning_rate": 0.00018762505903102233, + "loss": 0.7252, + "step": 2789 + }, + { + "epoch": 0.65, + "learning_rate": 0.00018761629263274387, + "loss": 0.8541, + "step": 2790 + }, + { + "epoch": 0.65, + "learning_rate": 0.00018760752333545587, + "loss": 0.7711, + "step": 2791 + }, + { + "epoch": 0.65, + "learning_rate": 0.00018759875113944844, + "loss": 0.7931, + "step": 2792 + }, + { + "epoch": 0.65, + "learning_rate": 0.00018758997604501184, + "loss": 0.7194, + "step": 2793 + }, + { + "epoch": 0.65, + "learning_rate": 0.00018758119805243644, + "loss": 0.7411, + "step": 2794 + }, + { + "epoch": 0.65, + "learning_rate": 0.00018757241716201267, + "loss": 0.7377, + "step": 2795 + }, + { + "epoch": 0.65, + "learning_rate": 0.00018756363337403103, + "loss": 0.8007, + "step": 2796 + }, + { + "epoch": 0.65, + "learning_rate": 0.00018755484668878225, + "loss": 0.6891, + "step": 2797 + }, + { + "epoch": 0.65, + "learning_rate": 0.00018754605710655695, + "loss": 0.7662, + "step": 2798 + }, + { + "epoch": 0.65, + "learning_rate": 0.000187537264627646, + "loss": 0.7288, + "step": 2799 + }, + { + "epoch": 0.65, + "learning_rate": 0.00018752846925234037, + "loss": 0.7282, + "step": 2800 + }, + { + "epoch": 0.65, + "learning_rate": 0.000187519670980931, + "loss": 0.7078, + "step": 2801 + }, + { + "epoch": 0.65, + "learning_rate": 0.00018751086981370907, + "loss": 0.68, + "step": 2802 + }, + { + "epoch": 0.65, + "learning_rate": 0.00018750206575096577, + "loss": 0.8015, + "step": 2803 + }, + { + "epoch": 0.65, + "learning_rate": 0.00018749325879299233, + "loss": 0.74, + "step": 2804 + }, + { + "epoch": 0.65, + "learning_rate": 0.00018748444894008026, + "loss": 0.7707, + "step": 2805 + }, + { + "epoch": 0.65, + "learning_rate": 0.000187475636192521, + "loss": 0.6914, + "step": 2806 + }, + { + "epoch": 0.65, + "learning_rate": 0.00018746682055060612, + "loss": 0.8486, + "step": 2807 + }, + { + "epoch": 0.65, + "learning_rate": 0.0001874580020146274, + "loss": 0.7958, + "step": 2808 + }, + { + "epoch": 0.65, + "learning_rate": 0.00018744918058487655, + "loss": 0.7374, + "step": 2809 + }, + { + "epoch": 0.65, + "learning_rate": 0.00018744035626164544, + "loss": 0.8419, + "step": 2810 + }, + { + "epoch": 0.65, + "learning_rate": 0.00018743152904522609, + "loss": 0.7086, + "step": 2811 + }, + { + "epoch": 0.65, + "learning_rate": 0.0001874226989359105, + "loss": 0.6556, + "step": 2812 + }, + { + "epoch": 0.65, + "learning_rate": 0.00018741386593399095, + "loss": 0.749, + "step": 2813 + }, + { + "epoch": 0.65, + "learning_rate": 0.00018740503003975963, + "loss": 0.7312, + "step": 2814 + }, + { + "epoch": 0.65, + "learning_rate": 0.0001873961912535089, + "loss": 0.7289, + "step": 2815 + }, + { + "epoch": 0.65, + "learning_rate": 0.00018738734957553121, + "loss": 0.6728, + "step": 2816 + }, + { + "epoch": 0.65, + "learning_rate": 0.0001873785050061191, + "loss": 0.7466, + "step": 2817 + }, + { + "epoch": 0.65, + "learning_rate": 0.00018736965754556528, + "loss": 0.7781, + "step": 2818 + }, + { + "epoch": 0.65, + "learning_rate": 0.00018736080719416242, + "loss": 0.7566, + "step": 2819 + }, + { + "epoch": 0.65, + "learning_rate": 0.0001873519539522034, + "loss": 0.7019, + "step": 2820 + }, + { + "epoch": 0.65, + "learning_rate": 0.00018734309781998114, + "loss": 0.763, + "step": 2821 + }, + { + "epoch": 0.65, + "learning_rate": 0.00018733423879778864, + "loss": 0.7525, + "step": 2822 + }, + { + "epoch": 0.65, + "learning_rate": 0.00018732537688591908, + "loss": 0.7294, + "step": 2823 + }, + { + "epoch": 0.65, + "learning_rate": 0.00018731651208466564, + "loss": 0.7931, + "step": 2824 + }, + { + "epoch": 0.65, + "learning_rate": 0.00018730764439432162, + "loss": 0.6998, + "step": 2825 + }, + { + "epoch": 0.65, + "learning_rate": 0.00018729877381518047, + "loss": 0.6737, + "step": 2826 + }, + { + "epoch": 0.65, + "learning_rate": 0.00018728990034753566, + "loss": 0.7545, + "step": 2827 + }, + { + "epoch": 0.65, + "learning_rate": 0.00018728102399168082, + "loss": 0.72, + "step": 2828 + }, + { + "epoch": 0.65, + "learning_rate": 0.00018727214474790964, + "loss": 0.8348, + "step": 2829 + }, + { + "epoch": 0.66, + "learning_rate": 0.0001872632626165159, + "loss": 0.7539, + "step": 2830 + }, + { + "epoch": 0.66, + "learning_rate": 0.0001872543775977935, + "loss": 0.7516, + "step": 2831 + }, + { + "epoch": 0.66, + "learning_rate": 0.0001872454896920364, + "loss": 0.8144, + "step": 2832 + }, + { + "epoch": 0.66, + "learning_rate": 0.00018723659889953873, + "loss": 0.7445, + "step": 2833 + }, + { + "epoch": 0.66, + "learning_rate": 0.00018722770522059464, + "loss": 0.7506, + "step": 2834 + }, + { + "epoch": 0.66, + "learning_rate": 0.00018721880865549837, + "loss": 0.7028, + "step": 2835 + }, + { + "epoch": 0.66, + "learning_rate": 0.0001872099092045443, + "loss": 0.7723, + "step": 2836 + }, + { + "epoch": 0.66, + "learning_rate": 0.00018720100686802694, + "loss": 0.7055, + "step": 2837 + }, + { + "epoch": 0.66, + "learning_rate": 0.00018719210164624077, + "loss": 0.778, + "step": 2838 + }, + { + "epoch": 0.66, + "learning_rate": 0.00018718319353948048, + "loss": 0.7677, + "step": 2839 + }, + { + "epoch": 0.66, + "learning_rate": 0.00018717428254804084, + "loss": 0.8327, + "step": 2840 + }, + { + "epoch": 0.66, + "learning_rate": 0.00018716536867221666, + "loss": 0.7031, + "step": 2841 + }, + { + "epoch": 0.66, + "learning_rate": 0.0001871564519123029, + "loss": 0.779, + "step": 2842 + }, + { + "epoch": 0.66, + "learning_rate": 0.00018714753226859457, + "loss": 0.83, + "step": 2843 + }, + { + "epoch": 0.66, + "learning_rate": 0.0001871386097413868, + "loss": 0.7042, + "step": 2844 + }, + { + "epoch": 0.66, + "learning_rate": 0.00018712968433097485, + "loss": 0.7727, + "step": 2845 + }, + { + "epoch": 0.66, + "learning_rate": 0.000187120756037654, + "loss": 0.7281, + "step": 2846 + }, + { + "epoch": 0.66, + "learning_rate": 0.00018711182486171968, + "loss": 0.8027, + "step": 2847 + }, + { + "epoch": 0.66, + "learning_rate": 0.00018710289080346742, + "loss": 0.7092, + "step": 2848 + }, + { + "epoch": 0.66, + "learning_rate": 0.0001870939538631928, + "loss": 0.7423, + "step": 2849 + }, + { + "epoch": 0.66, + "learning_rate": 0.00018708501404119152, + "loss": 0.7319, + "step": 2850 + }, + { + "epoch": 0.66, + "learning_rate": 0.0001870760713377594, + "loss": 0.7341, + "step": 2851 + }, + { + "epoch": 0.66, + "learning_rate": 0.00018706712575319233, + "loss": 0.7816, + "step": 2852 + }, + { + "epoch": 0.66, + "learning_rate": 0.00018705817728778624, + "loss": 0.7161, + "step": 2853 + }, + { + "epoch": 0.66, + "learning_rate": 0.00018704922594183732, + "loss": 0.7147, + "step": 2854 + }, + { + "epoch": 0.66, + "learning_rate": 0.00018704027171564166, + "loss": 0.7794, + "step": 2855 + }, + { + "epoch": 0.66, + "learning_rate": 0.00018703131460949554, + "loss": 0.7655, + "step": 2856 + }, + { + "epoch": 0.66, + "learning_rate": 0.00018702235462369538, + "loss": 0.7845, + "step": 2857 + }, + { + "epoch": 0.66, + "learning_rate": 0.0001870133917585376, + "loss": 0.7648, + "step": 2858 + }, + { + "epoch": 0.66, + "learning_rate": 0.0001870044260143188, + "loss": 0.7291, + "step": 2859 + }, + { + "epoch": 0.66, + "learning_rate": 0.00018699545739133558, + "loss": 0.6554, + "step": 2860 + }, + { + "epoch": 0.66, + "learning_rate": 0.0001869864858898847, + "loss": 0.6833, + "step": 2861 + }, + { + "epoch": 0.66, + "learning_rate": 0.00018697751151026308, + "loss": 0.7295, + "step": 2862 + }, + { + "epoch": 0.66, + "learning_rate": 0.00018696853425276757, + "loss": 0.7144, + "step": 2863 + }, + { + "epoch": 0.66, + "learning_rate": 0.00018695955411769525, + "loss": 0.72, + "step": 2864 + }, + { + "epoch": 0.66, + "learning_rate": 0.00018695057110534324, + "loss": 0.6809, + "step": 2865 + }, + { + "epoch": 0.66, + "learning_rate": 0.00018694158521600875, + "loss": 0.7269, + "step": 2866 + }, + { + "epoch": 0.66, + "learning_rate": 0.00018693259644998915, + "loss": 0.6987, + "step": 2867 + }, + { + "epoch": 0.66, + "learning_rate": 0.0001869236048075818, + "loss": 0.7074, + "step": 2868 + }, + { + "epoch": 0.66, + "learning_rate": 0.00018691461028908424, + "loss": 0.6679, + "step": 2869 + }, + { + "epoch": 0.66, + "learning_rate": 0.00018690561289479404, + "loss": 0.7388, + "step": 2870 + }, + { + "epoch": 0.66, + "learning_rate": 0.000186896612625009, + "loss": 0.6609, + "step": 2871 + }, + { + "epoch": 0.66, + "learning_rate": 0.0001868876094800268, + "loss": 0.7085, + "step": 2872 + }, + { + "epoch": 0.67, + "learning_rate": 0.00018687860346014537, + "loss": 0.7882, + "step": 2873 + }, + { + "epoch": 0.67, + "learning_rate": 0.00018686959456566275, + "loss": 0.765, + "step": 2874 + }, + { + "epoch": 0.67, + "learning_rate": 0.00018686058279687698, + "loss": 0.8071, + "step": 2875 + }, + { + "epoch": 0.67, + "learning_rate": 0.00018685156815408625, + "loss": 0.705, + "step": 2876 + }, + { + "epoch": 0.67, + "learning_rate": 0.0001868425506375888, + "loss": 0.756, + "step": 2877 + }, + { + "epoch": 0.67, + "learning_rate": 0.00018683353024768302, + "loss": 0.7995, + "step": 2878 + }, + { + "epoch": 0.67, + "learning_rate": 0.00018682450698466737, + "loss": 0.764, + "step": 2879 + }, + { + "epoch": 0.67, + "learning_rate": 0.00018681548084884043, + "loss": 0.7274, + "step": 2880 + }, + { + "epoch": 0.67, + "learning_rate": 0.00018680645184050083, + "loss": 0.6974, + "step": 2881 + }, + { + "epoch": 0.67, + "learning_rate": 0.00018679741995994732, + "loss": 0.7302, + "step": 2882 + }, + { + "epoch": 0.67, + "learning_rate": 0.00018678838520747872, + "loss": 0.7275, + "step": 2883 + }, + { + "epoch": 0.67, + "learning_rate": 0.00018677934758339401, + "loss": 0.7056, + "step": 2884 + }, + { + "epoch": 0.67, + "learning_rate": 0.00018677030708799222, + "loss": 0.6716, + "step": 2885 + }, + { + "epoch": 0.67, + "learning_rate": 0.00018676126372157246, + "loss": 0.7665, + "step": 2886 + }, + { + "epoch": 0.67, + "learning_rate": 0.00018675221748443394, + "loss": 0.7251, + "step": 2887 + }, + { + "epoch": 0.67, + "learning_rate": 0.00018674316837687598, + "loss": 0.7591, + "step": 2888 + }, + { + "epoch": 0.67, + "learning_rate": 0.000186734116399198, + "loss": 0.7454, + "step": 2889 + }, + { + "epoch": 0.67, + "learning_rate": 0.00018672506155169958, + "loss": 0.796, + "step": 2890 + }, + { + "epoch": 0.67, + "learning_rate": 0.00018671600383468023, + "loss": 0.6744, + "step": 2891 + }, + { + "epoch": 0.67, + "learning_rate": 0.00018670694324843964, + "loss": 0.6847, + "step": 2892 + }, + { + "epoch": 0.67, + "learning_rate": 0.0001866978797932777, + "loss": 0.7026, + "step": 2893 + }, + { + "epoch": 0.67, + "learning_rate": 0.00018668881346949417, + "loss": 0.7451, + "step": 2894 + }, + { + "epoch": 0.67, + "learning_rate": 0.00018667974427738916, + "loss": 0.7908, + "step": 2895 + }, + { + "epoch": 0.67, + "learning_rate": 0.00018667067221726265, + "loss": 0.7927, + "step": 2896 + }, + { + "epoch": 0.67, + "learning_rate": 0.00018666159728941483, + "loss": 0.7596, + "step": 2897 + }, + { + "epoch": 0.67, + "learning_rate": 0.000186652519494146, + "loss": 0.7099, + "step": 2898 + }, + { + "epoch": 0.67, + "learning_rate": 0.00018664343883175653, + "loss": 0.7592, + "step": 2899 + }, + { + "epoch": 0.67, + "learning_rate": 0.00018663435530254683, + "loss": 0.7528, + "step": 2900 + }, + { + "epoch": 0.67, + "learning_rate": 0.0001866252689068175, + "loss": 0.7681, + "step": 2901 + }, + { + "epoch": 0.67, + "learning_rate": 0.00018661617964486915, + "loss": 0.7014, + "step": 2902 + }, + { + "epoch": 0.67, + "learning_rate": 0.0001866070875170025, + "loss": 0.7565, + "step": 2903 + }, + { + "epoch": 0.67, + "learning_rate": 0.0001865979925235185, + "loss": 0.7506, + "step": 2904 + }, + { + "epoch": 0.67, + "learning_rate": 0.00018658889466471794, + "loss": 0.6765, + "step": 2905 + }, + { + "epoch": 0.67, + "learning_rate": 0.00018657979394090193, + "loss": 0.7498, + "step": 2906 + }, + { + "epoch": 0.67, + "learning_rate": 0.00018657069035237157, + "loss": 0.7836, + "step": 2907 + }, + { + "epoch": 0.67, + "learning_rate": 0.00018656158389942804, + "loss": 0.7349, + "step": 2908 + }, + { + "epoch": 0.67, + "learning_rate": 0.00018655247458237276, + "loss": 0.8193, + "step": 2909 + }, + { + "epoch": 0.67, + "learning_rate": 0.00018654336240150698, + "loss": 0.7461, + "step": 2910 + }, + { + "epoch": 0.67, + "learning_rate": 0.0001865342473571323, + "loss": 0.7814, + "step": 2911 + }, + { + "epoch": 0.67, + "learning_rate": 0.00018652512944955033, + "loss": 0.7498, + "step": 2912 + }, + { + "epoch": 0.67, + "learning_rate": 0.00018651600867906272, + "loss": 0.7133, + "step": 2913 + }, + { + "epoch": 0.67, + "learning_rate": 0.00018650688504597124, + "loss": 0.7257, + "step": 2914 + }, + { + "epoch": 0.67, + "learning_rate": 0.00018649775855057778, + "loss": 0.7747, + "step": 2915 + }, + { + "epoch": 0.68, + "learning_rate": 0.0001864886291931843, + "loss": 0.7656, + "step": 2916 + }, + { + "epoch": 0.68, + "learning_rate": 0.00018647949697409293, + "loss": 0.7844, + "step": 2917 + }, + { + "epoch": 0.68, + "learning_rate": 0.00018647036189360576, + "loss": 0.7858, + "step": 2918 + }, + { + "epoch": 0.68, + "learning_rate": 0.0001864612239520251, + "loss": 0.729, + "step": 2919 + }, + { + "epoch": 0.68, + "learning_rate": 0.00018645208314965327, + "loss": 0.7103, + "step": 2920 + }, + { + "epoch": 0.68, + "learning_rate": 0.00018644293948679274, + "loss": 0.7092, + "step": 2921 + }, + { + "epoch": 0.68, + "learning_rate": 0.00018643379296374603, + "loss": 0.7419, + "step": 2922 + }, + { + "epoch": 0.68, + "learning_rate": 0.00018642464358081579, + "loss": 0.8176, + "step": 2923 + }, + { + "epoch": 0.68, + "learning_rate": 0.00018641549133830476, + "loss": 0.745, + "step": 2924 + }, + { + "epoch": 0.68, + "learning_rate": 0.00018640633623651573, + "loss": 0.7331, + "step": 2925 + }, + { + "epoch": 0.68, + "learning_rate": 0.00018639717827575165, + "loss": 0.6696, + "step": 2926 + }, + { + "epoch": 0.68, + "learning_rate": 0.0001863880174563155, + "loss": 0.7619, + "step": 2927 + }, + { + "epoch": 0.68, + "learning_rate": 0.00018637885377851047, + "loss": 0.7663, + "step": 2928 + }, + { + "epoch": 0.68, + "learning_rate": 0.00018636968724263965, + "loss": 0.7723, + "step": 2929 + }, + { + "epoch": 0.68, + "learning_rate": 0.00018636051784900643, + "loss": 0.7603, + "step": 2930 + }, + { + "epoch": 0.68, + "learning_rate": 0.00018635134559791418, + "loss": 0.6911, + "step": 2931 + }, + { + "epoch": 0.68, + "learning_rate": 0.00018634217048966637, + "loss": 0.7322, + "step": 2932 + }, + { + "epoch": 0.68, + "learning_rate": 0.00018633299252456657, + "loss": 0.7833, + "step": 2933 + }, + { + "epoch": 0.68, + "learning_rate": 0.00018632381170291851, + "loss": 0.8371, + "step": 2934 + }, + { + "epoch": 0.68, + "learning_rate": 0.0001863146280250259, + "loss": 0.7388, + "step": 2935 + }, + { + "epoch": 0.68, + "learning_rate": 0.00018630544149119262, + "loss": 0.7881, + "step": 2936 + }, + { + "epoch": 0.68, + "learning_rate": 0.00018629625210172268, + "loss": 0.7175, + "step": 2937 + }, + { + "epoch": 0.68, + "learning_rate": 0.00018628705985692008, + "loss": 0.7704, + "step": 2938 + }, + { + "epoch": 0.68, + "learning_rate": 0.00018627786475708898, + "loss": 0.6932, + "step": 2939 + }, + { + "epoch": 0.68, + "learning_rate": 0.00018626866680253364, + "loss": 0.7673, + "step": 2940 + }, + { + "epoch": 0.68, + "learning_rate": 0.00018625946599355836, + "loss": 0.7585, + "step": 2941 + }, + { + "epoch": 0.68, + "learning_rate": 0.00018625026233046765, + "loss": 0.7787, + "step": 2942 + }, + { + "epoch": 0.68, + "learning_rate": 0.00018624105581356598, + "loss": 0.8, + "step": 2943 + }, + { + "epoch": 0.68, + "learning_rate": 0.00018623184644315791, + "loss": 0.7003, + "step": 2944 + }, + { + "epoch": 0.68, + "learning_rate": 0.0001862226342195483, + "loss": 0.723, + "step": 2945 + }, + { + "epoch": 0.68, + "learning_rate": 0.00018621341914304185, + "loss": 0.7217, + "step": 2946 + }, + { + "epoch": 0.68, + "learning_rate": 0.00018620420121394352, + "loss": 0.7135, + "step": 2947 + }, + { + "epoch": 0.68, + "learning_rate": 0.00018619498043255826, + "loss": 0.7608, + "step": 2948 + }, + { + "epoch": 0.68, + "learning_rate": 0.0001861857567991912, + "loss": 0.7547, + "step": 2949 + }, + { + "epoch": 0.68, + "learning_rate": 0.00018617653031414752, + "loss": 0.7755, + "step": 2950 + }, + { + "epoch": 0.68, + "learning_rate": 0.0001861673009777325, + "loss": 0.7556, + "step": 2951 + }, + { + "epoch": 0.68, + "learning_rate": 0.0001861580687902515, + "loss": 0.7482, + "step": 2952 + }, + { + "epoch": 0.68, + "learning_rate": 0.00018614883375201003, + "loss": 0.7831, + "step": 2953 + }, + { + "epoch": 0.68, + "learning_rate": 0.00018613959586331362, + "loss": 0.7161, + "step": 2954 + }, + { + "epoch": 0.68, + "learning_rate": 0.00018613035512446793, + "loss": 0.8056, + "step": 2955 + }, + { + "epoch": 0.68, + "learning_rate": 0.00018612111153577874, + "loss": 0.7846, + "step": 2956 + }, + { + "epoch": 0.68, + "learning_rate": 0.00018611186509755187, + "loss": 0.7807, + "step": 2957 + }, + { + "epoch": 0.68, + "learning_rate": 0.00018610261581009327, + "loss": 0.7394, + "step": 2958 + }, + { + "epoch": 0.68, + "learning_rate": 0.000186093363673709, + "loss": 0.7868, + "step": 2959 + }, + { + "epoch": 0.69, + "learning_rate": 0.00018608410868870516, + "loss": 0.7461, + "step": 2960 + }, + { + "epoch": 0.69, + "learning_rate": 0.000186074850855388, + "loss": 0.7699, + "step": 2961 + }, + { + "epoch": 0.69, + "learning_rate": 0.00018606559017406382, + "loss": 0.7481, + "step": 2962 + }, + { + "epoch": 0.69, + "learning_rate": 0.00018605632664503902, + "loss": 0.7853, + "step": 2963 + }, + { + "epoch": 0.69, + "learning_rate": 0.00018604706026862012, + "loss": 0.7693, + "step": 2964 + }, + { + "epoch": 0.69, + "learning_rate": 0.00018603779104511375, + "loss": 0.747, + "step": 2965 + }, + { + "epoch": 0.69, + "learning_rate": 0.00018602851897482658, + "loss": 0.7131, + "step": 2966 + }, + { + "epoch": 0.69, + "learning_rate": 0.00018601924405806542, + "loss": 0.725, + "step": 2967 + }, + { + "epoch": 0.69, + "learning_rate": 0.00018600996629513711, + "loss": 0.7377, + "step": 2968 + }, + { + "epoch": 0.69, + "learning_rate": 0.00018600068568634868, + "loss": 0.745, + "step": 2969 + }, + { + "epoch": 0.69, + "learning_rate": 0.00018599140223200716, + "loss": 0.8159, + "step": 2970 + }, + { + "epoch": 0.69, + "learning_rate": 0.00018598211593241973, + "loss": 0.7627, + "step": 2971 + }, + { + "epoch": 0.69, + "learning_rate": 0.00018597282678789367, + "loss": 0.805, + "step": 2972 + }, + { + "epoch": 0.69, + "learning_rate": 0.00018596353479873634, + "loss": 0.821, + "step": 2973 + }, + { + "epoch": 0.69, + "learning_rate": 0.00018595423996525513, + "loss": 0.7917, + "step": 2974 + }, + { + "epoch": 0.69, + "learning_rate": 0.00018594494228775768, + "loss": 0.7573, + "step": 2975 + }, + { + "epoch": 0.69, + "learning_rate": 0.00018593564176655152, + "loss": 0.7994, + "step": 2976 + }, + { + "epoch": 0.69, + "learning_rate": 0.00018592633840194445, + "loss": 0.7237, + "step": 2977 + }, + { + "epoch": 0.69, + "learning_rate": 0.0001859170321942443, + "loss": 0.749, + "step": 2978 + }, + { + "epoch": 0.69, + "learning_rate": 0.00018590772314375895, + "loss": 0.7618, + "step": 2979 + }, + { + "epoch": 0.69, + "learning_rate": 0.00018589841125079643, + "loss": 0.7572, + "step": 2980 + }, + { + "epoch": 0.69, + "learning_rate": 0.00018588909651566485, + "loss": 0.7698, + "step": 2981 + }, + { + "epoch": 0.69, + "learning_rate": 0.00018587977893867245, + "loss": 0.7857, + "step": 2982 + }, + { + "epoch": 0.69, + "learning_rate": 0.00018587045852012745, + "loss": 0.7102, + "step": 2983 + }, + { + "epoch": 0.69, + "learning_rate": 0.0001858611352603383, + "loss": 0.7705, + "step": 2984 + }, + { + "epoch": 0.69, + "learning_rate": 0.00018585180915961348, + "loss": 0.748, + "step": 2985 + }, + { + "epoch": 0.69, + "learning_rate": 0.0001858424802182615, + "loss": 0.7138, + "step": 2986 + }, + { + "epoch": 0.69, + "learning_rate": 0.0001858331484365911, + "loss": 0.7793, + "step": 2987 + }, + { + "epoch": 0.69, + "learning_rate": 0.00018582381381491105, + "loss": 0.7165, + "step": 2988 + }, + { + "epoch": 0.69, + "learning_rate": 0.0001858144763535302, + "loss": 0.713, + "step": 2989 + }, + { + "epoch": 0.69, + "learning_rate": 0.00018580513605275748, + "loss": 0.7344, + "step": 2990 + }, + { + "epoch": 0.69, + "learning_rate": 0.00018579579291290191, + "loss": 0.7496, + "step": 2991 + }, + { + "epoch": 0.69, + "learning_rate": 0.00018578644693427273, + "loss": 0.736, + "step": 2992 + }, + { + "epoch": 0.69, + "learning_rate": 0.00018577709811717907, + "loss": 0.7531, + "step": 2993 + }, + { + "epoch": 0.69, + "learning_rate": 0.00018576774646193034, + "loss": 0.7569, + "step": 2994 + }, + { + "epoch": 0.69, + "learning_rate": 0.00018575839196883592, + "loss": 0.7259, + "step": 2995 + }, + { + "epoch": 0.69, + "learning_rate": 0.0001857490346382053, + "loss": 0.7515, + "step": 2996 + }, + { + "epoch": 0.69, + "learning_rate": 0.0001857396744703482, + "loss": 0.7458, + "step": 2997 + }, + { + "epoch": 0.69, + "learning_rate": 0.00018573031146557422, + "loss": 0.6913, + "step": 2998 + }, + { + "epoch": 0.69, + "learning_rate": 0.0001857209456241932, + "loss": 0.7728, + "step": 2999 + }, + { + "epoch": 0.69, + "learning_rate": 0.00018571157694651503, + "loss": 0.711, + "step": 3000 + }, + { + "epoch": 0.69, + "learning_rate": 0.00018570220543284965, + "loss": 0.7363, + "step": 3001 + }, + { + "epoch": 0.69, + "learning_rate": 0.00018569283108350724, + "loss": 0.7713, + "step": 3002 + }, + { + "epoch": 0.7, + "learning_rate": 0.00018568345389879788, + "loss": 0.7293, + "step": 3003 + }, + { + "epoch": 0.7, + "learning_rate": 0.0001856740738790319, + "loss": 0.7262, + "step": 3004 + }, + { + "epoch": 0.7, + "learning_rate": 0.00018566469102451963, + "loss": 0.7865, + "step": 3005 + }, + { + "epoch": 0.7, + "learning_rate": 0.00018565530533557153, + "loss": 0.6499, + "step": 3006 + }, + { + "epoch": 0.7, + "learning_rate": 0.00018564591681249817, + "loss": 0.697, + "step": 3007 + }, + { + "epoch": 0.7, + "learning_rate": 0.00018563652545561013, + "loss": 0.816, + "step": 3008 + }, + { + "epoch": 0.7, + "learning_rate": 0.00018562713126521824, + "loss": 0.7282, + "step": 3009 + }, + { + "epoch": 0.7, + "learning_rate": 0.0001856177342416333, + "loss": 0.6836, + "step": 3010 + }, + { + "epoch": 0.7, + "learning_rate": 0.00018560833438516618, + "loss": 0.7205, + "step": 3011 + }, + { + "epoch": 0.7, + "learning_rate": 0.00018559893169612792, + "loss": 0.7323, + "step": 3012 + }, + { + "epoch": 0.7, + "learning_rate": 0.0001855895261748297, + "loss": 0.7468, + "step": 3013 + }, + { + "epoch": 0.7, + "learning_rate": 0.00018558011782158267, + "loss": 0.7366, + "step": 3014 + }, + { + "epoch": 0.7, + "learning_rate": 0.0001855707066366981, + "loss": 0.6674, + "step": 3015 + }, + { + "epoch": 0.7, + "learning_rate": 0.00018556129262048746, + "loss": 0.8008, + "step": 3016 + }, + { + "epoch": 0.7, + "learning_rate": 0.00018555187577326215, + "loss": 0.7485, + "step": 3017 + }, + { + "epoch": 0.7, + "learning_rate": 0.00018554245609533383, + "loss": 0.7843, + "step": 3018 + }, + { + "epoch": 0.7, + "learning_rate": 0.00018553303358701412, + "loss": 0.7825, + "step": 3019 + }, + { + "epoch": 0.7, + "learning_rate": 0.00018552360824861482, + "loss": 0.7323, + "step": 3020 + }, + { + "epoch": 0.7, + "learning_rate": 0.00018551418008044777, + "loss": 0.7318, + "step": 3021 + }, + { + "epoch": 0.7, + "learning_rate": 0.00018550474908282495, + "loss": 0.713, + "step": 3022 + }, + { + "epoch": 0.7, + "learning_rate": 0.00018549531525605839, + "loss": 0.6712, + "step": 3023 + }, + { + "epoch": 0.7, + "learning_rate": 0.00018548587860046023, + "loss": 0.7244, + "step": 3024 + }, + { + "epoch": 0.7, + "learning_rate": 0.0001854764391163427, + "loss": 0.7297, + "step": 3025 + }, + { + "epoch": 0.7, + "learning_rate": 0.00018546699680401817, + "loss": 0.7861, + "step": 3026 + }, + { + "epoch": 0.7, + "learning_rate": 0.000185457551663799, + "loss": 0.7101, + "step": 3027 + }, + { + "epoch": 0.7, + "learning_rate": 0.00018544810369599776, + "loss": 0.7458, + "step": 3028 + }, + { + "epoch": 0.7, + "learning_rate": 0.00018543865290092704, + "loss": 0.7588, + "step": 3029 + }, + { + "epoch": 0.7, + "learning_rate": 0.00018542919927889954, + "loss": 0.7175, + "step": 3030 + }, + { + "epoch": 0.7, + "learning_rate": 0.00018541974283022806, + "loss": 0.7509, + "step": 3031 + }, + { + "epoch": 0.7, + "learning_rate": 0.00018541028355522547, + "loss": 0.6947, + "step": 3032 + }, + { + "epoch": 0.7, + "learning_rate": 0.00018540082145420482, + "loss": 0.7074, + "step": 3033 + }, + { + "epoch": 0.7, + "learning_rate": 0.00018539135652747914, + "loss": 0.7613, + "step": 3034 + }, + { + "epoch": 0.7, + "learning_rate": 0.00018538188877536158, + "loss": 0.7765, + "step": 3035 + }, + { + "epoch": 0.7, + "learning_rate": 0.00018537241819816544, + "loss": 0.6894, + "step": 3036 + }, + { + "epoch": 0.7, + "learning_rate": 0.00018536294479620406, + "loss": 0.7317, + "step": 3037 + }, + { + "epoch": 0.7, + "learning_rate": 0.00018535346856979093, + "loss": 0.8159, + "step": 3038 + }, + { + "epoch": 0.7, + "learning_rate": 0.00018534398951923958, + "loss": 0.826, + "step": 3039 + }, + { + "epoch": 0.7, + "learning_rate": 0.0001853345076448636, + "loss": 0.8552, + "step": 3040 + }, + { + "epoch": 0.7, + "learning_rate": 0.00018532502294697672, + "loss": 0.7525, + "step": 3041 + }, + { + "epoch": 0.7, + "learning_rate": 0.00018531553542589288, + "loss": 0.6805, + "step": 3042 + }, + { + "epoch": 0.7, + "learning_rate": 0.0001853060450819259, + "loss": 0.8171, + "step": 3043 + }, + { + "epoch": 0.7, + "learning_rate": 0.0001852965519153898, + "loss": 0.7255, + "step": 3044 + }, + { + "epoch": 0.7, + "learning_rate": 0.00018528705592659868, + "loss": 0.7481, + "step": 3045 + }, + { + "epoch": 0.71, + "learning_rate": 0.00018527755711586678, + "loss": 0.7095, + "step": 3046 + }, + { + "epoch": 0.71, + "learning_rate": 0.00018526805548350837, + "loss": 0.7259, + "step": 3047 + }, + { + "epoch": 0.71, + "learning_rate": 0.00018525855102983785, + "loss": 0.7159, + "step": 3048 + }, + { + "epoch": 0.71, + "learning_rate": 0.00018524904375516967, + "loss": 0.7587, + "step": 3049 + }, + { + "epoch": 0.71, + "learning_rate": 0.00018523953365981842, + "loss": 0.7487, + "step": 3050 + }, + { + "epoch": 0.71, + "learning_rate": 0.00018523002074409878, + "loss": 0.7455, + "step": 3051 + }, + { + "epoch": 0.71, + "learning_rate": 0.00018522050500832551, + "loss": 0.7756, + "step": 3052 + }, + { + "epoch": 0.71, + "learning_rate": 0.00018521098645281342, + "loss": 0.7935, + "step": 3053 + }, + { + "epoch": 0.71, + "learning_rate": 0.00018520146507787746, + "loss": 0.6792, + "step": 3054 + }, + { + "epoch": 0.71, + "learning_rate": 0.00018519194088383273, + "loss": 0.7672, + "step": 3055 + }, + { + "epoch": 0.71, + "learning_rate": 0.0001851824138709943, + "loss": 0.7436, + "step": 3056 + }, + { + "epoch": 0.71, + "learning_rate": 0.0001851728840396774, + "loss": 0.6946, + "step": 3057 + }, + { + "epoch": 0.71, + "learning_rate": 0.00018516335139019742, + "loss": 0.7596, + "step": 3058 + }, + { + "epoch": 0.71, + "learning_rate": 0.0001851538159228697, + "loss": 0.66, + "step": 3059 + }, + { + "epoch": 0.71, + "learning_rate": 0.00018514427763800977, + "loss": 0.7399, + "step": 3060 + }, + { + "epoch": 0.71, + "learning_rate": 0.0001851347365359332, + "loss": 0.7089, + "step": 3061 + }, + { + "epoch": 0.71, + "learning_rate": 0.00018512519261695574, + "loss": 0.786, + "step": 3062 + }, + { + "epoch": 0.71, + "learning_rate": 0.00018511564588139312, + "loss": 0.7387, + "step": 3063 + }, + { + "epoch": 0.71, + "learning_rate": 0.00018510609632956123, + "loss": 0.7514, + "step": 3064 + }, + { + "epoch": 0.71, + "learning_rate": 0.00018509654396177609, + "loss": 0.8339, + "step": 3065 + }, + { + "epoch": 0.71, + "learning_rate": 0.00018508698877835368, + "loss": 0.7373, + "step": 3066 + }, + { + "epoch": 0.71, + "learning_rate": 0.00018507743077961024, + "loss": 0.7892, + "step": 3067 + }, + { + "epoch": 0.71, + "learning_rate": 0.00018506786996586196, + "loss": 0.7497, + "step": 3068 + }, + { + "epoch": 0.71, + "learning_rate": 0.0001850583063374252, + "loss": 0.7631, + "step": 3069 + }, + { + "epoch": 0.71, + "learning_rate": 0.00018504873989461644, + "loss": 0.7338, + "step": 3070 + }, + { + "epoch": 0.71, + "learning_rate": 0.00018503917063775215, + "loss": 0.7509, + "step": 3071 + }, + { + "epoch": 0.71, + "learning_rate": 0.000185029598567149, + "loss": 0.7143, + "step": 3072 + }, + { + "epoch": 0.71, + "learning_rate": 0.00018502002368312365, + "loss": 0.728, + "step": 3073 + }, + { + "epoch": 0.71, + "learning_rate": 0.00018501044598599297, + "loss": 0.7956, + "step": 3074 + }, + { + "epoch": 0.71, + "learning_rate": 0.00018500086547607383, + "loss": 0.8058, + "step": 3075 + }, + { + "epoch": 0.71, + "learning_rate": 0.00018499128215368328, + "loss": 0.705, + "step": 3076 + }, + { + "epoch": 0.71, + "learning_rate": 0.0001849816960191383, + "loss": 0.6978, + "step": 3077 + }, + { + "epoch": 0.71, + "learning_rate": 0.00018497210707275614, + "loss": 0.8264, + "step": 3078 + }, + { + "epoch": 0.71, + "learning_rate": 0.00018496251531485407, + "loss": 0.8013, + "step": 3079 + }, + { + "epoch": 0.71, + "learning_rate": 0.00018495292074574947, + "loss": 0.7178, + "step": 3080 + }, + { + "epoch": 0.71, + "learning_rate": 0.0001849433233657598, + "loss": 0.7402, + "step": 3081 + }, + { + "epoch": 0.71, + "learning_rate": 0.00018493372317520258, + "loss": 0.8097, + "step": 3082 + }, + { + "epoch": 0.71, + "learning_rate": 0.00018492412017439552, + "loss": 0.8129, + "step": 3083 + }, + { + "epoch": 0.71, + "learning_rate": 0.00018491451436365627, + "loss": 0.7787, + "step": 3084 + }, + { + "epoch": 0.71, + "learning_rate": 0.00018490490574330274, + "loss": 0.7013, + "step": 3085 + }, + { + "epoch": 0.71, + "learning_rate": 0.00018489529431365276, + "loss": 0.7563, + "step": 3086 + }, + { + "epoch": 0.71, + "learning_rate": 0.00018488568007502448, + "loss": 0.747, + "step": 3087 + }, + { + "epoch": 0.71, + "learning_rate": 0.00018487606302773597, + "loss": 0.656, + "step": 3088 + }, + { + "epoch": 0.72, + "learning_rate": 0.00018486644317210535, + "loss": 0.791, + "step": 3089 + }, + { + "epoch": 0.72, + "learning_rate": 0.000184856820508451, + "loss": 0.8001, + "step": 3090 + }, + { + "epoch": 0.72, + "learning_rate": 0.00018484719503709132, + "loss": 0.7327, + "step": 3091 + }, + { + "epoch": 0.72, + "learning_rate": 0.00018483756675834477, + "loss": 0.6571, + "step": 3092 + }, + { + "epoch": 0.72, + "learning_rate": 0.00018482793567252986, + "loss": 0.7404, + "step": 3093 + }, + { + "epoch": 0.72, + "learning_rate": 0.00018481830177996536, + "loss": 0.6871, + "step": 3094 + }, + { + "epoch": 0.72, + "learning_rate": 0.00018480866508097, + "loss": 0.7577, + "step": 3095 + }, + { + "epoch": 0.72, + "learning_rate": 0.00018479902557586259, + "loss": 0.7103, + "step": 3096 + }, + { + "epoch": 0.72, + "learning_rate": 0.00018478938326496215, + "loss": 0.8622, + "step": 3097 + }, + { + "epoch": 0.72, + "learning_rate": 0.00018477973814858768, + "loss": 0.7583, + "step": 3098 + }, + { + "epoch": 0.72, + "learning_rate": 0.00018477009022705825, + "loss": 0.7428, + "step": 3099 + }, + { + "epoch": 0.72, + "learning_rate": 0.0001847604395006932, + "loss": 0.7857, + "step": 3100 + }, + { + "epoch": 0.72, + "learning_rate": 0.00018475078596981182, + "loss": 0.7555, + "step": 3101 + }, + { + "epoch": 0.72, + "learning_rate": 0.00018474112963473352, + "loss": 0.6952, + "step": 3102 + }, + { + "epoch": 0.72, + "learning_rate": 0.00018473147049577774, + "loss": 0.7424, + "step": 3103 + }, + { + "epoch": 0.72, + "learning_rate": 0.0001847218085532642, + "loss": 0.7041, + "step": 3104 + }, + { + "epoch": 0.72, + "learning_rate": 0.00018471214380751244, + "loss": 0.7452, + "step": 3105 + }, + { + "epoch": 0.72, + "learning_rate": 0.00018470247625884234, + "loss": 0.7978, + "step": 3106 + }, + { + "epoch": 0.72, + "learning_rate": 0.00018469280590757378, + "loss": 0.7358, + "step": 3107 + }, + { + "epoch": 0.72, + "learning_rate": 0.00018468313275402668, + "loss": 0.7793, + "step": 3108 + }, + { + "epoch": 0.72, + "learning_rate": 0.00018467345679852113, + "loss": 0.7823, + "step": 3109 + }, + { + "epoch": 0.72, + "learning_rate": 0.00018466377804137726, + "loss": 0.7838, + "step": 3110 + }, + { + "epoch": 0.72, + "learning_rate": 0.00018465409648291535, + "loss": 0.7157, + "step": 3111 + }, + { + "epoch": 0.72, + "learning_rate": 0.00018464441212345575, + "loss": 0.8099, + "step": 3112 + }, + { + "epoch": 0.72, + "learning_rate": 0.00018463472496331885, + "loss": 0.7933, + "step": 3113 + }, + { + "epoch": 0.72, + "learning_rate": 0.00018462503500282518, + "loss": 0.714, + "step": 3114 + }, + { + "epoch": 0.72, + "learning_rate": 0.00018461534224229538, + "loss": 0.7511, + "step": 3115 + }, + { + "epoch": 0.72, + "learning_rate": 0.00018460564668205013, + "loss": 0.7186, + "step": 3116 + }, + { + "epoch": 0.72, + "learning_rate": 0.00018459594832241028, + "loss": 0.8094, + "step": 3117 + }, + { + "epoch": 0.72, + "learning_rate": 0.00018458624716369667, + "loss": 0.7805, + "step": 3118 + }, + { + "epoch": 0.72, + "learning_rate": 0.0001845765432062303, + "loss": 0.706, + "step": 3119 + }, + { + "epoch": 0.72, + "learning_rate": 0.0001845668364503323, + "loss": 0.7789, + "step": 3120 + }, + { + "epoch": 0.72, + "learning_rate": 0.00018455712689632376, + "loss": 0.6749, + "step": 3121 + }, + { + "epoch": 0.72, + "learning_rate": 0.00018454741454452603, + "loss": 0.7869, + "step": 3122 + }, + { + "epoch": 0.72, + "learning_rate": 0.00018453769939526043, + "loss": 0.8077, + "step": 3123 + }, + { + "epoch": 0.72, + "learning_rate": 0.00018452798144884838, + "loss": 0.7123, + "step": 3124 + }, + { + "epoch": 0.72, + "learning_rate": 0.00018451826070561147, + "loss": 0.7277, + "step": 3125 + }, + { + "epoch": 0.72, + "learning_rate": 0.00018450853716587133, + "loss": 0.7289, + "step": 3126 + }, + { + "epoch": 0.72, + "learning_rate": 0.00018449881082994969, + "loss": 0.7799, + "step": 3127 + }, + { + "epoch": 0.72, + "learning_rate": 0.00018448908169816833, + "loss": 0.7729, + "step": 3128 + }, + { + "epoch": 0.72, + "learning_rate": 0.0001844793497708492, + "loss": 0.8341, + "step": 3129 + }, + { + "epoch": 0.72, + "learning_rate": 0.0001844696150483143, + "loss": 0.7014, + "step": 3130 + }, + { + "epoch": 0.72, + "learning_rate": 0.0001844598775308857, + "loss": 0.7581, + "step": 3131 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018445013721888565, + "loss": 0.7626, + "step": 3132 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018444039411263637, + "loss": 0.7183, + "step": 3133 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018443064821246026, + "loss": 0.7394, + "step": 3134 + }, + { + "epoch": 0.73, + "learning_rate": 0.0001844208995186798, + "loss": 0.7186, + "step": 3135 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018441114803161756, + "loss": 0.7787, + "step": 3136 + }, + { + "epoch": 0.73, + "learning_rate": 0.0001844013937515962, + "loss": 0.6928, + "step": 3137 + }, + { + "epoch": 0.73, + "learning_rate": 0.0001843916366789384, + "loss": 0.7458, + "step": 3138 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018438187681396704, + "loss": 0.6755, + "step": 3139 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018437211415700509, + "loss": 0.7928, + "step": 3140 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018436234870837547, + "loss": 0.7092, + "step": 3141 + }, + { + "epoch": 0.73, + "learning_rate": 0.0001843525804684014, + "loss": 0.739, + "step": 3142 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018434280943740606, + "loss": 0.7479, + "step": 3143 + }, + { + "epoch": 0.73, + "learning_rate": 0.0001843330356157127, + "loss": 0.7729, + "step": 3144 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018432325900364472, + "loss": 0.7235, + "step": 3145 + }, + { + "epoch": 0.73, + "learning_rate": 0.0001843134796015257, + "loss": 0.7518, + "step": 3146 + }, + { + "epoch": 0.73, + "learning_rate": 0.0001843036974096791, + "loss": 0.7102, + "step": 3147 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018429391242842863, + "loss": 0.724, + "step": 3148 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018428412465809808, + "loss": 0.7755, + "step": 3149 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018427433409901127, + "loss": 0.8609, + "step": 3150 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018426454075149215, + "loss": 0.7312, + "step": 3151 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018425474461586475, + "loss": 0.8106, + "step": 3152 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018424494569245327, + "loss": 0.7725, + "step": 3153 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018423514398158183, + "loss": 0.7419, + "step": 3154 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018422533948357482, + "loss": 0.7596, + "step": 3155 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018421553219875658, + "loss": 0.7291, + "step": 3156 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018420572212745167, + "loss": 0.7618, + "step": 3157 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018419590926998467, + "loss": 0.7496, + "step": 3158 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018418609362668025, + "loss": 0.6783, + "step": 3159 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018417627519786315, + "loss": 0.8029, + "step": 3160 + }, + { + "epoch": 0.73, + "learning_rate": 0.0001841664539838583, + "loss": 0.6788, + "step": 3161 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018415662998499065, + "loss": 0.7182, + "step": 3162 + }, + { + "epoch": 0.73, + "learning_rate": 0.0001841468032015852, + "loss": 0.7724, + "step": 3163 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018413697363396718, + "loss": 0.7129, + "step": 3164 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018412714128246174, + "loss": 0.7764, + "step": 3165 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018411730614739427, + "loss": 0.7699, + "step": 3166 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018410746822909016, + "loss": 0.8287, + "step": 3167 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018409762752787497, + "loss": 0.717, + "step": 3168 + }, + { + "epoch": 0.73, + "learning_rate": 0.0001840877840440742, + "loss": 0.7153, + "step": 3169 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018407793777801366, + "loss": 0.7949, + "step": 3170 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018406808873001906, + "loss": 0.7703, + "step": 3171 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018405823690041632, + "loss": 0.7039, + "step": 3172 + }, + { + "epoch": 0.73, + "learning_rate": 0.0001840483822895314, + "loss": 0.6974, + "step": 3173 + }, + { + "epoch": 0.73, + "learning_rate": 0.0001840385248976904, + "loss": 0.7298, + "step": 3174 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018402866472521946, + "loss": 0.7287, + "step": 3175 + }, + { + "epoch": 0.74, + "learning_rate": 0.00018401880177244478, + "loss": 0.7334, + "step": 3176 + }, + { + "epoch": 0.74, + "learning_rate": 0.00018400893603969274, + "loss": 0.8105, + "step": 3177 + }, + { + "epoch": 0.74, + "learning_rate": 0.00018399906752728976, + "loss": 0.7372, + "step": 3178 + }, + { + "epoch": 0.74, + "learning_rate": 0.00018398919623556238, + "loss": 0.6793, + "step": 3179 + }, + { + "epoch": 0.74, + "learning_rate": 0.00018397932216483723, + "loss": 0.7411, + "step": 3180 + }, + { + "epoch": 0.74, + "learning_rate": 0.000183969445315441, + "loss": 0.8084, + "step": 3181 + }, + { + "epoch": 0.74, + "learning_rate": 0.00018395956568770046, + "loss": 0.7849, + "step": 3182 + }, + { + "epoch": 0.74, + "learning_rate": 0.00018394968328194257, + "loss": 0.7622, + "step": 3183 + }, + { + "epoch": 0.74, + "learning_rate": 0.00018393979809849426, + "loss": 0.6798, + "step": 3184 + }, + { + "epoch": 0.74, + "learning_rate": 0.00018392991013768266, + "loss": 0.767, + "step": 3185 + }, + { + "epoch": 0.74, + "learning_rate": 0.00018392001939983486, + "loss": 0.7503, + "step": 3186 + }, + { + "epoch": 0.74, + "learning_rate": 0.00018391012588527821, + "loss": 0.7749, + "step": 3187 + }, + { + "epoch": 0.74, + "learning_rate": 0.00018390022959433998, + "loss": 0.6908, + "step": 3188 + }, + { + "epoch": 0.74, + "learning_rate": 0.00018389033052734768, + "loss": 0.7082, + "step": 3189 + }, + { + "epoch": 0.74, + "learning_rate": 0.0001838804286846288, + "loss": 0.7665, + "step": 3190 + }, + { + "epoch": 0.74, + "learning_rate": 0.00018387052406651099, + "loss": 0.7355, + "step": 3191 + }, + { + "epoch": 0.74, + "learning_rate": 0.00018386061667332196, + "loss": 0.7299, + "step": 3192 + }, + { + "epoch": 0.74, + "learning_rate": 0.00018385070650538956, + "loss": 0.7436, + "step": 3193 + }, + { + "epoch": 0.74, + "learning_rate": 0.00018384079356304162, + "loss": 0.7127, + "step": 3194 + }, + { + "epoch": 0.74, + "learning_rate": 0.0001838308778466062, + "loss": 0.8077, + "step": 3195 + }, + { + "epoch": 0.74, + "learning_rate": 0.00018382095935641135, + "loss": 0.758, + "step": 3196 + }, + { + "epoch": 0.74, + "learning_rate": 0.00018381103809278525, + "loss": 0.7374, + "step": 3197 + }, + { + "epoch": 0.74, + "learning_rate": 0.0001838011140560562, + "loss": 0.7561, + "step": 3198 + }, + { + "epoch": 0.74, + "learning_rate": 0.00018379118724655254, + "loss": 0.6834, + "step": 3199 + }, + { + "epoch": 0.74, + "learning_rate": 0.00018378125766460273, + "loss": 0.8167, + "step": 3200 + }, + { + "epoch": 0.74, + "learning_rate": 0.00018377132531053532, + "loss": 0.7173, + "step": 3201 + }, + { + "epoch": 0.74, + "learning_rate": 0.00018376139018467892, + "loss": 0.703, + "step": 3202 + }, + { + "epoch": 0.74, + "learning_rate": 0.00018375145228736228, + "loss": 0.7731, + "step": 3203 + }, + { + "epoch": 0.74, + "learning_rate": 0.00018374151161891424, + "loss": 0.7822, + "step": 3204 + }, + { + "epoch": 0.74, + "learning_rate": 0.00018373156817966368, + "loss": 0.7557, + "step": 3205 + }, + { + "epoch": 0.74, + "learning_rate": 0.00018372162196993963, + "loss": 0.7694, + "step": 3206 + }, + { + "epoch": 0.74, + "learning_rate": 0.00018371167299007115, + "loss": 0.7367, + "step": 3207 + }, + { + "epoch": 0.74, + "learning_rate": 0.00018370172124038746, + "loss": 0.7734, + "step": 3208 + }, + { + "epoch": 0.74, + "learning_rate": 0.00018369176672121783, + "loss": 0.7621, + "step": 3209 + }, + { + "epoch": 0.74, + "learning_rate": 0.00018368180943289164, + "loss": 0.714, + "step": 3210 + }, + { + "epoch": 0.74, + "learning_rate": 0.00018367184937573833, + "loss": 0.773, + "step": 3211 + }, + { + "epoch": 0.74, + "learning_rate": 0.00018366188655008747, + "loss": 0.7785, + "step": 3212 + }, + { + "epoch": 0.74, + "learning_rate": 0.00018365192095626872, + "loss": 0.7741, + "step": 3213 + }, + { + "epoch": 0.74, + "learning_rate": 0.0001836419525946118, + "loss": 0.7656, + "step": 3214 + }, + { + "epoch": 0.74, + "learning_rate": 0.0001836319814654465, + "loss": 0.7722, + "step": 3215 + }, + { + "epoch": 0.74, + "learning_rate": 0.00018362200756910283, + "loss": 0.6888, + "step": 3216 + }, + { + "epoch": 0.74, + "learning_rate": 0.00018361203090591071, + "loss": 0.7406, + "step": 3217 + }, + { + "epoch": 0.74, + "learning_rate": 0.00018360205147620028, + "loss": 0.766, + "step": 3218 + }, + { + "epoch": 0.75, + "learning_rate": 0.00018359206928030174, + "loss": 0.7452, + "step": 3219 + }, + { + "epoch": 0.75, + "learning_rate": 0.0001835820843185454, + "loss": 0.7862, + "step": 3220 + }, + { + "epoch": 0.75, + "learning_rate": 0.0001835720965912616, + "loss": 0.7738, + "step": 3221 + }, + { + "epoch": 0.75, + "learning_rate": 0.00018356210609878085, + "loss": 0.7514, + "step": 3222 + }, + { + "epoch": 0.75, + "learning_rate": 0.00018355211284143365, + "loss": 0.7919, + "step": 3223 + }, + { + "epoch": 0.75, + "learning_rate": 0.00018354211681955072, + "loss": 0.7918, + "step": 3224 + }, + { + "epoch": 0.75, + "learning_rate": 0.00018353211803346273, + "loss": 0.7169, + "step": 3225 + }, + { + "epoch": 0.75, + "learning_rate": 0.0001835221164835006, + "loss": 0.7424, + "step": 3226 + }, + { + "epoch": 0.75, + "learning_rate": 0.00018351211216999518, + "loss": 0.752, + "step": 3227 + }, + { + "epoch": 0.75, + "learning_rate": 0.00018350210509327752, + "loss": 0.7189, + "step": 3228 + }, + { + "epoch": 0.75, + "learning_rate": 0.00018349209525367878, + "loss": 0.6811, + "step": 3229 + }, + { + "epoch": 0.75, + "learning_rate": 0.00018348208265153005, + "loss": 0.7502, + "step": 3230 + }, + { + "epoch": 0.75, + "learning_rate": 0.00018347206728716272, + "loss": 0.668, + "step": 3231 + }, + { + "epoch": 0.75, + "learning_rate": 0.00018346204916090813, + "loss": 0.7116, + "step": 3232 + }, + { + "epoch": 0.75, + "learning_rate": 0.00018345202827309775, + "loss": 0.7395, + "step": 3233 + }, + { + "epoch": 0.75, + "learning_rate": 0.00018344200462406315, + "loss": 0.7356, + "step": 3234 + }, + { + "epoch": 0.75, + "learning_rate": 0.00018343197821413604, + "loss": 0.7623, + "step": 3235 + }, + { + "epoch": 0.75, + "learning_rate": 0.00018342194904364813, + "loss": 0.7524, + "step": 3236 + }, + { + "epoch": 0.75, + "learning_rate": 0.00018341191711293125, + "loss": 0.7525, + "step": 3237 + }, + { + "epoch": 0.75, + "learning_rate": 0.00018340188242231732, + "loss": 0.86, + "step": 3238 + }, + { + "epoch": 0.75, + "learning_rate": 0.00018339184497213838, + "loss": 0.7463, + "step": 3239 + }, + { + "epoch": 0.75, + "learning_rate": 0.00018338180476272658, + "loss": 0.7099, + "step": 3240 + }, + { + "epoch": 0.75, + "learning_rate": 0.00018337176179441408, + "loss": 0.6992, + "step": 3241 + }, + { + "epoch": 0.75, + "learning_rate": 0.0001833617160675332, + "loss": 0.7267, + "step": 3242 + }, + { + "epoch": 0.75, + "learning_rate": 0.00018335166758241632, + "loss": 0.7848, + "step": 3243 + }, + { + "epoch": 0.75, + "learning_rate": 0.00018334161633939595, + "loss": 0.8136, + "step": 3244 + }, + { + "epoch": 0.75, + "learning_rate": 0.0001833315623388046, + "loss": 0.7569, + "step": 3245 + }, + { + "epoch": 0.75, + "learning_rate": 0.000183321505580975, + "loss": 0.7589, + "step": 3246 + }, + { + "epoch": 0.75, + "learning_rate": 0.00018331144606623982, + "loss": 0.6713, + "step": 3247 + }, + { + "epoch": 0.75, + "learning_rate": 0.00018330138379493195, + "loss": 0.6906, + "step": 3248 + }, + { + "epoch": 0.75, + "learning_rate": 0.00018329131876738437, + "loss": 0.7703, + "step": 3249 + }, + { + "epoch": 0.75, + "learning_rate": 0.00018328125098393005, + "loss": 0.6756, + "step": 3250 + }, + { + "epoch": 0.75, + "learning_rate": 0.00018327118044490212, + "loss": 0.7421, + "step": 3251 + }, + { + "epoch": 0.75, + "learning_rate": 0.00018326110715063382, + "loss": 0.7301, + "step": 3252 + }, + { + "epoch": 0.75, + "learning_rate": 0.0001832510311014584, + "loss": 0.7537, + "step": 3253 + }, + { + "epoch": 0.75, + "learning_rate": 0.0001832409522977093, + "loss": 0.7147, + "step": 3254 + }, + { + "epoch": 0.75, + "learning_rate": 0.00018323087073971993, + "loss": 0.7571, + "step": 3255 + }, + { + "epoch": 0.75, + "learning_rate": 0.00018322078642782396, + "loss": 0.749, + "step": 3256 + }, + { + "epoch": 0.75, + "learning_rate": 0.00018321069936235503, + "loss": 0.7395, + "step": 3257 + }, + { + "epoch": 0.75, + "learning_rate": 0.00018320060954364685, + "loss": 0.7378, + "step": 3258 + }, + { + "epoch": 0.75, + "learning_rate": 0.00018319051697203328, + "loss": 0.724, + "step": 3259 + }, + { + "epoch": 0.75, + "learning_rate": 0.00018318042164784827, + "loss": 0.7515, + "step": 3260 + }, + { + "epoch": 0.75, + "learning_rate": 0.0001831703235714259, + "loss": 0.7396, + "step": 3261 + }, + { + "epoch": 0.76, + "learning_rate": 0.0001831602227431002, + "loss": 0.7292, + "step": 3262 + }, + { + "epoch": 0.76, + "learning_rate": 0.00018315011916320543, + "loss": 0.7259, + "step": 3263 + }, + { + "epoch": 0.76, + "learning_rate": 0.00018314001283207588, + "loss": 0.753, + "step": 3264 + }, + { + "epoch": 0.76, + "learning_rate": 0.00018312990375004597, + "loss": 0.7303, + "step": 3265 + }, + { + "epoch": 0.76, + "learning_rate": 0.00018311979191745016, + "loss": 0.7532, + "step": 3266 + }, + { + "epoch": 0.76, + "learning_rate": 0.00018310967733462303, + "loss": 0.7315, + "step": 3267 + }, + { + "epoch": 0.76, + "learning_rate": 0.00018309956000189928, + "loss": 0.7992, + "step": 3268 + }, + { + "epoch": 0.76, + "learning_rate": 0.0001830894399196136, + "loss": 0.7689, + "step": 3269 + }, + { + "epoch": 0.76, + "learning_rate": 0.0001830793170881009, + "loss": 0.7716, + "step": 3270 + }, + { + "epoch": 0.76, + "learning_rate": 0.00018306919150769607, + "loss": 0.7065, + "step": 3271 + }, + { + "epoch": 0.76, + "learning_rate": 0.00018305906317873417, + "loss": 0.7947, + "step": 3272 + }, + { + "epoch": 0.76, + "learning_rate": 0.00018304893210155035, + "loss": 0.7081, + "step": 3273 + }, + { + "epoch": 0.76, + "learning_rate": 0.00018303879827647975, + "loss": 0.7546, + "step": 3274 + }, + { + "epoch": 0.76, + "learning_rate": 0.00018302866170385776, + "loss": 0.8019, + "step": 3275 + }, + { + "epoch": 0.76, + "learning_rate": 0.00018301852238401968, + "loss": 0.7319, + "step": 3276 + }, + { + "epoch": 0.76, + "learning_rate": 0.00018300838031730108, + "loss": 0.727, + "step": 3277 + }, + { + "epoch": 0.76, + "learning_rate": 0.0001829982355040375, + "loss": 0.7272, + "step": 3278 + }, + { + "epoch": 0.76, + "learning_rate": 0.00018298808794456458, + "loss": 0.7706, + "step": 3279 + }, + { + "epoch": 0.76, + "learning_rate": 0.00018297793763921813, + "loss": 0.7599, + "step": 3280 + }, + { + "epoch": 0.76, + "learning_rate": 0.00018296778458833397, + "loss": 0.8141, + "step": 3281 + }, + { + "epoch": 0.76, + "learning_rate": 0.00018295762879224804, + "loss": 0.7144, + "step": 3282 + }, + { + "epoch": 0.76, + "learning_rate": 0.00018294747025129638, + "loss": 0.7072, + "step": 3283 + }, + { + "epoch": 0.76, + "learning_rate": 0.00018293730896581508, + "loss": 0.7029, + "step": 3284 + }, + { + "epoch": 0.76, + "learning_rate": 0.0001829271449361404, + "loss": 0.7552, + "step": 3285 + }, + { + "epoch": 0.76, + "learning_rate": 0.00018291697816260865, + "loss": 0.7599, + "step": 3286 + }, + { + "epoch": 0.76, + "learning_rate": 0.00018290680864555616, + "loss": 0.717, + "step": 3287 + }, + { + "epoch": 0.75, + "learning_rate": 0.00018328960423062528, + "loss": 0.6724, + "step": 3288 + }, + { + "epoch": 0.75, + "learning_rate": 0.00018327965580617342, + "loss": 0.7441, + "step": 3289 + }, + { + "epoch": 0.75, + "learning_rate": 0.00018326970469139192, + "loss": 0.73, + "step": 3290 + }, + { + "epoch": 0.75, + "learning_rate": 0.00018325975088660218, + "loss": 0.6981, + "step": 3291 + }, + { + "epoch": 0.75, + "learning_rate": 0.00018324979439212583, + "loss": 0.7608, + "step": 3292 + }, + { + "epoch": 0.75, + "learning_rate": 0.00018323983520828442, + "loss": 0.6645, + "step": 3293 + }, + { + "epoch": 0.75, + "learning_rate": 0.00018322987333539977, + "loss": 0.7299, + "step": 3294 + }, + { + "epoch": 0.75, + "learning_rate": 0.00018321990877379372, + "loss": 0.7406, + "step": 3295 + }, + { + "epoch": 0.75, + "learning_rate": 0.00018320994152378807, + "loss": 0.7458, + "step": 3296 + }, + { + "epoch": 0.75, + "learning_rate": 0.0001831999715857048, + "loss": 0.7276, + "step": 3297 + }, + { + "epoch": 0.75, + "learning_rate": 0.00018318999895986613, + "loss": 0.7918, + "step": 3298 + }, + { + "epoch": 0.75, + "learning_rate": 0.0001831800236465941, + "loss": 0.678, + "step": 3299 + }, + { + "epoch": 0.75, + "learning_rate": 0.00018317004564621096, + "loss": 0.6754, + "step": 3300 + }, + { + "epoch": 0.76, + "learning_rate": 0.00018316006495903913, + "loss": 0.7356, + "step": 3301 + }, + { + "epoch": 0.76, + "learning_rate": 0.00018315008158540096, + "loss": 0.7184, + "step": 3302 + }, + { + "epoch": 0.76, + "learning_rate": 0.00018314009552561898, + "loss": 0.7164, + "step": 3303 + }, + { + "epoch": 0.76, + "learning_rate": 0.0001831301067800158, + "loss": 0.7112, + "step": 3304 + }, + { + "epoch": 0.76, + "learning_rate": 0.00018312011534891412, + "loss": 0.758, + "step": 3305 + }, + { + "epoch": 0.76, + "learning_rate": 0.00018311012123263667, + "loss": 0.7311, + "step": 3306 + }, + { + "epoch": 0.76, + "learning_rate": 0.0001831001244315063, + "loss": 0.6739, + "step": 3307 + }, + { + "epoch": 0.76, + "learning_rate": 0.000183090124945846, + "loss": 0.8239, + "step": 3308 + }, + { + "epoch": 0.76, + "learning_rate": 0.00018308012277597883, + "loss": 0.7711, + "step": 3309 + }, + { + "epoch": 0.76, + "learning_rate": 0.0001830701179222278, + "loss": 0.7761, + "step": 3310 + }, + { + "epoch": 0.76, + "learning_rate": 0.00018306011038491619, + "loss": 0.7315, + "step": 3311 + }, + { + "epoch": 0.76, + "learning_rate": 0.00018305010016436726, + "loss": 0.715, + "step": 3312 + }, + { + "epoch": 0.76, + "learning_rate": 0.00018304008726090443, + "loss": 0.7413, + "step": 3313 + }, + { + "epoch": 0.76, + "learning_rate": 0.0001830300716748511, + "loss": 0.7427, + "step": 3314 + }, + { + "epoch": 0.76, + "learning_rate": 0.00018302005340653087, + "loss": 0.6848, + "step": 3315 + }, + { + "epoch": 0.76, + "learning_rate": 0.00018301003245626738, + "loss": 0.7067, + "step": 3316 + }, + { + "epoch": 0.76, + "learning_rate": 0.00018300000882438434, + "loss": 0.7753, + "step": 3317 + }, + { + "epoch": 0.76, + "learning_rate": 0.00018298998251120552, + "loss": 0.7454, + "step": 3318 + }, + { + "epoch": 0.76, + "learning_rate": 0.00018297995351705492, + "loss": 0.8116, + "step": 3319 + }, + { + "epoch": 0.76, + "learning_rate": 0.00018296992184225642, + "loss": 0.7204, + "step": 3320 + }, + { + "epoch": 0.76, + "learning_rate": 0.00018295988748713416, + "loss": 0.7121, + "step": 3321 + }, + { + "epoch": 0.76, + "learning_rate": 0.00018294985045201227, + "loss": 0.7969, + "step": 3322 + }, + { + "epoch": 0.76, + "learning_rate": 0.00018293981073721498, + "loss": 0.7602, + "step": 3323 + }, + { + "epoch": 0.76, + "learning_rate": 0.00018292976834306663, + "loss": 0.763, + "step": 3324 + }, + { + "epoch": 0.76, + "learning_rate": 0.00018291972326989165, + "loss": 0.7819, + "step": 3325 + }, + { + "epoch": 0.76, + "learning_rate": 0.00018290967551801455, + "loss": 0.6559, + "step": 3326 + }, + { + "epoch": 0.76, + "learning_rate": 0.0001828996250877599, + "loss": 0.7158, + "step": 3327 + }, + { + "epoch": 0.76, + "learning_rate": 0.00018288957197945237, + "loss": 0.7867, + "step": 3328 + }, + { + "epoch": 0.76, + "learning_rate": 0.00018287951619341678, + "loss": 0.7451, + "step": 3329 + }, + { + "epoch": 0.76, + "learning_rate": 0.0001828694577299779, + "loss": 0.7483, + "step": 3330 + }, + { + "epoch": 0.76, + "learning_rate": 0.0001828593965894607, + "loss": 0.7247, + "step": 3331 + }, + { + "epoch": 0.76, + "learning_rate": 0.0001828493327721902, + "loss": 0.769, + "step": 3332 + }, + { + "epoch": 0.76, + "learning_rate": 0.00018283926627849154, + "loss": 0.8108, + "step": 3333 + }, + { + "epoch": 0.76, + "learning_rate": 0.0001828291971086899, + "loss": 0.7641, + "step": 3334 + }, + { + "epoch": 0.76, + "learning_rate": 0.00018281912526311054, + "loss": 0.7479, + "step": 3335 + }, + { + "epoch": 0.76, + "learning_rate": 0.00018280905074207884, + "loss": 0.8065, + "step": 3336 + }, + { + "epoch": 0.76, + "learning_rate": 0.00018279897354592025, + "loss": 0.7116, + "step": 3337 + }, + { + "epoch": 0.76, + "learning_rate": 0.0001827888936749603, + "loss": 0.7287, + "step": 3338 + }, + { + "epoch": 0.76, + "learning_rate": 0.0001827788111295247, + "loss": 0.7357, + "step": 3339 + }, + { + "epoch": 0.76, + "learning_rate": 0.00018276872590993902, + "loss": 0.7016, + "step": 3340 + }, + { + "epoch": 0.76, + "learning_rate": 0.0001827586380165292, + "loss": 0.7086, + "step": 3341 + }, + { + "epoch": 0.76, + "learning_rate": 0.00018274854744962103, + "loss": 0.8487, + "step": 3342 + }, + { + "epoch": 0.76, + "learning_rate": 0.00018273845420954056, + "loss": 0.6508, + "step": 3343 + }, + { + "epoch": 0.76, + "learning_rate": 0.00018272835829661376, + "loss": 0.7377, + "step": 3344 + }, + { + "epoch": 0.77, + "learning_rate": 0.00018271825971116686, + "loss": 0.8693, + "step": 3345 + }, + { + "epoch": 0.77, + "learning_rate": 0.00018270815845352608, + "loss": 0.7397, + "step": 3346 + }, + { + "epoch": 0.77, + "learning_rate": 0.0001826980545240177, + "loss": 0.7549, + "step": 3347 + }, + { + "epoch": 0.77, + "learning_rate": 0.00018268794792296813, + "loss": 0.7354, + "step": 3348 + }, + { + "epoch": 0.77, + "learning_rate": 0.00018267783865070388, + "loss": 0.7251, + "step": 3349 + }, + { + "epoch": 0.77, + "learning_rate": 0.00018266772670755152, + "loss": 0.7397, + "step": 3350 + }, + { + "epoch": 0.77, + "learning_rate": 0.00018265761209383774, + "loss": 0.7254, + "step": 3351 + }, + { + "epoch": 0.77, + "learning_rate": 0.00018264749480988924, + "loss": 0.678, + "step": 3352 + }, + { + "epoch": 0.77, + "learning_rate": 0.00018263737485603289, + "loss": 0.7455, + "step": 3353 + }, + { + "epoch": 0.77, + "learning_rate": 0.00018262725223259563, + "loss": 0.6759, + "step": 3354 + }, + { + "epoch": 0.77, + "learning_rate": 0.0001826171269399044, + "loss": 0.7407, + "step": 3355 + }, + { + "epoch": 0.77, + "learning_rate": 0.00018260699897828636, + "loss": 0.7502, + "step": 3356 + }, + { + "epoch": 0.77, + "learning_rate": 0.00018259686834806867, + "loss": 0.7004, + "step": 3357 + }, + { + "epoch": 0.77, + "learning_rate": 0.00018258673504957865, + "loss": 0.7524, + "step": 3358 + }, + { + "epoch": 0.77, + "learning_rate": 0.00018257659908314352, + "loss": 0.8022, + "step": 3359 + }, + { + "epoch": 0.77, + "learning_rate": 0.00018256646044909086, + "loss": 0.7713, + "step": 3360 + }, + { + "epoch": 0.77, + "learning_rate": 0.00018255631914774808, + "loss": 0.7769, + "step": 3361 + }, + { + "epoch": 0.77, + "learning_rate": 0.00018254617517944293, + "loss": 0.7183, + "step": 3362 + }, + { + "epoch": 0.77, + "learning_rate": 0.00018253602854450299, + "loss": 0.7442, + "step": 3363 + }, + { + "epoch": 0.77, + "learning_rate": 0.0001825258792432561, + "loss": 0.7573, + "step": 3364 + }, + { + "epoch": 0.77, + "learning_rate": 0.00018251572727603005, + "loss": 0.8268, + "step": 3365 + }, + { + "epoch": 0.77, + "learning_rate": 0.00018250557264315294, + "loss": 0.7255, + "step": 3366 + }, + { + "epoch": 0.77, + "learning_rate": 0.00018249541534495268, + "loss": 0.757, + "step": 3367 + }, + { + "epoch": 0.77, + "learning_rate": 0.0001824852553817575, + "loss": 0.6909, + "step": 3368 + }, + { + "epoch": 0.77, + "learning_rate": 0.00018247509275389555, + "loss": 0.7323, + "step": 3369 + }, + { + "epoch": 0.77, + "learning_rate": 0.00018246492746169514, + "loss": 0.7232, + "step": 3370 + }, + { + "epoch": 0.77, + "learning_rate": 0.0001824547595054847, + "loss": 0.7326, + "step": 3371 + }, + { + "epoch": 0.77, + "learning_rate": 0.00018244458888559266, + "loss": 0.83, + "step": 3372 + }, + { + "epoch": 0.77, + "learning_rate": 0.00018243441560234758, + "loss": 0.6386, + "step": 3373 + }, + { + "epoch": 0.77, + "learning_rate": 0.00018242423965607812, + "loss": 0.7591, + "step": 3374 + }, + { + "epoch": 0.77, + "learning_rate": 0.000182414061047113, + "loss": 0.7766, + "step": 3375 + }, + { + "epoch": 0.77, + "learning_rate": 0.00018240387977578107, + "loss": 0.7966, + "step": 3376 + }, + { + "epoch": 0.77, + "learning_rate": 0.00018239369584241118, + "loss": 0.7229, + "step": 3377 + }, + { + "epoch": 0.77, + "learning_rate": 0.00018238350924733236, + "loss": 0.7072, + "step": 3378 + }, + { + "epoch": 0.77, + "learning_rate": 0.0001823733199908737, + "loss": 0.6916, + "step": 3379 + }, + { + "epoch": 0.77, + "learning_rate": 0.00018236312807336428, + "loss": 0.8907, + "step": 3380 + }, + { + "epoch": 0.77, + "learning_rate": 0.00018235293349513345, + "loss": 0.75, + "step": 3381 + }, + { + "epoch": 0.77, + "learning_rate": 0.00018234273625651048, + "loss": 0.7275, + "step": 3382 + }, + { + "epoch": 0.77, + "learning_rate": 0.0001823325363578248, + "loss": 0.7414, + "step": 3383 + }, + { + "epoch": 0.77, + "learning_rate": 0.00018232233379940596, + "loss": 0.7488, + "step": 3384 + }, + { + "epoch": 0.77, + "learning_rate": 0.0001823121285815835, + "loss": 0.7163, + "step": 3385 + }, + { + "epoch": 0.77, + "learning_rate": 0.00018230192070468708, + "loss": 0.7386, + "step": 3386 + }, + { + "epoch": 0.77, + "learning_rate": 0.00018229171016904651, + "loss": 0.6998, + "step": 3387 + }, + { + "epoch": 0.77, + "learning_rate": 0.0001822814969749916, + "loss": 0.7712, + "step": 3388 + }, + { + "epoch": 0.78, + "learning_rate": 0.00018227128112285234, + "loss": 0.7904, + "step": 3389 + }, + { + "epoch": 0.78, + "learning_rate": 0.0001822610626129587, + "loss": 0.7423, + "step": 3390 + }, + { + "epoch": 0.78, + "learning_rate": 0.00018225084144564085, + "loss": 0.7556, + "step": 3391 + }, + { + "epoch": 0.78, + "learning_rate": 0.0001822406176212289, + "loss": 0.7347, + "step": 3392 + }, + { + "epoch": 0.78, + "learning_rate": 0.00018223039114005315, + "loss": 0.6986, + "step": 3393 + }, + { + "epoch": 0.78, + "learning_rate": 0.00018222016200244396, + "loss": 0.78, + "step": 3394 + }, + { + "epoch": 0.78, + "learning_rate": 0.00018220993020873184, + "loss": 0.7407, + "step": 3395 + }, + { + "epoch": 0.78, + "learning_rate": 0.0001821996957592473, + "loss": 0.7271, + "step": 3396 + }, + { + "epoch": 0.78, + "learning_rate": 0.0001821894586543209, + "loss": 0.6759, + "step": 3397 + }, + { + "epoch": 0.78, + "learning_rate": 0.00018217921889428343, + "loss": 0.7515, + "step": 3398 + }, + { + "epoch": 0.78, + "learning_rate": 0.0001821689764794656, + "loss": 0.6696, + "step": 3399 + }, + { + "epoch": 0.78, + "learning_rate": 0.00018215873141019836, + "loss": 0.748, + "step": 3400 + }, + { + "epoch": 0.78, + "learning_rate": 0.00018214848368681267, + "loss": 0.7168, + "step": 3401 + }, + { + "epoch": 0.78, + "learning_rate": 0.00018213823330963955, + "loss": 0.7278, + "step": 3402 + }, + { + "epoch": 0.78, + "learning_rate": 0.00018212798027901012, + "loss": 0.7269, + "step": 3403 + }, + { + "epoch": 0.78, + "learning_rate": 0.00018211772459525565, + "loss": 0.7739, + "step": 3404 + }, + { + "epoch": 0.78, + "learning_rate": 0.00018210746625870746, + "loss": 0.7449, + "step": 3405 + }, + { + "epoch": 0.78, + "learning_rate": 0.00018209720526969684, + "loss": 0.6946, + "step": 3406 + }, + { + "epoch": 0.78, + "learning_rate": 0.00018208694162855538, + "loss": 0.8191, + "step": 3407 + }, + { + "epoch": 0.78, + "learning_rate": 0.0001820766753356146, + "loss": 0.7374, + "step": 3408 + }, + { + "epoch": 0.78, + "learning_rate": 0.00018206640639120613, + "loss": 0.7872, + "step": 3409 + }, + { + "epoch": 0.78, + "learning_rate": 0.00018205613479566174, + "loss": 0.7604, + "step": 3410 + }, + { + "epoch": 0.78, + "learning_rate": 0.00018204586054931324, + "loss": 0.6984, + "step": 3411 + }, + { + "epoch": 0.78, + "learning_rate": 0.00018203558365249253, + "loss": 0.7657, + "step": 3412 + }, + { + "epoch": 0.78, + "learning_rate": 0.00018202530410553163, + "loss": 0.7494, + "step": 3413 + }, + { + "epoch": 0.78, + "learning_rate": 0.0001820150219087626, + "loss": 0.7854, + "step": 3414 + }, + { + "epoch": 0.78, + "learning_rate": 0.00018200473706251756, + "loss": 0.6754, + "step": 3415 + }, + { + "epoch": 0.78, + "learning_rate": 0.00018199444956712882, + "loss": 0.734, + "step": 3416 + }, + { + "epoch": 0.78, + "learning_rate": 0.0001819841594229287, + "loss": 0.7456, + "step": 3417 + }, + { + "epoch": 0.78, + "learning_rate": 0.00018197386663024962, + "loss": 0.7514, + "step": 3418 + }, + { + "epoch": 0.78, + "learning_rate": 0.0001819635711894241, + "loss": 0.7562, + "step": 3419 + }, + { + "epoch": 0.78, + "learning_rate": 0.00018195327310078466, + "loss": 0.7615, + "step": 3420 + }, + { + "epoch": 0.78, + "learning_rate": 0.0001819429723646641, + "loss": 0.6933, + "step": 3421 + }, + { + "epoch": 0.78, + "learning_rate": 0.00018193266898139507, + "loss": 0.7141, + "step": 3422 + }, + { + "epoch": 0.78, + "learning_rate": 0.00018192236295131047, + "loss": 0.746, + "step": 3423 + }, + { + "epoch": 0.78, + "learning_rate": 0.00018191205427474323, + "loss": 0.7665, + "step": 3424 + }, + { + "epoch": 0.78, + "learning_rate": 0.00018190174295202636, + "loss": 0.8173, + "step": 3425 + }, + { + "epoch": 0.78, + "learning_rate": 0.00018189142898349299, + "loss": 0.7141, + "step": 3426 + }, + { + "epoch": 0.78, + "learning_rate": 0.00018188111236947628, + "loss": 0.8058, + "step": 3427 + }, + { + "epoch": 0.78, + "learning_rate": 0.0001818707931103095, + "loss": 0.7152, + "step": 3428 + }, + { + "epoch": 0.78, + "learning_rate": 0.00018186047120632604, + "loss": 0.727, + "step": 3429 + }, + { + "epoch": 0.78, + "learning_rate": 0.00018185014665785936, + "loss": 0.7353, + "step": 3430 + }, + { + "epoch": 0.78, + "learning_rate": 0.00018183981946524297, + "loss": 0.7716, + "step": 3431 + }, + { + "epoch": 0.79, + "learning_rate": 0.00018182948962881045, + "loss": 0.7405, + "step": 3432 + }, + { + "epoch": 0.79, + "learning_rate": 0.00018181915714889554, + "loss": 0.689, + "step": 3433 + }, + { + "epoch": 0.79, + "learning_rate": 0.00018180882202583205, + "loss": 0.7522, + "step": 3434 + }, + { + "epoch": 0.79, + "learning_rate": 0.00018179848425995383, + "loss": 0.7516, + "step": 3435 + }, + { + "epoch": 0.79, + "learning_rate": 0.00018178814385159478, + "loss": 0.689, + "step": 3436 + }, + { + "epoch": 0.79, + "learning_rate": 0.0001817778008010891, + "loss": 0.707, + "step": 3437 + }, + { + "epoch": 0.79, + "learning_rate": 0.00018176745510877073, + "loss": 0.7012, + "step": 3438 + }, + { + "epoch": 0.79, + "learning_rate": 0.00018175710677497404, + "loss": 0.7356, + "step": 3439 + }, + { + "epoch": 0.79, + "learning_rate": 0.00018174675580003325, + "loss": 0.7405, + "step": 3440 + }, + { + "epoch": 0.79, + "learning_rate": 0.00018173640218428275, + "loss": 0.7069, + "step": 3441 + }, + { + "epoch": 0.79, + "learning_rate": 0.00018172604592805707, + "loss": 0.7742, + "step": 3442 + }, + { + "epoch": 0.79, + "learning_rate": 0.0001817156870316907, + "loss": 0.7203, + "step": 3443 + }, + { + "epoch": 0.79, + "learning_rate": 0.00018170532549551828, + "loss": 0.7576, + "step": 3444 + }, + { + "epoch": 0.79, + "learning_rate": 0.00018169496131987457, + "loss": 0.777, + "step": 3445 + }, + { + "epoch": 0.79, + "learning_rate": 0.0001816845945050944, + "loss": 0.7746, + "step": 3446 + }, + { + "epoch": 0.79, + "learning_rate": 0.0001816742250515126, + "loss": 0.7732, + "step": 3447 + }, + { + "epoch": 0.79, + "learning_rate": 0.00018166385295946422, + "loss": 0.7033, + "step": 3448 + }, + { + "epoch": 0.79, + "learning_rate": 0.00018165347822928432, + "loss": 0.7828, + "step": 3449 + }, + { + "epoch": 0.79, + "learning_rate": 0.000181643100861308, + "loss": 0.7856, + "step": 3450 + }, + { + "epoch": 0.79, + "learning_rate": 0.00018163272085587056, + "loss": 0.7975, + "step": 3451 + }, + { + "epoch": 0.79, + "learning_rate": 0.00018162233821330729, + "loss": 0.7597, + "step": 3452 + }, + { + "epoch": 0.79, + "learning_rate": 0.0001816119529339536, + "loss": 0.7631, + "step": 3453 + }, + { + "epoch": 0.79, + "learning_rate": 0.000181601565018145, + "loss": 0.7496, + "step": 3454 + }, + { + "epoch": 0.79, + "learning_rate": 0.00018159117446621702, + "loss": 0.6918, + "step": 3455 + }, + { + "epoch": 0.79, + "learning_rate": 0.00018158078127850538, + "loss": 0.7751, + "step": 3456 + }, + { + "epoch": 0.79, + "learning_rate": 0.00018157038545534585, + "loss": 0.7606, + "step": 3457 + }, + { + "epoch": 0.79, + "learning_rate": 0.00018155998699707416, + "loss": 0.6861, + "step": 3458 + }, + { + "epoch": 0.79, + "learning_rate": 0.00018154958590402637, + "loss": 0.7342, + "step": 3459 + }, + { + "epoch": 0.79, + "learning_rate": 0.00018153918217653835, + "loss": 0.8037, + "step": 3460 + }, + { + "epoch": 0.79, + "learning_rate": 0.0001815287758149463, + "loss": 0.8329, + "step": 3461 + }, + { + "epoch": 0.79, + "learning_rate": 0.00018151836681958632, + "loss": 0.7498, + "step": 3462 + }, + { + "epoch": 0.79, + "learning_rate": 0.00018150795519079467, + "loss": 0.7447, + "step": 3463 + }, + { + "epoch": 0.79, + "learning_rate": 0.0001814975409289078, + "loss": 0.7724, + "step": 3464 + }, + { + "epoch": 0.79, + "learning_rate": 0.000181487124034262, + "loss": 0.7118, + "step": 3465 + }, + { + "epoch": 0.79, + "learning_rate": 0.00018147670450719384, + "loss": 0.6736, + "step": 3466 + }, + { + "epoch": 0.79, + "learning_rate": 0.00018146628234803997, + "loss": 0.762, + "step": 3467 + }, + { + "epoch": 0.79, + "learning_rate": 0.00018145585755713703, + "loss": 0.7416, + "step": 3468 + }, + { + "epoch": 0.79, + "learning_rate": 0.00018144543013482177, + "loss": 0.6831, + "step": 3469 + }, + { + "epoch": 0.79, + "learning_rate": 0.00018143500008143112, + "loss": 0.7969, + "step": 3470 + }, + { + "epoch": 0.79, + "learning_rate": 0.0001814245673973019, + "loss": 0.7119, + "step": 3471 + }, + { + "epoch": 0.79, + "learning_rate": 0.00018141413208277123, + "loss": 0.6822, + "step": 3472 + }, + { + "epoch": 0.79, + "learning_rate": 0.00018140369413817623, + "loss": 0.754, + "step": 3473 + }, + { + "epoch": 0.79, + "learning_rate": 0.000181393253563854, + "loss": 0.7089, + "step": 3474 + }, + { + "epoch": 0.79, + "learning_rate": 0.0001813828103601419, + "loss": 0.7506, + "step": 3475 + }, + { + "epoch": 0.8, + "learning_rate": 0.00018137236452737732, + "loss": 0.7381, + "step": 3476 + }, + { + "epoch": 0.8, + "learning_rate": 0.00018136191606589762, + "loss": 0.6867, + "step": 3477 + }, + { + "epoch": 0.8, + "learning_rate": 0.0001813514649760404, + "loss": 0.7523, + "step": 3478 + }, + { + "epoch": 0.8, + "learning_rate": 0.00018134101125814327, + "loss": 0.7483, + "step": 3479 + }, + { + "epoch": 0.8, + "learning_rate": 0.00018133055491254392, + "loss": 0.7221, + "step": 3480 + }, + { + "epoch": 0.8, + "learning_rate": 0.00018132009593958015, + "loss": 0.7273, + "step": 3481 + }, + { + "epoch": 0.8, + "learning_rate": 0.00018130963433958983, + "loss": 0.695, + "step": 3482 + }, + { + "epoch": 0.8, + "learning_rate": 0.0001812991701129109, + "loss": 0.7667, + "step": 3483 + }, + { + "epoch": 0.8, + "learning_rate": 0.00018128870325988144, + "loss": 0.7763, + "step": 3484 + }, + { + "epoch": 0.8, + "learning_rate": 0.00018127823378083954, + "loss": 0.7098, + "step": 3485 + }, + { + "epoch": 0.8, + "learning_rate": 0.0001812677616761235, + "loss": 0.7441, + "step": 3486 + }, + { + "epoch": 0.8, + "learning_rate": 0.0001812572869460715, + "loss": 0.6937, + "step": 3487 + }, + { + "epoch": 0.8, + "learning_rate": 0.000181246809591022, + "loss": 0.7418, + "step": 3488 + }, + { + "epoch": 0.8, + "learning_rate": 0.00018123632961131346, + "loss": 0.6869, + "step": 3489 + }, + { + "epoch": 0.8, + "learning_rate": 0.00018122584700728443, + "loss": 0.7095, + "step": 3490 + }, + { + "epoch": 0.8, + "learning_rate": 0.00018121536177927348, + "loss": 0.7606, + "step": 3491 + }, + { + "epoch": 0.8, + "learning_rate": 0.00018120487392761945, + "loss": 0.7558, + "step": 3492 + }, + { + "epoch": 0.8, + "learning_rate": 0.0001811943834526611, + "loss": 0.7459, + "step": 3493 + }, + { + "epoch": 0.8, + "learning_rate": 0.00018118389035473727, + "loss": 0.7361, + "step": 3494 + }, + { + "epoch": 0.8, + "learning_rate": 0.000181173394634187, + "loss": 0.758, + "step": 3495 + }, + { + "epoch": 0.8, + "learning_rate": 0.00018116289629134934, + "loss": 0.7381, + "step": 3496 + }, + { + "epoch": 0.8, + "learning_rate": 0.00018115239532656344, + "loss": 0.7154, + "step": 3497 + }, + { + "epoch": 0.8, + "learning_rate": 0.00018114189174016848, + "loss": 0.6799, + "step": 3498 + }, + { + "epoch": 0.8, + "learning_rate": 0.00018113138553250383, + "loss": 0.7157, + "step": 3499 + }, + { + "epoch": 0.8, + "learning_rate": 0.0001811208767039089, + "loss": 0.7148, + "step": 3500 + }, + { + "epoch": 0.8, + "learning_rate": 0.00018111036525472312, + "loss": 0.7223, + "step": 3501 + }, + { + "epoch": 0.8, + "learning_rate": 0.00018109985118528612, + "loss": 0.777, + "step": 3502 + }, + { + "epoch": 0.8, + "learning_rate": 0.0001810893344959375, + "loss": 0.7228, + "step": 3503 + }, + { + "epoch": 0.8, + "learning_rate": 0.00018107881518701703, + "loss": 0.7237, + "step": 3504 + }, + { + "epoch": 0.8, + "learning_rate": 0.00018106829325886453, + "loss": 0.7747, + "step": 3505 + }, + { + "epoch": 0.8, + "learning_rate": 0.00018105776871181993, + "loss": 0.74, + "step": 3506 + }, + { + "epoch": 0.8, + "learning_rate": 0.00018104724154622317, + "loss": 0.6622, + "step": 3507 + }, + { + "epoch": 0.8, + "learning_rate": 0.00018103671176241437, + "loss": 0.7018, + "step": 3508 + }, + { + "epoch": 0.8, + "learning_rate": 0.0001810261793607337, + "loss": 0.6833, + "step": 3509 + }, + { + "epoch": 0.8, + "learning_rate": 0.00018101564434152136, + "loss": 0.7112, + "step": 3510 + }, + { + "epoch": 0.8, + "learning_rate": 0.00018100510670511773, + "loss": 0.7098, + "step": 3511 + }, + { + "epoch": 0.8, + "learning_rate": 0.0001809945664518632, + "loss": 0.7525, + "step": 3512 + }, + { + "epoch": 0.8, + "learning_rate": 0.00018098402358209824, + "loss": 0.7854, + "step": 3513 + }, + { + "epoch": 0.8, + "learning_rate": 0.0001809734780961635, + "loss": 0.7937, + "step": 3514 + }, + { + "epoch": 0.8, + "learning_rate": 0.0001809629299943996, + "loss": 0.775, + "step": 3515 + }, + { + "epoch": 0.8, + "learning_rate": 0.00018095237927714735, + "loss": 0.7766, + "step": 3516 + }, + { + "epoch": 0.8, + "learning_rate": 0.00018094182594474752, + "loss": 0.7746, + "step": 3517 + }, + { + "epoch": 0.8, + "learning_rate": 0.0001809312699975411, + "loss": 0.7214, + "step": 3518 + }, + { + "epoch": 0.8, + "learning_rate": 0.000180920711435869, + "loss": 0.7364, + "step": 3519 + }, + { + "epoch": 0.81, + "learning_rate": 0.00018091015026007242, + "loss": 0.758, + "step": 3520 + }, + { + "epoch": 0.81, + "learning_rate": 0.00018089958647049247, + "loss": 0.7942, + "step": 3521 + }, + { + "epoch": 0.81, + "learning_rate": 0.00018088902006747044, + "loss": 0.6805, + "step": 3522 + }, + { + "epoch": 0.81, + "learning_rate": 0.00018087845105134765, + "loss": 0.6613, + "step": 3523 + }, + { + "epoch": 0.81, + "learning_rate": 0.00018086787942246557, + "loss": 0.7344, + "step": 3524 + }, + { + "epoch": 0.81, + "learning_rate": 0.0001808573051811657, + "loss": 0.7428, + "step": 3525 + }, + { + "epoch": 0.81, + "learning_rate": 0.00018084672832778957, + "loss": 0.725, + "step": 3526 + }, + { + "epoch": 0.81, + "learning_rate": 0.00018083614886267897, + "loss": 0.8221, + "step": 3527 + }, + { + "epoch": 0.81, + "learning_rate": 0.00018082556678617563, + "loss": 0.8102, + "step": 3528 + }, + { + "epoch": 0.81, + "learning_rate": 0.00018081498209862132, + "loss": 0.7951, + "step": 3529 + }, + { + "epoch": 0.81, + "learning_rate": 0.0001808043948003581, + "loss": 0.7079, + "step": 3530 + }, + { + "epoch": 0.81, + "learning_rate": 0.00018079380489172792, + "loss": 0.7847, + "step": 3531 + }, + { + "epoch": 0.81, + "learning_rate": 0.00018078321237307288, + "loss": 0.7505, + "step": 3532 + }, + { + "epoch": 0.81, + "learning_rate": 0.00018077261724473522, + "loss": 0.7646, + "step": 3533 + }, + { + "epoch": 0.81, + "learning_rate": 0.00018076201950705719, + "loss": 0.7178, + "step": 3534 + }, + { + "epoch": 0.81, + "learning_rate": 0.0001807514191603811, + "loss": 0.7532, + "step": 3535 + }, + { + "epoch": 0.81, + "learning_rate": 0.00018074081620504946, + "loss": 0.6998, + "step": 3536 + }, + { + "epoch": 0.81, + "learning_rate": 0.00018073021064140475, + "loss": 0.7464, + "step": 3537 + }, + { + "epoch": 0.81, + "learning_rate": 0.0001807196024697896, + "loss": 0.7193, + "step": 3538 + }, + { + "epoch": 0.81, + "learning_rate": 0.0001807089916905467, + "loss": 0.7355, + "step": 3539 + }, + { + "epoch": 0.81, + "learning_rate": 0.00018069837830401886, + "loss": 0.7741, + "step": 3540 + }, + { + "epoch": 0.81, + "learning_rate": 0.00018068776231054887, + "loss": 0.799, + "step": 3541 + }, + { + "epoch": 0.81, + "learning_rate": 0.00018067714371047973, + "loss": 0.7243, + "step": 3542 + }, + { + "epoch": 0.81, + "learning_rate": 0.00018066652250415448, + "loss": 0.6951, + "step": 3543 + }, + { + "epoch": 0.81, + "learning_rate": 0.00018065589869191618, + "loss": 0.7437, + "step": 3544 + }, + { + "epoch": 0.81, + "learning_rate": 0.0001806452722741081, + "loss": 0.7513, + "step": 3545 + }, + { + "epoch": 0.81, + "learning_rate": 0.0001806346432510735, + "loss": 0.7473, + "step": 3546 + }, + { + "epoch": 0.81, + "learning_rate": 0.00018062401162315572, + "loss": 0.7408, + "step": 3547 + }, + { + "epoch": 0.81, + "learning_rate": 0.00018061337739069821, + "loss": 0.7113, + "step": 3548 + }, + { + "epoch": 0.81, + "learning_rate": 0.00018060274055404455, + "loss": 0.7203, + "step": 3549 + }, + { + "epoch": 0.81, + "learning_rate": 0.00018059210111353834, + "loss": 0.7334, + "step": 3550 + }, + { + "epoch": 0.81, + "learning_rate": 0.0001805814590695233, + "loss": 0.7059, + "step": 3551 + }, + { + "epoch": 0.81, + "learning_rate": 0.00018057081442234316, + "loss": 0.7218, + "step": 3552 + }, + { + "epoch": 0.81, + "learning_rate": 0.00018056016717234185, + "loss": 0.7179, + "step": 3553 + }, + { + "epoch": 0.81, + "learning_rate": 0.00018054951731986333, + "loss": 0.7841, + "step": 3554 + }, + { + "epoch": 0.81, + "learning_rate": 0.0001805388648652516, + "loss": 0.7794, + "step": 3555 + }, + { + "epoch": 0.81, + "learning_rate": 0.00018052820980885082, + "loss": 0.6963, + "step": 3556 + }, + { + "epoch": 0.81, + "learning_rate": 0.0001805175521510052, + "loss": 0.7094, + "step": 3557 + }, + { + "epoch": 0.81, + "learning_rate": 0.00018050689189205897, + "loss": 0.7755, + "step": 3558 + }, + { + "epoch": 0.81, + "learning_rate": 0.0001804962290323566, + "loss": 0.7897, + "step": 3559 + }, + { + "epoch": 0.81, + "learning_rate": 0.00018048556357224248, + "loss": 0.7673, + "step": 3560 + }, + { + "epoch": 0.81, + "learning_rate": 0.0001804748955120612, + "loss": 0.7548, + "step": 3561 + }, + { + "epoch": 0.81, + "learning_rate": 0.00018046422485215737, + "loss": 0.6875, + "step": 3562 + }, + { + "epoch": 0.81, + "learning_rate": 0.00018045355159287572, + "loss": 0.8002, + "step": 3563 + }, + { + "epoch": 0.82, + "learning_rate": 0.000180442875734561, + "loss": 0.7297, + "step": 3564 + }, + { + "epoch": 0.82, + "learning_rate": 0.00018043219727755812, + "loss": 0.697, + "step": 3565 + }, + { + "epoch": 0.82, + "learning_rate": 0.00018042151622221208, + "loss": 0.7353, + "step": 3566 + }, + { + "epoch": 0.82, + "learning_rate": 0.0001804108325688679, + "loss": 0.656, + "step": 3567 + }, + { + "epoch": 0.82, + "learning_rate": 0.0001804001463178707, + "loss": 0.6711, + "step": 3568 + }, + { + "epoch": 0.82, + "learning_rate": 0.0001803894574695657, + "loss": 0.6732, + "step": 3569 + }, + { + "epoch": 0.82, + "learning_rate": 0.0001803787660242982, + "loss": 0.6818, + "step": 3570 + }, + { + "epoch": 0.82, + "learning_rate": 0.00018036807198241358, + "loss": 0.7647, + "step": 3571 + }, + { + "epoch": 0.82, + "learning_rate": 0.00018035737534425735, + "loss": 0.7589, + "step": 3572 + }, + { + "epoch": 0.82, + "learning_rate": 0.00018034667611017505, + "loss": 0.7687, + "step": 3573 + }, + { + "epoch": 0.82, + "learning_rate": 0.00018033597428051225, + "loss": 0.7568, + "step": 3574 + }, + { + "epoch": 0.82, + "learning_rate": 0.00018032526985561475, + "loss": 0.7225, + "step": 3575 + }, + { + "epoch": 0.82, + "learning_rate": 0.0001803145628358283, + "loss": 0.7723, + "step": 3576 + }, + { + "epoch": 0.82, + "learning_rate": 0.00018030385322149886, + "loss": 0.7768, + "step": 3577 + }, + { + "epoch": 0.82, + "learning_rate": 0.0001802931410129723, + "loss": 0.7663, + "step": 3578 + }, + { + "epoch": 0.82, + "learning_rate": 0.00018028242621059474, + "loss": 0.7111, + "step": 3579 + }, + { + "epoch": 0.82, + "learning_rate": 0.00018027170881471232, + "loss": 0.8046, + "step": 3580 + }, + { + "epoch": 0.82, + "learning_rate": 0.00018026098882567126, + "loss": 0.683, + "step": 3581 + }, + { + "epoch": 0.82, + "learning_rate": 0.00018025026624381784, + "loss": 0.6661, + "step": 3582 + }, + { + "epoch": 0.82, + "learning_rate": 0.00018023954106949848, + "loss": 0.7438, + "step": 3583 + }, + { + "epoch": 0.82, + "learning_rate": 0.00018022881330305963, + "loss": 0.7585, + "step": 3584 + }, + { + "epoch": 0.82, + "learning_rate": 0.00018021808294484785, + "loss": 0.7903, + "step": 3585 + }, + { + "epoch": 0.82, + "learning_rate": 0.0001802073499952098, + "loss": 0.6978, + "step": 3586 + }, + { + "epoch": 0.82, + "learning_rate": 0.0001801966144544922, + "loss": 0.7808, + "step": 3587 + }, + { + "epoch": 0.82, + "learning_rate": 0.00018018587632304188, + "loss": 0.7301, + "step": 3588 + }, + { + "epoch": 0.82, + "learning_rate": 0.00018017513560120567, + "loss": 0.7108, + "step": 3589 + }, + { + "epoch": 0.82, + "learning_rate": 0.0001801643922893306, + "loss": 0.712, + "step": 3590 + }, + { + "epoch": 0.82, + "learning_rate": 0.00018015364638776372, + "loss": 0.7057, + "step": 3591 + }, + { + "epoch": 0.82, + "learning_rate": 0.00018014289789685214, + "loss": 0.7012, + "step": 3592 + }, + { + "epoch": 0.82, + "learning_rate": 0.00018013214681694315, + "loss": 0.7028, + "step": 3593 + }, + { + "epoch": 0.82, + "learning_rate": 0.00018012139314838404, + "loss": 0.7099, + "step": 3594 + }, + { + "epoch": 0.82, + "learning_rate": 0.00018011063689152214, + "loss": 0.7467, + "step": 3595 + }, + { + "epoch": 0.82, + "learning_rate": 0.00018009987804670503, + "loss": 0.6943, + "step": 3596 + }, + { + "epoch": 0.82, + "learning_rate": 0.00018008911661428022, + "loss": 0.7254, + "step": 3597 + }, + { + "epoch": 0.82, + "learning_rate": 0.0001800783525945953, + "loss": 0.7677, + "step": 3598 + }, + { + "epoch": 0.82, + "learning_rate": 0.0001800675859879981, + "loss": 0.7248, + "step": 3599 + }, + { + "epoch": 0.82, + "learning_rate": 0.0001800568167948364, + "loss": 0.7081, + "step": 3600 + }, + { + "epoch": 0.82, + "learning_rate": 0.0001800460450154581, + "loss": 0.7909, + "step": 3601 + }, + { + "epoch": 0.82, + "learning_rate": 0.00018003527065021114, + "loss": 0.7852, + "step": 3602 + }, + { + "epoch": 0.82, + "learning_rate": 0.00018002449369944361, + "loss": 0.7678, + "step": 3603 + }, + { + "epoch": 0.82, + "learning_rate": 0.00018001371416350364, + "loss": 0.8182, + "step": 3604 + }, + { + "epoch": 0.82, + "learning_rate": 0.00018000293204273953, + "loss": 0.787, + "step": 3605 + }, + { + "epoch": 0.82, + "learning_rate": 0.00017999214733749948, + "loss": 0.7015, + "step": 3606 + }, + { + "epoch": 0.83, + "learning_rate": 0.00017998136004813197, + "loss": 0.757, + "step": 3607 + }, + { + "epoch": 0.83, + "learning_rate": 0.00017997057017498546, + "loss": 0.7423, + "step": 3608 + }, + { + "epoch": 0.83, + "learning_rate": 0.00017995977771840852, + "loss": 0.7992, + "step": 3609 + }, + { + "epoch": 0.83, + "learning_rate": 0.0001799489826787498, + "loss": 0.773, + "step": 3610 + }, + { + "epoch": 0.83, + "learning_rate": 0.000179938185056358, + "loss": 0.7713, + "step": 3611 + }, + { + "epoch": 0.83, + "learning_rate": 0.00017992738485158197, + "loss": 0.7985, + "step": 3612 + }, + { + "epoch": 0.83, + "learning_rate": 0.00017991658206477059, + "loss": 0.783, + "step": 3613 + }, + { + "epoch": 0.83, + "learning_rate": 0.00017990577669627284, + "loss": 0.6982, + "step": 3614 + }, + { + "epoch": 0.83, + "learning_rate": 0.00017989496874643777, + "loss": 0.7021, + "step": 3615 + }, + { + "epoch": 0.83, + "learning_rate": 0.0001798841582156146, + "loss": 0.7493, + "step": 3616 + }, + { + "epoch": 0.83, + "learning_rate": 0.0001798733451041525, + "loss": 0.7708, + "step": 3617 + }, + { + "epoch": 0.83, + "learning_rate": 0.00017986252941240078, + "loss": 0.7327, + "step": 3618 + }, + { + "epoch": 0.83, + "learning_rate": 0.00017985171114070884, + "loss": 0.6444, + "step": 3619 + }, + { + "epoch": 0.83, + "learning_rate": 0.0001798408902894262, + "loss": 0.7222, + "step": 3620 + }, + { + "epoch": 0.83, + "learning_rate": 0.0001798300668589024, + "loss": 0.7594, + "step": 3621 + }, + { + "epoch": 0.83, + "learning_rate": 0.00017981924084948706, + "loss": 0.7058, + "step": 3622 + }, + { + "epoch": 0.83, + "learning_rate": 0.00017980841226153, + "loss": 0.7543, + "step": 3623 + }, + { + "epoch": 0.83, + "learning_rate": 0.00017979758109538095, + "loss": 0.6835, + "step": 3624 + }, + { + "epoch": 0.83, + "learning_rate": 0.00017978674735138982, + "loss": 0.745, + "step": 3625 + }, + { + "epoch": 0.83, + "learning_rate": 0.00017977591102990662, + "loss": 0.8116, + "step": 3626 + }, + { + "epoch": 0.83, + "learning_rate": 0.0001797650721312814, + "loss": 0.73, + "step": 3627 + }, + { + "epoch": 0.83, + "learning_rate": 0.0001797542306558643, + "loss": 0.8038, + "step": 3628 + }, + { + "epoch": 0.83, + "learning_rate": 0.00017974338660400557, + "loss": 0.7233, + "step": 3629 + }, + { + "epoch": 0.83, + "learning_rate": 0.00017973253997605548, + "loss": 0.7724, + "step": 3630 + }, + { + "epoch": 0.83, + "learning_rate": 0.0001797216907723645, + "loss": 0.7178, + "step": 3631 + }, + { + "epoch": 0.83, + "learning_rate": 0.00017971083899328308, + "loss": 0.7266, + "step": 3632 + }, + { + "epoch": 0.83, + "learning_rate": 0.00017969998463916172, + "loss": 0.7259, + "step": 3633 + }, + { + "epoch": 0.83, + "learning_rate": 0.00017968912771035119, + "loss": 0.7512, + "step": 3634 + }, + { + "epoch": 0.83, + "learning_rate": 0.00017967826820720214, + "loss": 0.686, + "step": 3635 + }, + { + "epoch": 0.83, + "learning_rate": 0.0001796674061300654, + "loss": 0.6915, + "step": 3636 + }, + { + "epoch": 0.83, + "learning_rate": 0.00017965654147929184, + "loss": 0.6945, + "step": 3637 + }, + { + "epoch": 0.83, + "learning_rate": 0.0001796456742552325, + "loss": 0.6924, + "step": 3638 + }, + { + "epoch": 0.83, + "learning_rate": 0.00017963480445823838, + "loss": 0.7334, + "step": 3639 + }, + { + "epoch": 0.83, + "learning_rate": 0.00017962393208866066, + "loss": 0.7315, + "step": 3640 + }, + { + "epoch": 0.83, + "learning_rate": 0.00017961305714685058, + "loss": 0.7139, + "step": 3641 + }, + { + "epoch": 0.83, + "learning_rate": 0.00017960217963315943, + "loss": 0.6965, + "step": 3642 + }, + { + "epoch": 0.83, + "learning_rate": 0.00017959129954793861, + "loss": 0.6981, + "step": 3643 + }, + { + "epoch": 0.83, + "learning_rate": 0.0001795804168915396, + "loss": 0.7111, + "step": 3644 + }, + { + "epoch": 0.83, + "learning_rate": 0.00017956953166431398, + "loss": 0.7253, + "step": 3645 + }, + { + "epoch": 0.83, + "learning_rate": 0.0001795586438666134, + "loss": 0.7826, + "step": 3646 + }, + { + "epoch": 0.83, + "learning_rate": 0.0001795477534987895, + "loss": 0.7554, + "step": 3647 + }, + { + "epoch": 0.83, + "learning_rate": 0.0001795368605611942, + "loss": 0.7539, + "step": 3648 + }, + { + "epoch": 0.83, + "learning_rate": 0.00017952596505417932, + "loss": 0.7884, + "step": 3649 + }, + { + "epoch": 0.83, + "learning_rate": 0.00017951506697809692, + "loss": 0.781, + "step": 3650 + }, + { + "epoch": 0.84, + "learning_rate": 0.00017950416633329895, + "loss": 0.7525, + "step": 3651 + }, + { + "epoch": 0.84, + "learning_rate": 0.00017949326312013763, + "loss": 0.7307, + "step": 3652 + }, + { + "epoch": 0.84, + "learning_rate": 0.00017948235733896515, + "loss": 0.668, + "step": 3653 + }, + { + "epoch": 0.84, + "learning_rate": 0.00017947144899013387, + "loss": 0.793, + "step": 3654 + }, + { + "epoch": 0.84, + "learning_rate": 0.00017946053807399613, + "loss": 0.7317, + "step": 3655 + }, + { + "epoch": 0.84, + "learning_rate": 0.0001794496245909044, + "loss": 0.7367, + "step": 3656 + }, + { + "epoch": 0.84, + "learning_rate": 0.00017943870854121124, + "loss": 0.7777, + "step": 3657 + }, + { + "epoch": 0.84, + "learning_rate": 0.00017942778992526936, + "loss": 0.6818, + "step": 3658 + }, + { + "epoch": 0.84, + "learning_rate": 0.00017941686874343142, + "loss": 0.7587, + "step": 3659 + }, + { + "epoch": 0.84, + "learning_rate": 0.00017940594499605024, + "loss": 0.7402, + "step": 3660 + }, + { + "epoch": 0.84, + "learning_rate": 0.0001793950186834787, + "loss": 0.7058, + "step": 3661 + }, + { + "epoch": 0.84, + "learning_rate": 0.00017938408980606977, + "loss": 0.7705, + "step": 3662 + }, + { + "epoch": 0.84, + "learning_rate": 0.0001793731583641765, + "loss": 0.7211, + "step": 3663 + }, + { + "epoch": 0.84, + "learning_rate": 0.00017936222435815206, + "loss": 0.7898, + "step": 3664 + }, + { + "epoch": 0.84, + "learning_rate": 0.00017935128778834964, + "loss": 0.7251, + "step": 3665 + }, + { + "epoch": 0.84, + "learning_rate": 0.00017934034865512257, + "loss": 0.7601, + "step": 3666 + }, + { + "epoch": 0.84, + "learning_rate": 0.00017932940695882424, + "loss": 0.7283, + "step": 3667 + }, + { + "epoch": 0.84, + "learning_rate": 0.00017931846269980805, + "loss": 0.7239, + "step": 3668 + }, + { + "epoch": 0.84, + "learning_rate": 0.00017930751587842765, + "loss": 0.6966, + "step": 3669 + }, + { + "epoch": 0.84, + "learning_rate": 0.00017929656649503658, + "loss": 0.7066, + "step": 3670 + }, + { + "epoch": 0.84, + "learning_rate": 0.00017928561454998865, + "loss": 0.695, + "step": 3671 + }, + { + "epoch": 0.84, + "learning_rate": 0.00017927466004363758, + "loss": 0.7535, + "step": 3672 + }, + { + "epoch": 0.84, + "learning_rate": 0.00017926370297633728, + "loss": 0.6617, + "step": 3673 + }, + { + "epoch": 0.84, + "learning_rate": 0.00017925274334844174, + "loss": 0.7707, + "step": 3674 + }, + { + "epoch": 0.84, + "learning_rate": 0.000179241781160305, + "loss": 0.7573, + "step": 3675 + }, + { + "epoch": 0.84, + "learning_rate": 0.00017923081641228115, + "loss": 0.7896, + "step": 3676 + }, + { + "epoch": 0.84, + "learning_rate": 0.00017921984910472446, + "loss": 0.7003, + "step": 3677 + }, + { + "epoch": 0.84, + "learning_rate": 0.00017920887923798917, + "loss": 0.8472, + "step": 3678 + }, + { + "epoch": 0.84, + "learning_rate": 0.00017919790681242975, + "loss": 0.7435, + "step": 3679 + }, + { + "epoch": 0.84, + "learning_rate": 0.00017918693182840054, + "loss": 0.688, + "step": 3680 + }, + { + "epoch": 0.84, + "learning_rate": 0.00017917595428625614, + "loss": 0.7232, + "step": 3681 + }, + { + "epoch": 0.84, + "learning_rate": 0.00017916497418635124, + "loss": 0.7513, + "step": 3682 + }, + { + "epoch": 0.84, + "learning_rate": 0.00017915399152904047, + "loss": 0.7841, + "step": 3683 + }, + { + "epoch": 0.84, + "learning_rate": 0.00017914300631467861, + "loss": 0.6829, + "step": 3684 + }, + { + "epoch": 0.84, + "learning_rate": 0.00017913201854362058, + "loss": 0.6923, + "step": 3685 + }, + { + "epoch": 0.84, + "learning_rate": 0.00017912102821622137, + "loss": 0.7256, + "step": 3686 + }, + { + "epoch": 0.84, + "learning_rate": 0.00017911003533283593, + "loss": 0.7268, + "step": 3687 + }, + { + "epoch": 0.84, + "learning_rate": 0.00017909903989381945, + "loss": 0.7311, + "step": 3688 + }, + { + "epoch": 0.84, + "learning_rate": 0.0001790880418995271, + "loss": 0.7405, + "step": 3689 + }, + { + "epoch": 0.84, + "learning_rate": 0.0001790770413503142, + "loss": 0.7722, + "step": 3690 + }, + { + "epoch": 0.84, + "learning_rate": 0.00017906603824653606, + "loss": 0.6734, + "step": 3691 + }, + { + "epoch": 0.84, + "learning_rate": 0.00017905503258854822, + "loss": 0.694, + "step": 3692 + }, + { + "epoch": 0.84, + "learning_rate": 0.00017904402437670614, + "loss": 0.799, + "step": 3693 + }, + { + "epoch": 0.84, + "learning_rate": 0.0001790330136113655, + "loss": 0.7622, + "step": 3694 + }, + { + "epoch": 0.85, + "learning_rate": 0.00017902200029288194, + "loss": 0.717, + "step": 3695 + }, + { + "epoch": 0.85, + "learning_rate": 0.0001790109844216113, + "loss": 0.6928, + "step": 3696 + }, + { + "epoch": 0.85, + "learning_rate": 0.00017899996599790937, + "loss": 0.7552, + "step": 3697 + }, + { + "epoch": 0.85, + "learning_rate": 0.00017898894502213218, + "loss": 0.7671, + "step": 3698 + }, + { + "epoch": 0.85, + "learning_rate": 0.0001789779214946357, + "loss": 0.705, + "step": 3699 + }, + { + "epoch": 0.85, + "learning_rate": 0.00017896689541577608, + "loss": 0.7286, + "step": 3700 + }, + { + "epoch": 0.85, + "learning_rate": 0.00017895586678590947, + "loss": 0.772, + "step": 3701 + }, + { + "epoch": 0.85, + "learning_rate": 0.00017894483560539223, + "loss": 0.689, + "step": 3702 + }, + { + "epoch": 0.85, + "learning_rate": 0.00017893380187458061, + "loss": 0.6885, + "step": 3703 + }, + { + "epoch": 0.85, + "learning_rate": 0.00017892276559383112, + "loss": 0.7092, + "step": 3704 + }, + { + "epoch": 0.85, + "learning_rate": 0.0001789117267635003, + "loss": 0.7138, + "step": 3705 + }, + { + "epoch": 0.85, + "learning_rate": 0.00017890068538394473, + "loss": 0.6859, + "step": 3706 + }, + { + "epoch": 0.85, + "learning_rate": 0.00017888964145552108, + "loss": 0.7367, + "step": 3707 + }, + { + "epoch": 0.85, + "learning_rate": 0.00017887859497858614, + "loss": 0.7119, + "step": 3708 + }, + { + "epoch": 0.85, + "learning_rate": 0.0001788675459534968, + "loss": 0.6952, + "step": 3709 + }, + { + "epoch": 0.85, + "learning_rate": 0.0001788564943806099, + "loss": 0.7262, + "step": 3710 + }, + { + "epoch": 0.85, + "learning_rate": 0.00017884544026028256, + "loss": 0.7183, + "step": 3711 + }, + { + "epoch": 0.85, + "learning_rate": 0.00017883438359287182, + "loss": 0.7528, + "step": 3712 + }, + { + "epoch": 0.85, + "learning_rate": 0.0001788233243787349, + "loss": 0.7477, + "step": 3713 + }, + { + "epoch": 0.85, + "learning_rate": 0.00017881226261822907, + "loss": 0.7191, + "step": 3714 + }, + { + "epoch": 0.85, + "learning_rate": 0.00017880119831171161, + "loss": 0.7716, + "step": 3715 + }, + { + "epoch": 0.85, + "learning_rate": 0.00017879013145954002, + "loss": 0.7044, + "step": 3716 + }, + { + "epoch": 0.85, + "learning_rate": 0.00017877906206207177, + "loss": 0.6626, + "step": 3717 + }, + { + "epoch": 0.85, + "learning_rate": 0.00017876799011966448, + "loss": 0.7173, + "step": 3718 + }, + { + "epoch": 0.85, + "learning_rate": 0.00017875691563267582, + "loss": 0.767, + "step": 3719 + }, + { + "epoch": 0.85, + "learning_rate": 0.00017874583860146356, + "loss": 0.7557, + "step": 3720 + }, + { + "epoch": 0.85, + "learning_rate": 0.00017873475902638553, + "loss": 0.749, + "step": 3721 + }, + { + "epoch": 0.85, + "learning_rate": 0.00017872367690779966, + "loss": 0.7584, + "step": 3722 + }, + { + "epoch": 0.85, + "learning_rate": 0.0001787125922460639, + "loss": 0.7942, + "step": 3723 + }, + { + "epoch": 0.85, + "learning_rate": 0.00017870150504153642, + "loss": 0.7938, + "step": 3724 + }, + { + "epoch": 0.85, + "learning_rate": 0.00017869041529457533, + "loss": 0.7827, + "step": 3725 + }, + { + "epoch": 0.85, + "learning_rate": 0.00017867932300553893, + "loss": 0.757, + "step": 3726 + }, + { + "epoch": 0.85, + "learning_rate": 0.00017866822817478553, + "loss": 0.7284, + "step": 3727 + }, + { + "epoch": 0.85, + "learning_rate": 0.00017865713080267352, + "loss": 0.7686, + "step": 3728 + }, + { + "epoch": 0.85, + "learning_rate": 0.00017864603088956144, + "loss": 0.7712, + "step": 3729 + }, + { + "epoch": 0.85, + "learning_rate": 0.00017863492843580786, + "loss": 0.7672, + "step": 3730 + }, + { + "epoch": 0.85, + "learning_rate": 0.0001786238234417714, + "loss": 0.6538, + "step": 3731 + }, + { + "epoch": 0.85, + "learning_rate": 0.00017861271590781085, + "loss": 0.8068, + "step": 3732 + }, + { + "epoch": 0.85, + "learning_rate": 0.00017860160583428508, + "loss": 0.7607, + "step": 3733 + }, + { + "epoch": 0.85, + "learning_rate": 0.00017859049322155288, + "loss": 0.7582, + "step": 3734 + }, + { + "epoch": 0.85, + "learning_rate": 0.00017857937806997333, + "loss": 0.6521, + "step": 3735 + }, + { + "epoch": 0.85, + "learning_rate": 0.00017856826037990545, + "loss": 0.7514, + "step": 3736 + }, + { + "epoch": 0.85, + "learning_rate": 0.00017855714015170847, + "loss": 0.774, + "step": 3737 + }, + { + "epoch": 0.85, + "learning_rate": 0.00017854601738574152, + "loss": 0.6915, + "step": 3738 + }, + { + "epoch": 0.86, + "learning_rate": 0.000178534892082364, + "loss": 0.7392, + "step": 3739 + }, + { + "epoch": 0.86, + "learning_rate": 0.00017852376424193532, + "loss": 0.7108, + "step": 3740 + }, + { + "epoch": 0.86, + "learning_rate": 0.00017851263386481489, + "loss": 0.6994, + "step": 3741 + }, + { + "epoch": 0.86, + "learning_rate": 0.0001785015009513623, + "loss": 0.6499, + "step": 3742 + }, + { + "epoch": 0.86, + "learning_rate": 0.00017849036550193726, + "loss": 0.7151, + "step": 3743 + }, + { + "epoch": 0.86, + "learning_rate": 0.00017847922751689941, + "loss": 0.7085, + "step": 3744 + }, + { + "epoch": 0.86, + "learning_rate": 0.00017846808699660858, + "loss": 0.7268, + "step": 3745 + }, + { + "epoch": 0.86, + "learning_rate": 0.0001784569439414247, + "loss": 0.7236, + "step": 3746 + }, + { + "epoch": 0.86, + "learning_rate": 0.00017844579835170774, + "loss": 0.674, + "step": 3747 + }, + { + "epoch": 0.86, + "learning_rate": 0.0001784346502278177, + "loss": 0.7083, + "step": 3748 + }, + { + "epoch": 0.86, + "learning_rate": 0.0001784234995701148, + "loss": 0.725, + "step": 3749 + }, + { + "epoch": 0.86, + "learning_rate": 0.00017841234637895918, + "loss": 0.684, + "step": 3750 + }, + { + "epoch": 0.86, + "learning_rate": 0.00017840119065471116, + "loss": 0.7707, + "step": 3751 + }, + { + "epoch": 0.86, + "learning_rate": 0.00017839003239773118, + "loss": 0.7248, + "step": 3752 + }, + { + "epoch": 0.86, + "learning_rate": 0.00017837887160837966, + "loss": 0.7571, + "step": 3753 + }, + { + "epoch": 0.86, + "learning_rate": 0.0001783677082870171, + "loss": 0.7593, + "step": 3754 + }, + { + "epoch": 0.86, + "learning_rate": 0.00017835654243400425, + "loss": 0.7113, + "step": 3755 + }, + { + "epoch": 0.86, + "learning_rate": 0.00017834537404970172, + "loss": 0.7518, + "step": 3756 + }, + { + "epoch": 0.86, + "learning_rate": 0.00017833420313447034, + "loss": 0.7504, + "step": 3757 + }, + { + "epoch": 0.86, + "learning_rate": 0.00017832302968867096, + "loss": 0.8169, + "step": 3758 + }, + { + "epoch": 0.86, + "learning_rate": 0.00017831185371266453, + "loss": 0.7957, + "step": 3759 + }, + { + "epoch": 0.86, + "learning_rate": 0.00017830067520681214, + "loss": 0.7519, + "step": 3760 + }, + { + "epoch": 0.86, + "learning_rate": 0.00017828949417147486, + "loss": 0.7513, + "step": 3761 + }, + { + "epoch": 0.86, + "learning_rate": 0.0001782783106070139, + "loss": 0.7451, + "step": 3762 + }, + { + "epoch": 0.86, + "learning_rate": 0.0001782671245137906, + "loss": 0.7634, + "step": 3763 + }, + { + "epoch": 0.86, + "learning_rate": 0.00017825593589216622, + "loss": 0.6833, + "step": 3764 + }, + { + "epoch": 0.86, + "learning_rate": 0.0001782447447425023, + "loss": 0.7486, + "step": 3765 + }, + { + "epoch": 0.86, + "learning_rate": 0.0001782335510651603, + "loss": 0.723, + "step": 3766 + }, + { + "epoch": 0.86, + "learning_rate": 0.00017822235486050187, + "loss": 0.768, + "step": 3767 + }, + { + "epoch": 0.86, + "learning_rate": 0.00017821115612888869, + "loss": 0.7227, + "step": 3768 + }, + { + "epoch": 0.86, + "learning_rate": 0.00017819995487068253, + "loss": 0.7722, + "step": 3769 + }, + { + "epoch": 0.86, + "learning_rate": 0.00017818875108624522, + "loss": 0.7718, + "step": 3770 + }, + { + "epoch": 0.86, + "learning_rate": 0.0001781775447759387, + "loss": 0.735, + "step": 3771 + }, + { + "epoch": 0.86, + "learning_rate": 0.00017816633594012506, + "loss": 0.7396, + "step": 3772 + }, + { + "epoch": 0.86, + "learning_rate": 0.0001781551245791663, + "loss": 0.6986, + "step": 3773 + }, + { + "epoch": 0.86, + "learning_rate": 0.0001781439106934247, + "loss": 0.6933, + "step": 3774 + }, + { + "epoch": 0.86, + "learning_rate": 0.00017813269428326243, + "loss": 0.7058, + "step": 3775 + }, + { + "epoch": 0.86, + "learning_rate": 0.0001781214753490419, + "loss": 0.7284, + "step": 3776 + }, + { + "epoch": 0.86, + "learning_rate": 0.00017811025389112545, + "loss": 0.7159, + "step": 3777 + }, + { + "epoch": 0.86, + "learning_rate": 0.00017809902990987571, + "loss": 0.7778, + "step": 3778 + }, + { + "epoch": 0.86, + "learning_rate": 0.00017808780340565513, + "loss": 0.7495, + "step": 3779 + }, + { + "epoch": 0.86, + "learning_rate": 0.0001780765743788265, + "loss": 0.7719, + "step": 3780 + }, + { + "epoch": 0.86, + "learning_rate": 0.00017806534282975253, + "loss": 0.7314, + "step": 3781 + }, + { + "epoch": 0.87, + "learning_rate": 0.000178054108758796, + "loss": 0.669, + "step": 3782 + }, + { + "epoch": 0.87, + "learning_rate": 0.00017804287216631988, + "loss": 0.6668, + "step": 3783 + }, + { + "epoch": 0.87, + "learning_rate": 0.00017803163305268717, + "loss": 0.7088, + "step": 3784 + }, + { + "epoch": 0.87, + "learning_rate": 0.00017802039141826093, + "loss": 0.6985, + "step": 3785 + }, + { + "epoch": 0.87, + "learning_rate": 0.00017800914726340432, + "loss": 0.6983, + "step": 3786 + }, + { + "epoch": 0.87, + "learning_rate": 0.00017799790058848055, + "loss": 0.6732, + "step": 3787 + }, + { + "epoch": 0.87, + "learning_rate": 0.000177986651393853, + "loss": 0.7656, + "step": 3788 + }, + { + "epoch": 0.87, + "learning_rate": 0.000177975399679885, + "loss": 0.8048, + "step": 3789 + }, + { + "epoch": 0.87, + "learning_rate": 0.00017796414544694013, + "loss": 0.8274, + "step": 3790 + }, + { + "epoch": 0.87, + "learning_rate": 0.0001779528886953819, + "loss": 0.7322, + "step": 3791 + }, + { + "epoch": 0.87, + "learning_rate": 0.00017794162942557393, + "loss": 0.8289, + "step": 3792 + }, + { + "epoch": 0.87, + "learning_rate": 0.00017793036763788, + "loss": 0.6809, + "step": 3793 + }, + { + "epoch": 0.87, + "learning_rate": 0.00017791910333266387, + "loss": 0.6955, + "step": 3794 + }, + { + "epoch": 0.87, + "learning_rate": 0.00017790783651028944, + "loss": 0.7131, + "step": 3795 + }, + { + "epoch": 0.87, + "learning_rate": 0.00017789656717112076, + "loss": 0.7516, + "step": 3796 + }, + { + "epoch": 0.87, + "learning_rate": 0.00017788529531552176, + "loss": 0.7415, + "step": 3797 + }, + { + "epoch": 0.87, + "learning_rate": 0.00017787402094385666, + "loss": 0.7337, + "step": 3798 + }, + { + "epoch": 0.87, + "learning_rate": 0.00017786274405648966, + "loss": 0.7386, + "step": 3799 + }, + { + "epoch": 0.87, + "learning_rate": 0.00017785146465378504, + "loss": 0.7665, + "step": 3800 + }, + { + "epoch": 0.87, + "learning_rate": 0.0001778401827361072, + "loss": 0.7355, + "step": 3801 + }, + { + "epoch": 0.87, + "learning_rate": 0.00017782889830382056, + "loss": 0.8128, + "step": 3802 + }, + { + "epoch": 0.87, + "learning_rate": 0.00017781761135728973, + "loss": 0.7794, + "step": 3803 + }, + { + "epoch": 0.87, + "learning_rate": 0.00017780632189687924, + "loss": 0.7433, + "step": 3804 + }, + { + "epoch": 0.87, + "learning_rate": 0.00017779502992295389, + "loss": 0.719, + "step": 3805 + }, + { + "epoch": 0.87, + "learning_rate": 0.00017778373543587842, + "loss": 0.6649, + "step": 3806 + }, + { + "epoch": 0.87, + "learning_rate": 0.00017777243843601764, + "loss": 0.7075, + "step": 3807 + }, + { + "epoch": 0.87, + "learning_rate": 0.0001777611389237366, + "loss": 0.7363, + "step": 3808 + }, + { + "epoch": 0.87, + "learning_rate": 0.00017774983689940025, + "loss": 0.8306, + "step": 3809 + }, + { + "epoch": 0.87, + "learning_rate": 0.00017773853236337375, + "loss": 0.776, + "step": 3810 + }, + { + "epoch": 0.87, + "learning_rate": 0.00017772722531602227, + "loss": 0.735, + "step": 3811 + }, + { + "epoch": 0.87, + "learning_rate": 0.00017771591575771106, + "loss": 0.6973, + "step": 3812 + }, + { + "epoch": 0.87, + "learning_rate": 0.00017770460368880553, + "loss": 0.7455, + "step": 3813 + }, + { + "epoch": 0.87, + "learning_rate": 0.00017769328910967105, + "loss": 0.7407, + "step": 3814 + }, + { + "epoch": 0.87, + "learning_rate": 0.00017768197202067316, + "loss": 0.7329, + "step": 3815 + }, + { + "epoch": 0.87, + "learning_rate": 0.00017767065242217745, + "loss": 0.6722, + "step": 3816 + }, + { + "epoch": 0.87, + "learning_rate": 0.00017765933031454965, + "loss": 0.7346, + "step": 3817 + }, + { + "epoch": 0.87, + "learning_rate": 0.0001776480056981554, + "loss": 0.7142, + "step": 3818 + }, + { + "epoch": 0.87, + "learning_rate": 0.00017763667857336065, + "loss": 0.6947, + "step": 3819 + }, + { + "epoch": 0.87, + "learning_rate": 0.00017762534894053128, + "loss": 0.7382, + "step": 3820 + }, + { + "epoch": 0.87, + "learning_rate": 0.0001776140168000333, + "loss": 0.6609, + "step": 3821 + }, + { + "epoch": 0.87, + "learning_rate": 0.0001776026821522328, + "loss": 0.7452, + "step": 3822 + }, + { + "epoch": 0.87, + "learning_rate": 0.0001775913449974959, + "loss": 0.7788, + "step": 3823 + }, + { + "epoch": 0.87, + "learning_rate": 0.0001775800053361889, + "loss": 0.679, + "step": 3824 + }, + { + "epoch": 0.87, + "learning_rate": 0.00017756866316867806, + "loss": 0.7504, + "step": 3825 + }, + { + "epoch": 0.88, + "learning_rate": 0.00017755731849532984, + "loss": 0.6656, + "step": 3826 + }, + { + "epoch": 0.88, + "learning_rate": 0.0001775459713165107, + "loss": 0.7475, + "step": 3827 + }, + { + "epoch": 0.88, + "learning_rate": 0.00017753462163258723, + "loss": 0.682, + "step": 3828 + }, + { + "epoch": 0.88, + "learning_rate": 0.0001775232694439261, + "loss": 0.7403, + "step": 3829 + }, + { + "epoch": 0.88, + "learning_rate": 0.00017751191475089394, + "loss": 0.7283, + "step": 3830 + }, + { + "epoch": 0.88, + "learning_rate": 0.00017750055755385766, + "loss": 0.7642, + "step": 3831 + }, + { + "epoch": 0.88, + "learning_rate": 0.0001774891978531841, + "loss": 0.6881, + "step": 3832 + }, + { + "epoch": 0.88, + "learning_rate": 0.0001774778356492403, + "loss": 0.7064, + "step": 3833 + }, + { + "epoch": 0.88, + "learning_rate": 0.00017746647094239325, + "loss": 0.8064, + "step": 3834 + }, + { + "epoch": 0.88, + "learning_rate": 0.00017745510373301006, + "loss": 0.7206, + "step": 3835 + }, + { + "epoch": 0.88, + "learning_rate": 0.00017744373402145804, + "loss": 0.7644, + "step": 3836 + }, + { + "epoch": 0.88, + "learning_rate": 0.00017743236180810438, + "loss": 0.7704, + "step": 3837 + }, + { + "epoch": 0.88, + "learning_rate": 0.00017742098709331653, + "loss": 0.751, + "step": 3838 + }, + { + "epoch": 0.88, + "learning_rate": 0.00017740960987746194, + "loss": 0.7394, + "step": 3839 + }, + { + "epoch": 0.88, + "learning_rate": 0.0001773982301609081, + "loss": 0.7245, + "step": 3840 + }, + { + "epoch": 0.88, + "learning_rate": 0.00017738684794402267, + "loss": 0.7367, + "step": 3841 + }, + { + "epoch": 0.88, + "learning_rate": 0.00017737546322717335, + "loss": 0.6991, + "step": 3842 + }, + { + "epoch": 0.88, + "learning_rate": 0.00017736407601072788, + "loss": 0.7892, + "step": 3843 + }, + { + "epoch": 0.88, + "learning_rate": 0.0001773526862950542, + "loss": 0.7129, + "step": 3844 + }, + { + "epoch": 0.88, + "learning_rate": 0.00017734129408052018, + "loss": 0.8246, + "step": 3845 + }, + { + "epoch": 0.88, + "learning_rate": 0.00017732989936749389, + "loss": 0.7357, + "step": 3846 + }, + { + "epoch": 0.88, + "learning_rate": 0.00017731850215634337, + "loss": 0.6763, + "step": 3847 + }, + { + "epoch": 0.88, + "learning_rate": 0.0001773071024474369, + "loss": 0.7008, + "step": 3848 + }, + { + "epoch": 0.88, + "learning_rate": 0.00017729570024114266, + "loss": 0.7174, + "step": 3849 + }, + { + "epoch": 0.88, + "learning_rate": 0.000177284295537829, + "loss": 0.7364, + "step": 3850 + }, + { + "epoch": 0.88, + "learning_rate": 0.00017727288833786442, + "loss": 0.7526, + "step": 3851 + }, + { + "epoch": 0.88, + "learning_rate": 0.00017726147864161734, + "loss": 0.7508, + "step": 3852 + }, + { + "epoch": 0.88, + "learning_rate": 0.00017725006644945642, + "loss": 0.7128, + "step": 3853 + }, + { + "epoch": 0.88, + "learning_rate": 0.0001772386517617503, + "loss": 0.7338, + "step": 3854 + }, + { + "epoch": 0.88, + "learning_rate": 0.00017722723457886769, + "loss": 0.7564, + "step": 3855 + }, + { + "epoch": 0.88, + "learning_rate": 0.00017721581490117748, + "loss": 0.7493, + "step": 3856 + }, + { + "epoch": 0.88, + "learning_rate": 0.00017720439272904853, + "loss": 0.8075, + "step": 3857 + }, + { + "epoch": 0.88, + "learning_rate": 0.0001771929680628499, + "loss": 0.7023, + "step": 3858 + }, + { + "epoch": 0.88, + "learning_rate": 0.00017718154090295057, + "loss": 0.7542, + "step": 3859 + }, + { + "epoch": 0.88, + "learning_rate": 0.00017717011124971974, + "loss": 0.6887, + "step": 3860 + }, + { + "epoch": 0.88, + "learning_rate": 0.00017715867910352663, + "loss": 0.7362, + "step": 3861 + }, + { + "epoch": 0.88, + "learning_rate": 0.0001771472444647406, + "loss": 0.6912, + "step": 3862 + }, + { + "epoch": 0.88, + "learning_rate": 0.00017713580733373099, + "loss": 0.7312, + "step": 3863 + }, + { + "epoch": 0.88, + "learning_rate": 0.00017712436771086728, + "loss": 0.7056, + "step": 3864 + }, + { + "epoch": 0.88, + "learning_rate": 0.00017711292559651905, + "loss": 0.7298, + "step": 3865 + }, + { + "epoch": 0.88, + "learning_rate": 0.00017710148099105591, + "loss": 0.7727, + "step": 3866 + }, + { + "epoch": 0.88, + "learning_rate": 0.00017709003389484757, + "loss": 0.7615, + "step": 3867 + }, + { + "epoch": 0.88, + "learning_rate": 0.00017707858430826386, + "loss": 0.7524, + "step": 3868 + }, + { + "epoch": 0.88, + "learning_rate": 0.0001770671322316746, + "loss": 0.73, + "step": 3869 + }, + { + "epoch": 0.89, + "learning_rate": 0.00017705567766544982, + "loss": 0.7491, + "step": 3870 + }, + { + "epoch": 0.89, + "learning_rate": 0.0001770442206099595, + "loss": 0.7679, + "step": 3871 + }, + { + "epoch": 0.89, + "learning_rate": 0.0001770327610655738, + "loss": 0.7146, + "step": 3872 + }, + { + "epoch": 0.89, + "learning_rate": 0.00017702129903266286, + "loss": 0.6603, + "step": 3873 + }, + { + "epoch": 0.89, + "learning_rate": 0.00017700983451159703, + "loss": 0.7056, + "step": 3874 + }, + { + "epoch": 0.89, + "learning_rate": 0.00017699836750274662, + "loss": 0.7145, + "step": 3875 + }, + { + "epoch": 0.89, + "learning_rate": 0.00017698689800648207, + "loss": 0.7159, + "step": 3876 + }, + { + "epoch": 0.89, + "learning_rate": 0.00017697542602317388, + "loss": 0.7474, + "step": 3877 + }, + { + "epoch": 0.89, + "learning_rate": 0.00017696395155319272, + "loss": 0.7577, + "step": 3878 + }, + { + "epoch": 0.89, + "learning_rate": 0.00017695247459690924, + "loss": 0.6663, + "step": 3879 + }, + { + "epoch": 0.89, + "learning_rate": 0.0001769409951546942, + "loss": 0.734, + "step": 3880 + }, + { + "epoch": 0.89, + "learning_rate": 0.0001769295132269184, + "loss": 0.7741, + "step": 3881 + }, + { + "epoch": 0.89, + "learning_rate": 0.00017691802881395277, + "loss": 0.7025, + "step": 3882 + }, + { + "epoch": 0.89, + "learning_rate": 0.00017690654191616836, + "loss": 0.7989, + "step": 3883 + }, + { + "epoch": 0.89, + "learning_rate": 0.0001768950525339362, + "loss": 0.7454, + "step": 3884 + }, + { + "epoch": 0.89, + "learning_rate": 0.00017688356066762753, + "loss": 0.6688, + "step": 3885 + }, + { + "epoch": 0.89, + "learning_rate": 0.0001768720663176135, + "loss": 0.764, + "step": 3886 + }, + { + "epoch": 0.89, + "learning_rate": 0.00017686056948426545, + "loss": 0.8, + "step": 3887 + }, + { + "epoch": 0.89, + "learning_rate": 0.00017684907016795483, + "loss": 0.6924, + "step": 3888 + }, + { + "epoch": 0.89, + "learning_rate": 0.00017683756836905313, + "loss": 0.7146, + "step": 3889 + }, + { + "epoch": 0.89, + "learning_rate": 0.00017682606408793184, + "loss": 0.7151, + "step": 3890 + }, + { + "epoch": 0.89, + "learning_rate": 0.00017681455732496262, + "loss": 0.6748, + "step": 3891 + }, + { + "epoch": 0.89, + "learning_rate": 0.00017680304808051728, + "loss": 0.7575, + "step": 3892 + }, + { + "epoch": 0.89, + "learning_rate": 0.00017679153635496747, + "loss": 0.7248, + "step": 3893 + }, + { + "epoch": 0.89, + "learning_rate": 0.00017678002214868523, + "loss": 0.6924, + "step": 3894 + }, + { + "epoch": 0.89, + "learning_rate": 0.00017676850546204242, + "loss": 0.7151, + "step": 3895 + }, + { + "epoch": 0.89, + "learning_rate": 0.00017675698629541112, + "loss": 0.7446, + "step": 3896 + }, + { + "epoch": 0.89, + "learning_rate": 0.00017674546464916348, + "loss": 0.7484, + "step": 3897 + }, + { + "epoch": 0.89, + "learning_rate": 0.00017673394052367168, + "loss": 0.637, + "step": 3898 + }, + { + "epoch": 0.89, + "learning_rate": 0.000176722413919308, + "loss": 0.734, + "step": 3899 + }, + { + "epoch": 0.89, + "learning_rate": 0.0001767108848364448, + "loss": 0.7003, + "step": 3900 + }, + { + "epoch": 0.89, + "learning_rate": 0.00017669935327545451, + "loss": 0.8089, + "step": 3901 + }, + { + "epoch": 0.89, + "learning_rate": 0.0001766878192367097, + "loss": 0.6805, + "step": 3902 + }, + { + "epoch": 0.89, + "learning_rate": 0.0001766762827205829, + "loss": 0.6957, + "step": 3903 + }, + { + "epoch": 0.89, + "learning_rate": 0.0001766647437274469, + "loss": 0.7021, + "step": 3904 + }, + { + "epoch": 0.89, + "learning_rate": 0.0001766532022576744, + "loss": 0.6858, + "step": 3905 + }, + { + "epoch": 0.89, + "learning_rate": 0.00017664165831163823, + "loss": 0.8213, + "step": 3906 + }, + { + "epoch": 0.89, + "learning_rate": 0.00017663011188971136, + "loss": 0.7186, + "step": 3907 + }, + { + "epoch": 0.89, + "learning_rate": 0.00017661856299226676, + "loss": 0.6675, + "step": 3908 + }, + { + "epoch": 0.89, + "learning_rate": 0.00017660701161967754, + "loss": 0.7519, + "step": 3909 + }, + { + "epoch": 0.89, + "learning_rate": 0.00017659545777231684, + "loss": 0.7177, + "step": 3910 + }, + { + "epoch": 0.89, + "learning_rate": 0.0001765839014505579, + "loss": 0.686, + "step": 3911 + }, + { + "epoch": 0.89, + "learning_rate": 0.00017657234265477408, + "loss": 0.7419, + "step": 3912 + }, + { + "epoch": 0.9, + "learning_rate": 0.00017656078138533873, + "loss": 0.6943, + "step": 3913 + }, + { + "epoch": 0.9, + "learning_rate": 0.00017654921764262538, + "loss": 0.7431, + "step": 3914 + }, + { + "epoch": 0.9, + "learning_rate": 0.0001765376514270076, + "loss": 0.702, + "step": 3915 + }, + { + "epoch": 0.9, + "learning_rate": 0.00017652608273885901, + "loss": 0.7544, + "step": 3916 + }, + { + "epoch": 0.9, + "learning_rate": 0.00017651451157855332, + "loss": 0.7461, + "step": 3917 + }, + { + "epoch": 0.9, + "learning_rate": 0.0001765029379464644, + "loss": 0.6693, + "step": 3918 + }, + { + "epoch": 0.9, + "learning_rate": 0.00017649136184296605, + "loss": 0.6945, + "step": 3919 + }, + { + "epoch": 0.9, + "learning_rate": 0.00017647978326843227, + "loss": 0.7955, + "step": 3920 + }, + { + "epoch": 0.9, + "learning_rate": 0.0001764682022232371, + "loss": 0.7689, + "step": 3921 + }, + { + "epoch": 0.9, + "learning_rate": 0.00017645661870775465, + "loss": 0.752, + "step": 3922 + }, + { + "epoch": 0.9, + "learning_rate": 0.00017644503272235915, + "loss": 0.6039, + "step": 3923 + }, + { + "epoch": 0.9, + "learning_rate": 0.0001764334442674249, + "loss": 0.7791, + "step": 3924 + }, + { + "epoch": 0.9, + "learning_rate": 0.00017642185334332616, + "loss": 0.7153, + "step": 3925 + }, + { + "epoch": 0.9, + "learning_rate": 0.0001764102599504375, + "loss": 0.6949, + "step": 3926 + }, + { + "epoch": 0.9, + "learning_rate": 0.0001763986640891334, + "loss": 0.7442, + "step": 3927 + }, + { + "epoch": 0.9, + "learning_rate": 0.00017638706575978842, + "loss": 0.6922, + "step": 3928 + }, + { + "epoch": 0.9, + "learning_rate": 0.00017637546496277728, + "loss": 0.7488, + "step": 3929 + }, + { + "epoch": 0.9, + "learning_rate": 0.00017636386169847472, + "loss": 0.6723, + "step": 3930 + }, + { + "epoch": 0.9, + "learning_rate": 0.0001763522559672556, + "loss": 0.7208, + "step": 3931 + }, + { + "epoch": 0.9, + "learning_rate": 0.00017634064776949485, + "loss": 0.7223, + "step": 3932 + }, + { + "epoch": 0.9, + "learning_rate": 0.0001763290371055674, + "loss": 0.7544, + "step": 3933 + }, + { + "epoch": 0.9, + "learning_rate": 0.00017631742397584843, + "loss": 0.7193, + "step": 3934 + }, + { + "epoch": 0.9, + "learning_rate": 0.000176305808380713, + "loss": 0.732, + "step": 3935 + }, + { + "epoch": 0.9, + "learning_rate": 0.00017629419032053645, + "loss": 0.7704, + "step": 3936 + }, + { + "epoch": 0.9, + "learning_rate": 0.00017628256979569404, + "loss": 0.7051, + "step": 3937 + }, + { + "epoch": 0.9, + "learning_rate": 0.00017627094680656118, + "loss": 0.7687, + "step": 3938 + }, + { + "epoch": 0.9, + "learning_rate": 0.0001762593213535133, + "loss": 0.7516, + "step": 3939 + }, + { + "epoch": 0.9, + "learning_rate": 0.00017624769343692605, + "loss": 0.7462, + "step": 3940 + }, + { + "epoch": 0.9, + "learning_rate": 0.000176236063057175, + "loss": 0.6954, + "step": 3941 + }, + { + "epoch": 0.9, + "learning_rate": 0.00017622443021463591, + "loss": 0.7283, + "step": 3942 + }, + { + "epoch": 0.9, + "learning_rate": 0.00017621279490968452, + "loss": 0.7078, + "step": 3943 + }, + { + "epoch": 0.9, + "learning_rate": 0.00017620115714269674, + "loss": 0.7875, + "step": 3944 + }, + { + "epoch": 0.9, + "learning_rate": 0.00017618951691404856, + "loss": 0.7684, + "step": 3945 + }, + { + "epoch": 0.9, + "learning_rate": 0.00017617787422411594, + "loss": 0.6831, + "step": 3946 + }, + { + "epoch": 0.9, + "learning_rate": 0.00017616622907327503, + "loss": 0.7263, + "step": 3947 + }, + { + "epoch": 0.9, + "learning_rate": 0.00017615458146190207, + "loss": 0.8775, + "step": 3948 + }, + { + "epoch": 0.9, + "learning_rate": 0.00017614293139037326, + "loss": 0.6574, + "step": 3949 + }, + { + "epoch": 0.9, + "learning_rate": 0.000176131278859065, + "loss": 0.7303, + "step": 3950 + }, + { + "epoch": 0.9, + "learning_rate": 0.00017611962386835365, + "loss": 0.7711, + "step": 3951 + }, + { + "epoch": 0.9, + "learning_rate": 0.00017610796641861581, + "loss": 0.7671, + "step": 3952 + }, + { + "epoch": 0.9, + "learning_rate": 0.00017609630651022805, + "loss": 0.7715, + "step": 3953 + }, + { + "epoch": 0.9, + "learning_rate": 0.00017608464414356703, + "loss": 0.7869, + "step": 3954 + }, + { + "epoch": 0.9, + "learning_rate": 0.0001760729793190095, + "loss": 0.7519, + "step": 3955 + }, + { + "epoch": 0.9, + "learning_rate": 0.00017606131203693227, + "loss": 0.7342, + "step": 3956 + }, + { + "epoch": 0.91, + "learning_rate": 0.00017604964229771226, + "loss": 0.7263, + "step": 3957 + }, + { + "epoch": 0.91, + "learning_rate": 0.0001760379701017265, + "loss": 0.7007, + "step": 3958 + }, + { + "epoch": 0.91, + "learning_rate": 0.000176026295449352, + "loss": 0.698, + "step": 3959 + }, + { + "epoch": 0.91, + "learning_rate": 0.00017601461834096594, + "loss": 0.7376, + "step": 3960 + }, + { + "epoch": 0.91, + "learning_rate": 0.0001760029387769455, + "loss": 0.6845, + "step": 3961 + }, + { + "epoch": 0.91, + "learning_rate": 0.00017599125675766807, + "loss": 0.7662, + "step": 3962 + }, + { + "epoch": 0.91, + "learning_rate": 0.00017597957228351093, + "loss": 0.7474, + "step": 3963 + }, + { + "epoch": 0.91, + "learning_rate": 0.00017596788535485165, + "loss": 0.8055, + "step": 3964 + }, + { + "epoch": 0.91, + "learning_rate": 0.00017595619597206768, + "loss": 0.8463, + "step": 3965 + }, + { + "epoch": 0.91, + "learning_rate": 0.00017594450413553674, + "loss": 0.7222, + "step": 3966 + }, + { + "epoch": 0.91, + "learning_rate": 0.00017593280984563644, + "loss": 0.7108, + "step": 3967 + }, + { + "epoch": 0.91, + "learning_rate": 0.00017592111310274462, + "loss": 0.7884, + "step": 3968 + }, + { + "epoch": 0.91, + "learning_rate": 0.00017590941390723913, + "loss": 0.7548, + "step": 3969 + }, + { + "epoch": 0.91, + "learning_rate": 0.00017589771225949785, + "loss": 0.6766, + "step": 3970 + }, + { + "epoch": 0.91, + "learning_rate": 0.00017588600815989888, + "loss": 0.7685, + "step": 3971 + }, + { + "epoch": 0.91, + "learning_rate": 0.0001758743016088203, + "loss": 0.6425, + "step": 3972 + }, + { + "epoch": 0.91, + "learning_rate": 0.00017586259260664026, + "loss": 0.7352, + "step": 3973 + }, + { + "epoch": 0.91, + "learning_rate": 0.00017585088115373702, + "loss": 0.7615, + "step": 3974 + }, + { + "epoch": 0.91, + "learning_rate": 0.00017583916725048894, + "loss": 0.8306, + "step": 3975 + }, + { + "epoch": 0.91, + "learning_rate": 0.00017582745089727442, + "loss": 0.7011, + "step": 3976 + }, + { + "epoch": 0.91, + "learning_rate": 0.00017581573209447194, + "loss": 0.6871, + "step": 3977 + }, + { + "epoch": 0.91, + "learning_rate": 0.00017580401084246012, + "loss": 0.7234, + "step": 3978 + }, + { + "epoch": 0.91, + "learning_rate": 0.00017579228714161759, + "loss": 0.6655, + "step": 3979 + }, + { + "epoch": 0.91, + "learning_rate": 0.00017578056099232304, + "loss": 0.7159, + "step": 3980 + }, + { + "epoch": 0.91, + "learning_rate": 0.00017576883239495532, + "loss": 0.681, + "step": 3981 + }, + { + "epoch": 0.91, + "learning_rate": 0.0001757571013498933, + "loss": 0.7135, + "step": 3982 + }, + { + "epoch": 0.91, + "learning_rate": 0.00017574536785751598, + "loss": 0.7086, + "step": 3983 + }, + { + "epoch": 0.91, + "learning_rate": 0.0001757336319182024, + "loss": 0.7825, + "step": 3984 + }, + { + "epoch": 0.91, + "learning_rate": 0.00017572189353233167, + "loss": 0.7793, + "step": 3985 + }, + { + "epoch": 0.91, + "learning_rate": 0.00017571015270028298, + "loss": 0.7367, + "step": 3986 + }, + { + "epoch": 0.91, + "learning_rate": 0.00017569840942243566, + "loss": 0.7609, + "step": 3987 + }, + { + "epoch": 0.91, + "learning_rate": 0.000175686663699169, + "loss": 0.7121, + "step": 3988 + }, + { + "epoch": 0.91, + "learning_rate": 0.00017567491553086256, + "loss": 0.7723, + "step": 3989 + }, + { + "epoch": 0.91, + "learning_rate": 0.00017566316491789577, + "loss": 0.741, + "step": 3990 + }, + { + "epoch": 0.91, + "learning_rate": 0.0001756514118606482, + "loss": 0.7548, + "step": 3991 + }, + { + "epoch": 0.91, + "learning_rate": 0.00017563965635949966, + "loss": 0.7659, + "step": 3992 + }, + { + "epoch": 0.91, + "learning_rate": 0.00017562789841482984, + "loss": 0.7693, + "step": 3993 + }, + { + "epoch": 0.91, + "learning_rate": 0.00017561613802701852, + "loss": 0.7654, + "step": 3994 + }, + { + "epoch": 0.91, + "learning_rate": 0.0001756043751964457, + "loss": 0.6721, + "step": 3995 + }, + { + "epoch": 0.91, + "learning_rate": 0.00017559260992349135, + "loss": 0.7191, + "step": 3996 + }, + { + "epoch": 0.91, + "learning_rate": 0.00017558084220853552, + "loss": 0.728, + "step": 3997 + }, + { + "epoch": 0.91, + "learning_rate": 0.00017556907205195835, + "loss": 0.7921, + "step": 3998 + }, + { + "epoch": 0.91, + "learning_rate": 0.00017555729945414015, + "loss": 0.6883, + "step": 3999 + }, + { + "epoch": 0.91, + "learning_rate": 0.00017554552441546118, + "loss": 0.7344, + "step": 4000 + }, + { + "epoch": 0.92, + "learning_rate": 0.00017553374693630183, + "loss": 0.7185, + "step": 4001 + }, + { + "epoch": 0.92, + "learning_rate": 0.00017552196701704255, + "loss": 0.7164, + "step": 4002 + }, + { + "epoch": 0.92, + "learning_rate": 0.00017551018465806394, + "loss": 0.7275, + "step": 4003 + }, + { + "epoch": 0.92, + "learning_rate": 0.00017549839985974658, + "loss": 0.7314, + "step": 4004 + }, + { + "epoch": 0.92, + "learning_rate": 0.00017548661262247118, + "loss": 0.7452, + "step": 4005 + }, + { + "epoch": 0.92, + "learning_rate": 0.0001754748229466186, + "loss": 0.7259, + "step": 4006 + }, + { + "epoch": 0.92, + "learning_rate": 0.0001754630308325696, + "loss": 0.6833, + "step": 4007 + }, + { + "epoch": 0.92, + "learning_rate": 0.00017545123628070512, + "loss": 0.7711, + "step": 4008 + }, + { + "epoch": 0.92, + "learning_rate": 0.00017543943929140626, + "loss": 0.7712, + "step": 4009 + }, + { + "epoch": 0.92, + "learning_rate": 0.00017542763986505406, + "loss": 0.7305, + "step": 4010 + }, + { + "epoch": 0.92, + "learning_rate": 0.00017541583800202975, + "loss": 0.6528, + "step": 4011 + }, + { + "epoch": 0.92, + "learning_rate": 0.00017540403370271452, + "loss": 0.7301, + "step": 4012 + }, + { + "epoch": 0.92, + "learning_rate": 0.00017539222696748974, + "loss": 0.7674, + "step": 4013 + }, + { + "epoch": 0.92, + "learning_rate": 0.00017538041779673684, + "loss": 0.6842, + "step": 4014 + }, + { + "epoch": 0.92, + "learning_rate": 0.0001753686061908373, + "loss": 0.7278, + "step": 4015 + }, + { + "epoch": 0.92, + "learning_rate": 0.00017535679215017265, + "loss": 0.7899, + "step": 4016 + }, + { + "epoch": 0.92, + "learning_rate": 0.0001753449756751246, + "loss": 0.6954, + "step": 4017 + }, + { + "epoch": 0.92, + "learning_rate": 0.00017533315676607485, + "loss": 0.7284, + "step": 4018 + }, + { + "epoch": 0.92, + "learning_rate": 0.0001753213354234052, + "loss": 0.741, + "step": 4019 + }, + { + "epoch": 0.92, + "learning_rate": 0.00017530951164749757, + "loss": 0.7111, + "step": 4020 + }, + { + "epoch": 0.92, + "learning_rate": 0.00017529768543873391, + "loss": 0.7609, + "step": 4021 + }, + { + "epoch": 0.92, + "learning_rate": 0.00017528585679749622, + "loss": 0.6965, + "step": 4022 + }, + { + "epoch": 0.92, + "learning_rate": 0.0001752740257241667, + "loss": 0.7715, + "step": 4023 + }, + { + "epoch": 0.92, + "learning_rate": 0.00017526219221912745, + "loss": 0.751, + "step": 4024 + }, + { + "epoch": 0.92, + "learning_rate": 0.00017525035628276084, + "loss": 0.7118, + "step": 4025 + }, + { + "epoch": 0.92, + "learning_rate": 0.00017523851791544917, + "loss": 0.7886, + "step": 4026 + }, + { + "epoch": 0.92, + "learning_rate": 0.00017522667711757493, + "loss": 0.7344, + "step": 4027 + }, + { + "epoch": 0.92, + "learning_rate": 0.00017521483388952057, + "loss": 0.7038, + "step": 4028 + }, + { + "epoch": 0.92, + "learning_rate": 0.00017520298823166873, + "loss": 0.709, + "step": 4029 + }, + { + "epoch": 0.92, + "learning_rate": 0.00017519114014440205, + "loss": 0.6957, + "step": 4030 + }, + { + "epoch": 0.92, + "learning_rate": 0.00017517928962810332, + "loss": 0.7512, + "step": 4031 + }, + { + "epoch": 0.92, + "learning_rate": 0.00017516743668315532, + "loss": 0.6972, + "step": 4032 + }, + { + "epoch": 0.92, + "learning_rate": 0.000175155581309941, + "loss": 0.7655, + "step": 4033 + }, + { + "epoch": 0.92, + "learning_rate": 0.0001751437235088433, + "loss": 0.7971, + "step": 4034 + }, + { + "epoch": 0.92, + "learning_rate": 0.0001751318632802453, + "loss": 0.6921, + "step": 4035 + }, + { + "epoch": 0.92, + "learning_rate": 0.00017512000062453018, + "loss": 0.6728, + "step": 4036 + }, + { + "epoch": 0.92, + "learning_rate": 0.00017510813554208113, + "loss": 0.7658, + "step": 4037 + }, + { + "epoch": 0.92, + "learning_rate": 0.0001750962680332814, + "loss": 0.7264, + "step": 4038 + }, + { + "epoch": 0.92, + "learning_rate": 0.00017508439809851446, + "loss": 0.7455, + "step": 4039 + }, + { + "epoch": 0.92, + "learning_rate": 0.00017507252573816372, + "loss": 0.8065, + "step": 4040 + }, + { + "epoch": 0.92, + "learning_rate": 0.00017506065095261272, + "loss": 0.7081, + "step": 4041 + }, + { + "epoch": 0.92, + "learning_rate": 0.00017504877374224505, + "loss": 0.7197, + "step": 4042 + }, + { + "epoch": 0.92, + "learning_rate": 0.00017503689410744444, + "loss": 0.7322, + "step": 4043 + }, + { + "epoch": 0.92, + "learning_rate": 0.0001750250120485946, + "loss": 0.7423, + "step": 4044 + }, + { + "epoch": 0.93, + "learning_rate": 0.00017501312756607943, + "loss": 0.728, + "step": 4045 + }, + { + "epoch": 0.93, + "learning_rate": 0.00017500124066028287, + "loss": 0.7003, + "step": 4046 + }, + { + "epoch": 0.93, + "learning_rate": 0.00017498935133158884, + "loss": 0.73, + "step": 4047 + }, + { + "epoch": 0.93, + "learning_rate": 0.00017497745958038152, + "loss": 0.7, + "step": 4048 + }, + { + "epoch": 0.93, + "learning_rate": 0.000174965565407045, + "loss": 0.7102, + "step": 4049 + }, + { + "epoch": 0.93, + "learning_rate": 0.00017495366881196356, + "loss": 0.7852, + "step": 4050 + }, + { + "epoch": 0.93, + "learning_rate": 0.00017494176979552148, + "loss": 0.7414, + "step": 4051 + }, + { + "epoch": 0.93, + "learning_rate": 0.0001749298683581032, + "loss": 0.7295, + "step": 4052 + }, + { + "epoch": 0.93, + "learning_rate": 0.00017491796450009315, + "loss": 0.7337, + "step": 4053 + }, + { + "epoch": 0.93, + "learning_rate": 0.0001749060582218759, + "loss": 0.7538, + "step": 4054 + }, + { + "epoch": 0.93, + "learning_rate": 0.00017489414952383609, + "loss": 0.826, + "step": 4055 + }, + { + "epoch": 0.93, + "learning_rate": 0.0001748822384063584, + "loss": 0.7729, + "step": 4056 + }, + { + "epoch": 0.93, + "learning_rate": 0.0001748703248698276, + "loss": 0.7017, + "step": 4057 + }, + { + "epoch": 0.93, + "learning_rate": 0.00017485840891462865, + "loss": 0.7355, + "step": 4058 + }, + { + "epoch": 0.93, + "learning_rate": 0.00017484649054114643, + "loss": 0.6612, + "step": 4059 + }, + { + "epoch": 0.93, + "learning_rate": 0.00017483456974976593, + "loss": 0.7035, + "step": 4060 + }, + { + "epoch": 0.93, + "learning_rate": 0.00017482264654087226, + "loss": 0.6407, + "step": 4061 + }, + { + "epoch": 0.93, + "learning_rate": 0.00017481072091485067, + "loss": 0.7562, + "step": 4062 + }, + { + "epoch": 0.93, + "learning_rate": 0.0001747987928720863, + "loss": 0.7414, + "step": 4063 + }, + { + "epoch": 0.93, + "learning_rate": 0.00017478686241296457, + "loss": 0.7337, + "step": 4064 + }, + { + "epoch": 0.93, + "learning_rate": 0.00017477492953787087, + "loss": 0.7392, + "step": 4065 + }, + { + "epoch": 0.93, + "learning_rate": 0.00017476299424719067, + "loss": 0.6236, + "step": 4066 + }, + { + "epoch": 0.93, + "learning_rate": 0.0001747510565413095, + "loss": 0.7613, + "step": 4067 + }, + { + "epoch": 0.93, + "learning_rate": 0.00017473911642061315, + "loss": 0.7173, + "step": 4068 + }, + { + "epoch": 0.93, + "learning_rate": 0.00017472717388548715, + "loss": 0.7208, + "step": 4069 + }, + { + "epoch": 0.93, + "learning_rate": 0.00017471522893631748, + "loss": 0.7767, + "step": 4070 + }, + { + "epoch": 0.93, + "learning_rate": 0.00017470328157348988, + "loss": 0.739, + "step": 4071 + }, + { + "epoch": 0.93, + "learning_rate": 0.00017469133179739036, + "loss": 0.6726, + "step": 4072 + }, + { + "epoch": 0.93, + "learning_rate": 0.00017467937960840497, + "loss": 0.7535, + "step": 4073 + }, + { + "epoch": 0.93, + "learning_rate": 0.00017466742500691978, + "loss": 0.7182, + "step": 4074 + }, + { + "epoch": 0.93, + "learning_rate": 0.00017465546799332103, + "loss": 0.7264, + "step": 4075 + }, + { + "epoch": 0.93, + "learning_rate": 0.000174643508567995, + "loss": 0.7235, + "step": 4076 + }, + { + "epoch": 0.93, + "learning_rate": 0.00017463154673132798, + "loss": 0.7466, + "step": 4077 + }, + { + "epoch": 0.93, + "learning_rate": 0.0001746195824837064, + "loss": 0.8107, + "step": 4078 + }, + { + "epoch": 0.93, + "learning_rate": 0.00017460761582551682, + "loss": 0.7232, + "step": 4079 + }, + { + "epoch": 0.93, + "learning_rate": 0.00017459564675714571, + "loss": 0.716, + "step": 4080 + }, + { + "epoch": 0.93, + "learning_rate": 0.0001745836752789799, + "loss": 0.7216, + "step": 4081 + }, + { + "epoch": 0.93, + "learning_rate": 0.00017457170139140594, + "loss": 0.7476, + "step": 4082 + }, + { + "epoch": 0.93, + "learning_rate": 0.00017455972509481077, + "loss": 0.7228, + "step": 4083 + }, + { + "epoch": 0.93, + "learning_rate": 0.0001745477463895812, + "loss": 0.7338, + "step": 4084 + }, + { + "epoch": 0.93, + "learning_rate": 0.00017453576527610428, + "loss": 0.6972, + "step": 4085 + }, + { + "epoch": 0.93, + "learning_rate": 0.000174523781754767, + "loss": 0.669, + "step": 4086 + }, + { + "epoch": 0.93, + "learning_rate": 0.00017451179582595648, + "loss": 0.7375, + "step": 4087 + }, + { + "epoch": 0.94, + "learning_rate": 0.00017449980749005998, + "loss": 0.7266, + "step": 4088 + }, + { + "epoch": 0.94, + "learning_rate": 0.00017448781674746472, + "loss": 0.7005, + "step": 4089 + }, + { + "epoch": 0.94, + "learning_rate": 0.00017447582359855809, + "loss": 0.7218, + "step": 4090 + }, + { + "epoch": 0.94, + "learning_rate": 0.0001744638280437275, + "loss": 0.7548, + "step": 4091 + }, + { + "epoch": 0.94, + "learning_rate": 0.0001744518300833605, + "loss": 0.8237, + "step": 4092 + }, + { + "epoch": 0.94, + "learning_rate": 0.00017443982971784464, + "loss": 0.7208, + "step": 4093 + }, + { + "epoch": 0.94, + "learning_rate": 0.00017442782694756764, + "loss": 0.6925, + "step": 4094 + }, + { + "epoch": 0.94, + "learning_rate": 0.0001744158217729172, + "loss": 0.7451, + "step": 4095 + }, + { + "epoch": 0.94, + "learning_rate": 0.00017440381419428115, + "loss": 0.755, + "step": 4096 + }, + { + "epoch": 0.94, + "learning_rate": 0.00017439180421204744, + "loss": 0.6942, + "step": 4097 + }, + { + "epoch": 0.94, + "learning_rate": 0.000174379791826604, + "loss": 0.7047, + "step": 4098 + }, + { + "epoch": 0.94, + "learning_rate": 0.0001743677770383389, + "loss": 0.7973, + "step": 4099 + }, + { + "epoch": 0.94, + "learning_rate": 0.00017435575984764027, + "loss": 0.7234, + "step": 4100 + }, + { + "epoch": 0.94, + "learning_rate": 0.00017434374025489632, + "loss": 0.7643, + "step": 4101 + }, + { + "epoch": 0.94, + "learning_rate": 0.00017433171826049536, + "loss": 0.7115, + "step": 4102 + }, + { + "epoch": 0.94, + "learning_rate": 0.00017431969386482577, + "loss": 0.7263, + "step": 4103 + }, + { + "epoch": 0.94, + "learning_rate": 0.0001743076670682759, + "loss": 0.7732, + "step": 4104 + }, + { + "epoch": 0.94, + "learning_rate": 0.0001742956378712344, + "loss": 0.7753, + "step": 4105 + }, + { + "epoch": 0.94, + "learning_rate": 0.00017428360627408978, + "loss": 0.7706, + "step": 4106 + }, + { + "epoch": 0.94, + "learning_rate": 0.0001742715722772308, + "loss": 0.7376, + "step": 4107 + }, + { + "epoch": 0.94, + "learning_rate": 0.00017425953588104615, + "loss": 0.723, + "step": 4108 + }, + { + "epoch": 0.94, + "learning_rate": 0.00017424749708592468, + "loss": 0.7641, + "step": 4109 + }, + { + "epoch": 0.94, + "learning_rate": 0.0001742354558922553, + "loss": 0.7822, + "step": 4110 + }, + { + "epoch": 0.94, + "learning_rate": 0.000174223412300427, + "loss": 0.6975, + "step": 4111 + }, + { + "epoch": 0.94, + "learning_rate": 0.00017421136631082885, + "loss": 0.7476, + "step": 4112 + }, + { + "epoch": 0.94, + "learning_rate": 0.00017419931792384997, + "loss": 0.7668, + "step": 4113 + }, + { + "epoch": 0.94, + "learning_rate": 0.0001741872671398796, + "loss": 0.7758, + "step": 4114 + }, + { + "epoch": 0.94, + "learning_rate": 0.00017417521395930705, + "loss": 0.7364, + "step": 4115 + }, + { + "epoch": 0.94, + "learning_rate": 0.00017416315838252166, + "loss": 0.6875, + "step": 4116 + }, + { + "epoch": 0.94, + "learning_rate": 0.00017415110040991294, + "loss": 0.7166, + "step": 4117 + }, + { + "epoch": 0.94, + "learning_rate": 0.00017413904004187037, + "loss": 0.6916, + "step": 4118 + }, + { + "epoch": 0.94, + "learning_rate": 0.00017412697727878357, + "loss": 0.7466, + "step": 4119 + }, + { + "epoch": 0.94, + "learning_rate": 0.0001741149121210422, + "loss": 0.7578, + "step": 4120 + }, + { + "epoch": 0.94, + "learning_rate": 0.00017410284456903608, + "loss": 0.7164, + "step": 4121 + }, + { + "epoch": 0.94, + "learning_rate": 0.000174090774623155, + "loss": 0.7327, + "step": 4122 + }, + { + "epoch": 0.94, + "learning_rate": 0.00017407870228378888, + "loss": 0.7957, + "step": 4123 + }, + { + "epoch": 0.94, + "learning_rate": 0.00017406662755132775, + "loss": 0.743, + "step": 4124 + }, + { + "epoch": 0.94, + "learning_rate": 0.00017405455042616166, + "loss": 0.6354, + "step": 4125 + }, + { + "epoch": 0.94, + "learning_rate": 0.00017404247090868077, + "loss": 0.685, + "step": 4126 + }, + { + "epoch": 0.94, + "learning_rate": 0.00017403038899927524, + "loss": 0.7357, + "step": 4127 + }, + { + "epoch": 0.94, + "learning_rate": 0.00017401830469833548, + "loss": 0.801, + "step": 4128 + }, + { + "epoch": 0.94, + "learning_rate": 0.0001740062180062518, + "loss": 0.715, + "step": 4129 + }, + { + "epoch": 0.94, + "learning_rate": 0.00017399412892341466, + "loss": 0.7848, + "step": 4130 + }, + { + "epoch": 0.94, + "learning_rate": 0.00017398203745021463, + "loss": 0.7351, + "step": 4131 + }, + { + "epoch": 0.95, + "learning_rate": 0.0001739699435870423, + "loss": 0.7116, + "step": 4132 + }, + { + "epoch": 0.95, + "learning_rate": 0.00017395784733428835, + "loss": 0.784, + "step": 4133 + }, + { + "epoch": 0.95, + "learning_rate": 0.00017394574869234356, + "loss": 0.7337, + "step": 4134 + }, + { + "epoch": 0.95, + "learning_rate": 0.00017393364766159878, + "loss": 0.7563, + "step": 4135 + }, + { + "epoch": 0.95, + "learning_rate": 0.00017392154424244492, + "loss": 0.7306, + "step": 4136 + }, + { + "epoch": 0.95, + "learning_rate": 0.00017390943843527298, + "loss": 0.6884, + "step": 4137 + }, + { + "epoch": 0.95, + "learning_rate": 0.00017389733024047405, + "loss": 0.8069, + "step": 4138 + }, + { + "epoch": 0.95, + "learning_rate": 0.00017388521965843928, + "loss": 0.7024, + "step": 4139 + }, + { + "epoch": 0.95, + "learning_rate": 0.00017387310668955986, + "loss": 0.6874, + "step": 4140 + }, + { + "epoch": 0.95, + "learning_rate": 0.00017386099133422713, + "loss": 0.7097, + "step": 4141 + }, + { + "epoch": 0.95, + "learning_rate": 0.00017384887359283248, + "loss": 0.6886, + "step": 4142 + }, + { + "epoch": 0.95, + "learning_rate": 0.00017383675346576737, + "loss": 0.7251, + "step": 4143 + }, + { + "epoch": 0.95, + "learning_rate": 0.00017382463095342326, + "loss": 0.7911, + "step": 4144 + }, + { + "epoch": 0.95, + "learning_rate": 0.0001738125060561919, + "loss": 0.7226, + "step": 4145 + }, + { + "epoch": 0.95, + "learning_rate": 0.00017380037877446491, + "loss": 0.7444, + "step": 4146 + }, + { + "epoch": 0.95, + "learning_rate": 0.00017378824910863405, + "loss": 0.7252, + "step": 4147 + }, + { + "epoch": 0.95, + "learning_rate": 0.00017377611705909115, + "loss": 0.7435, + "step": 4148 + }, + { + "epoch": 0.95, + "learning_rate": 0.0001737639826262282, + "loss": 0.7423, + "step": 4149 + }, + { + "epoch": 0.95, + "learning_rate": 0.00017375184581043716, + "loss": 0.7012, + "step": 4150 + }, + { + "epoch": 0.95, + "learning_rate": 0.00017373970661211012, + "loss": 0.7026, + "step": 4151 + }, + { + "epoch": 0.95, + "learning_rate": 0.0001737275650316392, + "loss": 0.7648, + "step": 4152 + }, + { + "epoch": 0.95, + "learning_rate": 0.00017371542106941665, + "loss": 0.7238, + "step": 4153 + }, + { + "epoch": 0.95, + "learning_rate": 0.0001737032747258348, + "loss": 0.7709, + "step": 4154 + }, + { + "epoch": 0.95, + "learning_rate": 0.000173691126001286, + "loss": 0.7218, + "step": 4155 + }, + { + "epoch": 0.95, + "learning_rate": 0.00017367897489616274, + "loss": 0.7003, + "step": 4156 + }, + { + "epoch": 0.95, + "learning_rate": 0.00017366682141085752, + "loss": 0.7725, + "step": 4157 + }, + { + "epoch": 0.95, + "learning_rate": 0.000173654665545763, + "loss": 0.7741, + "step": 4158 + }, + { + "epoch": 0.95, + "learning_rate": 0.00017364250730127187, + "loss": 0.6379, + "step": 4159 + }, + { + "epoch": 0.95, + "learning_rate": 0.0001736303466777769, + "loss": 0.6785, + "step": 4160 + }, + { + "epoch": 0.95, + "learning_rate": 0.0001736181836756709, + "loss": 0.7495, + "step": 4161 + }, + { + "epoch": 0.95, + "learning_rate": 0.00017360601829534677, + "loss": 0.7408, + "step": 4162 + }, + { + "epoch": 0.95, + "learning_rate": 0.0001735938505371976, + "loss": 0.6767, + "step": 4163 + }, + { + "epoch": 0.95, + "learning_rate": 0.00017358168040161641, + "loss": 0.6891, + "step": 4164 + }, + { + "epoch": 0.95, + "learning_rate": 0.00017356950788899637, + "loss": 0.7254, + "step": 4165 + }, + { + "epoch": 0.95, + "learning_rate": 0.0001735573329997307, + "loss": 0.7718, + "step": 4166 + }, + { + "epoch": 0.95, + "learning_rate": 0.0001735451557342127, + "loss": 0.7264, + "step": 4167 + }, + { + "epoch": 0.95, + "learning_rate": 0.00017353297609283577, + "loss": 0.7522, + "step": 4168 + }, + { + "epoch": 0.95, + "learning_rate": 0.00017352079407599336, + "loss": 0.6659, + "step": 4169 + }, + { + "epoch": 0.95, + "learning_rate": 0.000173508609684079, + "loss": 0.7429, + "step": 4170 + }, + { + "epoch": 0.95, + "learning_rate": 0.00017349642291748633, + "loss": 0.755, + "step": 4171 + }, + { + "epoch": 0.95, + "learning_rate": 0.00017348423377660903, + "loss": 0.7335, + "step": 4172 + }, + { + "epoch": 0.95, + "learning_rate": 0.00017347204226184087, + "loss": 0.7203, + "step": 4173 + }, + { + "epoch": 0.95, + "learning_rate": 0.00017345984837357565, + "loss": 0.7473, + "step": 4174 + }, + { + "epoch": 0.95, + "learning_rate": 0.00017344765211220735, + "loss": 0.7795, + "step": 4175 + }, + { + "epoch": 0.96, + "learning_rate": 0.00017343545347812996, + "loss": 0.729, + "step": 4176 + }, + { + "epoch": 0.96, + "learning_rate": 0.00017342325247173752, + "loss": 0.7839, + "step": 4177 + }, + { + "epoch": 0.96, + "learning_rate": 0.0001734110490934242, + "loss": 0.7395, + "step": 4178 + }, + { + "epoch": 0.96, + "learning_rate": 0.00017339884334358424, + "loss": 0.7116, + "step": 4179 + }, + { + "epoch": 0.96, + "learning_rate": 0.00017338663522261188, + "loss": 0.7538, + "step": 4180 + }, + { + "epoch": 0.96, + "learning_rate": 0.0001733744247309016, + "loss": 0.7584, + "step": 4181 + }, + { + "epoch": 0.96, + "learning_rate": 0.00017336221186884784, + "loss": 0.7267, + "step": 4182 + }, + { + "epoch": 0.96, + "learning_rate": 0.00017334999663684504, + "loss": 0.752, + "step": 4183 + }, + { + "epoch": 0.96, + "learning_rate": 0.0001733377790352879, + "loss": 0.7166, + "step": 4184 + }, + { + "epoch": 0.96, + "learning_rate": 0.00017332555906457108, + "loss": 0.6902, + "step": 4185 + }, + { + "epoch": 0.96, + "learning_rate": 0.00017331333672508933, + "loss": 0.7194, + "step": 4186 + }, + { + "epoch": 0.96, + "learning_rate": 0.0001733011120172375, + "loss": 0.712, + "step": 4187 + }, + { + "epoch": 0.96, + "learning_rate": 0.00017328888494141053, + "loss": 0.753, + "step": 4188 + }, + { + "epoch": 0.96, + "learning_rate": 0.00017327665549800338, + "loss": 0.7243, + "step": 4189 + }, + { + "epoch": 0.96, + "learning_rate": 0.00017326442368741112, + "loss": 0.7976, + "step": 4190 + }, + { + "epoch": 0.96, + "learning_rate": 0.00017325218951002894, + "loss": 0.7779, + "step": 4191 + }, + { + "epoch": 0.96, + "learning_rate": 0.00017323995296625197, + "loss": 0.7091, + "step": 4192 + }, + { + "epoch": 0.96, + "learning_rate": 0.00017322771405647562, + "loss": 0.8119, + "step": 4193 + }, + { + "epoch": 0.96, + "learning_rate": 0.00017321547278109517, + "loss": 0.6764, + "step": 4194 + }, + { + "epoch": 0.96, + "learning_rate": 0.00017320322914050616, + "loss": 0.7449, + "step": 4195 + }, + { + "epoch": 0.96, + "learning_rate": 0.00017319098313510405, + "loss": 0.6812, + "step": 4196 + }, + { + "epoch": 0.96, + "learning_rate": 0.00017317873476528446, + "loss": 0.7222, + "step": 4197 + }, + { + "epoch": 0.96, + "learning_rate": 0.00017316648403144306, + "loss": 0.8074, + "step": 4198 + }, + { + "epoch": 0.96, + "learning_rate": 0.00017315423093397565, + "loss": 0.8048, + "step": 4199 + }, + { + "epoch": 0.96, + "learning_rate": 0.00017314197547327803, + "loss": 0.7474, + "step": 4200 + }, + { + "epoch": 0.96, + "learning_rate": 0.00017312971764974614, + "loss": 0.7141, + "step": 4201 + }, + { + "epoch": 0.96, + "learning_rate": 0.0001731174574637759, + "loss": 0.7133, + "step": 4202 + }, + { + "epoch": 0.96, + "learning_rate": 0.00017310519491576344, + "loss": 0.6757, + "step": 4203 + }, + { + "epoch": 0.96, + "learning_rate": 0.00017309293000610486, + "loss": 0.7442, + "step": 4204 + }, + { + "epoch": 0.96, + "learning_rate": 0.0001730806627351964, + "loss": 0.6816, + "step": 4205 + }, + { + "epoch": 0.96, + "learning_rate": 0.00017306839310343434, + "loss": 0.7223, + "step": 4206 + }, + { + "epoch": 0.96, + "learning_rate": 0.00017305612111121504, + "loss": 0.7335, + "step": 4207 + }, + { + "epoch": 0.96, + "learning_rate": 0.00017304384675893495, + "loss": 0.7614, + "step": 4208 + }, + { + "epoch": 0.96, + "learning_rate": 0.00017303157004699063, + "loss": 0.6839, + "step": 4209 + }, + { + "epoch": 0.96, + "learning_rate": 0.00017301929097577858, + "loss": 0.6954, + "step": 4210 + }, + { + "epoch": 0.96, + "learning_rate": 0.00017300700954569557, + "loss": 0.7716, + "step": 4211 + }, + { + "epoch": 0.96, + "learning_rate": 0.0001729947257571383, + "loss": 0.7431, + "step": 4212 + }, + { + "epoch": 0.96, + "learning_rate": 0.0001729824396105036, + "loss": 0.7765, + "step": 4213 + }, + { + "epoch": 0.96, + "learning_rate": 0.00017297015110618837, + "loss": 0.7512, + "step": 4214 + }, + { + "epoch": 0.96, + "learning_rate": 0.0001729578602445896, + "loss": 0.7505, + "step": 4215 + }, + { + "epoch": 0.96, + "learning_rate": 0.00017294556702610432, + "loss": 0.7239, + "step": 4216 + }, + { + "epoch": 0.96, + "learning_rate": 0.00017293327145112973, + "loss": 0.6743, + "step": 4217 + }, + { + "epoch": 0.96, + "learning_rate": 0.00017292097352006292, + "loss": 0.7004, + "step": 4218 + }, + { + "epoch": 0.97, + "learning_rate": 0.00017290867323330127, + "loss": 0.7844, + "step": 4219 + }, + { + "epoch": 0.97, + "learning_rate": 0.00017289637059124207, + "loss": 0.6896, + "step": 4220 + }, + { + "epoch": 0.97, + "learning_rate": 0.00017288406559428282, + "loss": 0.753, + "step": 4221 + }, + { + "epoch": 0.97, + "learning_rate": 0.00017287175824282094, + "loss": 0.7251, + "step": 4222 + }, + { + "epoch": 0.97, + "learning_rate": 0.0001728594485372541, + "loss": 0.7586, + "step": 4223 + }, + { + "epoch": 0.97, + "learning_rate": 0.00017284713647797997, + "loss": 0.6882, + "step": 4224 + }, + { + "epoch": 0.97, + "learning_rate": 0.00017283482206539622, + "loss": 0.6766, + "step": 4225 + }, + { + "epoch": 0.97, + "learning_rate": 0.00017282250529990068, + "loss": 0.797, + "step": 4226 + }, + { + "epoch": 0.97, + "learning_rate": 0.0001728101861818913, + "loss": 0.6972, + "step": 4227 + }, + { + "epoch": 0.97, + "learning_rate": 0.00017279786471176593, + "loss": 0.7184, + "step": 4228 + }, + { + "epoch": 0.97, + "learning_rate": 0.00017278554088992273, + "loss": 0.6487, + "step": 4229 + }, + { + "epoch": 0.97, + "learning_rate": 0.0001727732147167598, + "loss": 0.6427, + "step": 4230 + }, + { + "epoch": 0.97, + "learning_rate": 0.00017276088619267525, + "loss": 0.7455, + "step": 4231 + }, + { + "epoch": 0.97, + "learning_rate": 0.00017274855531806743, + "loss": 0.7067, + "step": 4232 + }, + { + "epoch": 0.97, + "learning_rate": 0.00017273622209333466, + "loss": 0.772, + "step": 4233 + }, + { + "epoch": 0.97, + "learning_rate": 0.00017272388651887538, + "loss": 0.7628, + "step": 4234 + }, + { + "epoch": 0.97, + "learning_rate": 0.00017271154859508807, + "loss": 0.7918, + "step": 4235 + }, + { + "epoch": 0.97, + "learning_rate": 0.0001726992083223713, + "loss": 0.7105, + "step": 4236 + }, + { + "epoch": 0.97, + "learning_rate": 0.0001726868657011237, + "loss": 0.7671, + "step": 4237 + }, + { + "epoch": 0.97, + "learning_rate": 0.00017267452073174404, + "loss": 0.7254, + "step": 4238 + }, + { + "epoch": 0.97, + "learning_rate": 0.00017266217341463112, + "loss": 0.726, + "step": 4239 + }, + { + "epoch": 0.97, + "learning_rate": 0.00017264982375018377, + "loss": 0.7848, + "step": 4240 + }, + { + "epoch": 0.97, + "learning_rate": 0.00017263747173880097, + "loss": 0.7008, + "step": 4241 + }, + { + "epoch": 0.97, + "learning_rate": 0.0001726251173808818, + "loss": 0.7324, + "step": 4242 + }, + { + "epoch": 0.97, + "learning_rate": 0.00017261276067682526, + "loss": 0.6856, + "step": 4243 + }, + { + "epoch": 0.97, + "learning_rate": 0.00017260040162703062, + "loss": 0.7241, + "step": 4244 + }, + { + "epoch": 0.97, + "learning_rate": 0.0001725880402318971, + "loss": 0.729, + "step": 4245 + }, + { + "epoch": 0.97, + "learning_rate": 0.000172575676491824, + "loss": 0.7511, + "step": 4246 + }, + { + "epoch": 0.97, + "learning_rate": 0.00017256331040721082, + "loss": 0.6552, + "step": 4247 + }, + { + "epoch": 0.97, + "learning_rate": 0.00017255094197845693, + "loss": 0.7476, + "step": 4248 + }, + { + "epoch": 0.97, + "learning_rate": 0.00017253857120596197, + "loss": 0.7671, + "step": 4249 + }, + { + "epoch": 0.97, + "learning_rate": 0.00017252619809012557, + "loss": 0.73, + "step": 4250 + }, + { + "epoch": 0.97, + "learning_rate": 0.00017251382263134735, + "loss": 0.7076, + "step": 4251 + }, + { + "epoch": 0.97, + "learning_rate": 0.00017250144483002723, + "loss": 0.7691, + "step": 4252 + }, + { + "epoch": 0.97, + "learning_rate": 0.000172489064686565, + "loss": 0.7275, + "step": 4253 + }, + { + "epoch": 0.97, + "learning_rate": 0.0001724766822013606, + "loss": 0.8123, + "step": 4254 + }, + { + "epoch": 0.97, + "learning_rate": 0.00017246429737481403, + "loss": 0.6975, + "step": 4255 + }, + { + "epoch": 0.97, + "learning_rate": 0.00017245191020732542, + "loss": 0.7161, + "step": 4256 + }, + { + "epoch": 0.97, + "learning_rate": 0.0001724395206992949, + "loss": 0.83, + "step": 4257 + }, + { + "epoch": 0.97, + "learning_rate": 0.00017242712885112276, + "loss": 0.6711, + "step": 4258 + }, + { + "epoch": 0.97, + "learning_rate": 0.00017241473466320922, + "loss": 0.7369, + "step": 4259 + }, + { + "epoch": 0.97, + "learning_rate": 0.00017240233813595478, + "loss": 0.7635, + "step": 4260 + }, + { + "epoch": 0.97, + "learning_rate": 0.00017238993926975985, + "loss": 0.7284, + "step": 4261 + }, + { + "epoch": 0.97, + "learning_rate": 0.00017237753806502494, + "loss": 0.7544, + "step": 4262 + }, + { + "epoch": 0.98, + "learning_rate": 0.00017236513452215075, + "loss": 0.7629, + "step": 4263 + }, + { + "epoch": 0.98, + "learning_rate": 0.0001723527286415379, + "loss": 0.7492, + "step": 4264 + }, + { + "epoch": 0.98, + "learning_rate": 0.0001723403204235872, + "loss": 0.7892, + "step": 4265 + }, + { + "epoch": 0.98, + "learning_rate": 0.0001723279098686995, + "loss": 0.76, + "step": 4266 + }, + { + "epoch": 0.98, + "learning_rate": 0.0001723154969772757, + "loss": 0.75, + "step": 4267 + }, + { + "epoch": 0.98, + "learning_rate": 0.00017230308174971678, + "loss": 0.6667, + "step": 4268 + }, + { + "epoch": 0.98, + "learning_rate": 0.00017229066418642384, + "loss": 0.7265, + "step": 4269 + }, + { + "epoch": 0.98, + "learning_rate": 0.00017227824428779805, + "loss": 0.6313, + "step": 4270 + }, + { + "epoch": 0.98, + "learning_rate": 0.00017226582205424055, + "loss": 0.7116, + "step": 4271 + }, + { + "epoch": 0.98, + "learning_rate": 0.0001722533974861527, + "loss": 0.7723, + "step": 4272 + }, + { + "epoch": 0.98, + "learning_rate": 0.00017224097058393589, + "loss": 0.7318, + "step": 4273 + }, + { + "epoch": 0.98, + "learning_rate": 0.00017222854134799152, + "loss": 0.6772, + "step": 4274 + }, + { + "epoch": 0.98, + "learning_rate": 0.00017221610977872113, + "loss": 0.7073, + "step": 4275 + }, + { + "epoch": 0.98, + "learning_rate": 0.00017220367587652627, + "loss": 0.7027, + "step": 4276 + }, + { + "epoch": 0.98, + "learning_rate": 0.00017219123964180874, + "loss": 0.72, + "step": 4277 + }, + { + "epoch": 0.98, + "learning_rate": 0.00017217880107497018, + "loss": 0.7698, + "step": 4278 + }, + { + "epoch": 0.98, + "learning_rate": 0.00017216636017641245, + "loss": 0.7144, + "step": 4279 + }, + { + "epoch": 0.98, + "learning_rate": 0.00017215391694653747, + "loss": 0.7556, + "step": 4280 + }, + { + "epoch": 0.98, + "learning_rate": 0.00017214147138574717, + "loss": 0.7305, + "step": 4281 + }, + { + "epoch": 0.98, + "learning_rate": 0.00017212902349444362, + "loss": 0.7869, + "step": 4282 + }, + { + "epoch": 0.98, + "learning_rate": 0.00017211657327302897, + "loss": 0.8121, + "step": 4283 + }, + { + "epoch": 0.98, + "learning_rate": 0.00017210412072190542, + "loss": 0.7677, + "step": 4284 + }, + { + "epoch": 0.98, + "learning_rate": 0.0001720916658414752, + "loss": 0.7729, + "step": 4285 + }, + { + "epoch": 0.98, + "learning_rate": 0.00017207920863214067, + "loss": 0.749, + "step": 4286 + }, + { + "epoch": 0.98, + "learning_rate": 0.00017206674909430434, + "loss": 0.7378, + "step": 4287 + }, + { + "epoch": 0.98, + "learning_rate": 0.0001720542872283686, + "loss": 0.7402, + "step": 4288 + }, + { + "epoch": 0.98, + "learning_rate": 0.0001720418230347361, + "loss": 0.6911, + "step": 4289 + }, + { + "epoch": 0.98, + "learning_rate": 0.00017202935651380947, + "loss": 0.7487, + "step": 4290 + }, + { + "epoch": 0.98, + "learning_rate": 0.00017201688766599145, + "loss": 0.6953, + "step": 4291 + }, + { + "epoch": 0.98, + "learning_rate": 0.00017200441649168485, + "loss": 0.6736, + "step": 4292 + }, + { + "epoch": 0.98, + "learning_rate": 0.00017199194299129247, + "loss": 0.7249, + "step": 4293 + }, + { + "epoch": 0.98, + "learning_rate": 0.00017197946716521737, + "loss": 0.6796, + "step": 4294 + }, + { + "epoch": 0.98, + "learning_rate": 0.00017196698901386252, + "loss": 0.7469, + "step": 4295 + }, + { + "epoch": 0.98, + "learning_rate": 0.00017195450853763107, + "loss": 0.7292, + "step": 4296 + }, + { + "epoch": 0.98, + "learning_rate": 0.00017194202573692614, + "loss": 0.7906, + "step": 4297 + }, + { + "epoch": 0.98, + "learning_rate": 0.00017192954061215102, + "loss": 0.7165, + "step": 4298 + }, + { + "epoch": 0.98, + "learning_rate": 0.000171917053163709, + "loss": 0.68, + "step": 4299 + }, + { + "epoch": 0.98, + "learning_rate": 0.00017190456339200353, + "loss": 0.7707, + "step": 4300 + }, + { + "epoch": 0.98, + "learning_rate": 0.0001718920712974381, + "loss": 0.7054, + "step": 4301 + }, + { + "epoch": 0.98, + "learning_rate": 0.0001718795768804162, + "loss": 0.7248, + "step": 4302 + }, + { + "epoch": 0.98, + "learning_rate": 0.0001718670801413415, + "loss": 0.7028, + "step": 4303 + }, + { + "epoch": 0.98, + "learning_rate": 0.00017185458108061772, + "loss": 0.7084, + "step": 4304 + }, + { + "epoch": 0.98, + "learning_rate": 0.00017184207969864863, + "loss": 0.7971, + "step": 4305 + }, + { + "epoch": 0.98, + "learning_rate": 0.00017182957599583805, + "loss": 0.7654, + "step": 4306 + }, + { + "epoch": 0.99, + "learning_rate": 0.00017181706997258992, + "loss": 0.7064, + "step": 4307 + }, + { + "epoch": 0.99, + "learning_rate": 0.00017180456162930828, + "loss": 0.7958, + "step": 4308 + }, + { + "epoch": 0.99, + "learning_rate": 0.00017179205096639718, + "loss": 0.6892, + "step": 4309 + }, + { + "epoch": 0.99, + "learning_rate": 0.0001717795379842608, + "loss": 0.8037, + "step": 4310 + }, + { + "epoch": 0.99, + "learning_rate": 0.0001717670226833033, + "loss": 0.6956, + "step": 4311 + }, + { + "epoch": 0.99, + "learning_rate": 0.00017175450506392908, + "loss": 0.7324, + "step": 4312 + }, + { + "epoch": 0.99, + "learning_rate": 0.00017174198512654246, + "loss": 0.7462, + "step": 4313 + }, + { + "epoch": 0.99, + "learning_rate": 0.0001717294628715479, + "loss": 0.8464, + "step": 4314 + }, + { + "epoch": 0.99, + "learning_rate": 0.00017171693829934993, + "loss": 0.7019, + "step": 4315 + }, + { + "epoch": 0.99, + "learning_rate": 0.00017170441141035317, + "loss": 0.8391, + "step": 4316 + }, + { + "epoch": 0.99, + "learning_rate": 0.00017169188220496226, + "loss": 0.7351, + "step": 4317 + }, + { + "epoch": 0.99, + "learning_rate": 0.00017167935068358202, + "loss": 0.7351, + "step": 4318 + }, + { + "epoch": 0.99, + "learning_rate": 0.00017166681684661718, + "loss": 0.6751, + "step": 4319 + }, + { + "epoch": 0.99, + "learning_rate": 0.00017165428069447274, + "loss": 0.7195, + "step": 4320 + }, + { + "epoch": 0.99, + "learning_rate": 0.00017164174222755363, + "loss": 0.7506, + "step": 4321 + }, + { + "epoch": 0.99, + "learning_rate": 0.0001716292014462649, + "loss": 0.6923, + "step": 4322 + }, + { + "epoch": 0.99, + "learning_rate": 0.00017161665835101168, + "loss": 0.7351, + "step": 4323 + }, + { + "epoch": 0.99, + "learning_rate": 0.0001716041129421992, + "loss": 0.8036, + "step": 4324 + }, + { + "epoch": 0.99, + "learning_rate": 0.00017159156522023267, + "loss": 0.7866, + "step": 4325 + }, + { + "epoch": 0.99, + "learning_rate": 0.00017157901518551752, + "loss": 0.7301, + "step": 4326 + }, + { + "epoch": 0.99, + "learning_rate": 0.00017156646283845913, + "loss": 0.7746, + "step": 4327 + }, + { + "epoch": 0.99, + "learning_rate": 0.00017155390817946305, + "loss": 0.8186, + "step": 4328 + }, + { + "epoch": 0.99, + "learning_rate": 0.00017154135120893478, + "loss": 0.7578, + "step": 4329 + }, + { + "epoch": 0.99, + "learning_rate": 0.00017152879192728002, + "loss": 0.7295, + "step": 4330 + }, + { + "epoch": 0.99, + "learning_rate": 0.00017151623033490447, + "loss": 0.7535, + "step": 4331 + }, + { + "epoch": 0.99, + "learning_rate": 0.00017150366643221393, + "loss": 0.7958, + "step": 4332 + }, + { + "epoch": 0.99, + "learning_rate": 0.0001714911002196143, + "loss": 0.7965, + "step": 4333 + }, + { + "epoch": 0.99, + "learning_rate": 0.0001714785316975115, + "loss": 0.7743, + "step": 4334 + }, + { + "epoch": 0.99, + "learning_rate": 0.00017146596086631158, + "loss": 0.6943, + "step": 4335 + }, + { + "epoch": 0.99, + "learning_rate": 0.00017145338772642064, + "loss": 0.7352, + "step": 4336 + }, + { + "epoch": 0.99, + "learning_rate": 0.0001714408122782448, + "loss": 0.6326, + "step": 4337 + }, + { + "epoch": 0.99, + "learning_rate": 0.00017142823452219038, + "loss": 0.7469, + "step": 4338 + }, + { + "epoch": 0.99, + "learning_rate": 0.00017141565445866362, + "loss": 0.7244, + "step": 4339 + }, + { + "epoch": 0.99, + "learning_rate": 0.000171403072088071, + "loss": 0.7564, + "step": 4340 + }, + { + "epoch": 0.99, + "learning_rate": 0.00017139048741081892, + "loss": 0.8202, + "step": 4341 + }, + { + "epoch": 0.99, + "learning_rate": 0.000171377900427314, + "loss": 0.6964, + "step": 4342 + }, + { + "epoch": 0.99, + "learning_rate": 0.00017136531113796276, + "loss": 0.7702, + "step": 4343 + }, + { + "epoch": 0.99, + "learning_rate": 0.00017135271954317198, + "loss": 0.7357, + "step": 4344 + }, + { + "epoch": 0.99, + "learning_rate": 0.0001713401256433484, + "loss": 0.7455, + "step": 4345 + }, + { + "epoch": 0.99, + "learning_rate": 0.00017132752943889887, + "loss": 0.7258, + "step": 4346 + }, + { + "epoch": 0.99, + "learning_rate": 0.0001713149309302303, + "loss": 0.7108, + "step": 4347 + }, + { + "epoch": 0.99, + "learning_rate": 0.00017130233011774964, + "loss": 0.7307, + "step": 4348 + }, + { + "epoch": 0.99, + "learning_rate": 0.00017128972700186404, + "loss": 0.7176, + "step": 4349 + }, + { + "epoch": 0.99, + "learning_rate": 0.0001712771215829806, + "loss": 0.7234, + "step": 4350 + }, + { + "epoch": 1.0, + "learning_rate": 0.0001712645138615065, + "loss": 0.8093, + "step": 4351 + }, + { + "epoch": 1.0, + "learning_rate": 0.00017125190383784908, + "loss": 0.7529, + "step": 4352 + }, + { + "epoch": 1.0, + "learning_rate": 0.0001712392915124157, + "loss": 0.7137, + "step": 4353 + }, + { + "epoch": 1.0, + "learning_rate": 0.00017122667688561377, + "loss": 0.7474, + "step": 4354 + }, + { + "epoch": 1.0, + "learning_rate": 0.0001712140599578508, + "loss": 0.7299, + "step": 4355 + }, + { + "epoch": 1.0, + "learning_rate": 0.00017120144072953441, + "loss": 0.8096, + "step": 4356 + }, + { + "epoch": 1.0, + "learning_rate": 0.00017118881920107223, + "loss": 0.7291, + "step": 4357 + }, + { + "epoch": 1.0, + "learning_rate": 0.000171176195372872, + "loss": 0.7357, + "step": 4358 + }, + { + "epoch": 1.0, + "learning_rate": 0.0001711635692453416, + "loss": 0.7234, + "step": 4359 + }, + { + "epoch": 1.0, + "learning_rate": 0.00017115094081888882, + "loss": 0.7231, + "step": 4360 + }, + { + "epoch": 1.0, + "learning_rate": 0.00017113831009392164, + "loss": 0.7046, + "step": 4361 + }, + { + "epoch": 1.0, + "learning_rate": 0.0001711256770708481, + "loss": 0.7473, + "step": 4362 + }, + { + "epoch": 1.0, + "learning_rate": 0.00017111304175007632, + "loss": 0.6763, + "step": 4363 + }, + { + "epoch": 1.0, + "learning_rate": 0.00017110040413201446, + "loss": 0.7138, + "step": 4364 + }, + { + "epoch": 1.0, + "learning_rate": 0.00017108776421707083, + "loss": 0.7548, + "step": 4365 + }, + { + "epoch": 1.0, + "learning_rate": 0.0001710751220056537, + "loss": 0.7883, + "step": 4366 + }, + { + "epoch": 1.0, + "learning_rate": 0.0001710624774981715, + "loss": 0.7274, + "step": 4367 + }, + { + "epoch": 1.0, + "learning_rate": 0.0001710498306950327, + "loss": 0.6949, + "step": 4368 + }, + { + "epoch": 1.0, + "learning_rate": 0.00017103718159664578, + "loss": 0.7667, + "step": 4369 + }, + { + "epoch": 1.0, + "learning_rate": 0.0001710245302034195, + "loss": 0.7641, + "step": 4370 + }, + { + "epoch": 1.0, + "learning_rate": 0.00017101187651576252, + "loss": 0.755, + "step": 4371 + }, + { + "epoch": 1.0, + "learning_rate": 0.00017099922053408356, + "loss": 0.7505, + "step": 4372 + }, + { + "epoch": 1.0, + "learning_rate": 0.00017098656225879148, + "loss": 0.7206, + "step": 4373 + }, + { + "epoch": 1.0, + "learning_rate": 0.00017097390169029526, + "loss": 0.7398, + "step": 4374 + }, + { + "epoch": 1.0, + "learning_rate": 0.00017096123882900385, + "loss": 0.7117, + "step": 4375 + }, + { + "epoch": 1.0, + "learning_rate": 0.00017094857367532632, + "loss": 0.6821, + "step": 4376 + }, + { + "epoch": 1.0, + "learning_rate": 0.00017093590622967183, + "loss": 0.8027, + "step": 4377 + }, + { + "epoch": 1.0, + "learning_rate": 0.0001709232364924496, + "loss": 0.7603, + "step": 4378 + }, + { + "epoch": 1.0, + "learning_rate": 0.0001709105644640689, + "loss": 0.6887, + "step": 4379 + }, + { + "epoch": 1.0, + "learning_rate": 0.00017089789014493914, + "loss": 0.6973, + "step": 4380 + }, + { + "epoch": 1.0, + "learning_rate": 0.0001708852135354697, + "loss": 0.7662, + "step": 4381 + }, + { + "epoch": 1.0, + "learning_rate": 0.00017087253463607017, + "loss": 0.7176, + "step": 4382 + }, + { + "epoch": 1.0, + "learning_rate": 0.00017085985344715007, + "loss": 0.7354, + "step": 4383 + }, + { + "epoch": 1.0, + "learning_rate": 0.00017084716996911911, + "loss": 0.6403, + "step": 4384 + }, + { + "epoch": 1.0, + "learning_rate": 0.000170834484202387, + "loss": 0.764, + "step": 4385 + }, + { + "epoch": 1.0, + "learning_rate": 0.00017082179614736357, + "loss": 0.7486, + "step": 4386 + }, + { + "epoch": 1.0, + "learning_rate": 0.00017080910580445868, + "loss": 0.7429, + "step": 4387 + }, + { + "epoch": 1.0, + "learning_rate": 0.0001707964131740823, + "loss": 0.737, + "step": 4388 + }, + { + "epoch": 1.0, + "learning_rate": 0.00017078371825664446, + "loss": 0.8207, + "step": 4389 + }, + { + "epoch": 1.0, + "learning_rate": 0.00017077102105255532, + "loss": 0.7753, + "step": 4390 + }, + { + "epoch": 1.0, + "learning_rate": 0.00017075832156222495, + "loss": 0.7339, + "step": 4391 + }, + { + "epoch": 1.0, + "learning_rate": 0.0001707456197860637, + "loss": 0.7441, + "step": 4392 + }, + { + "epoch": 1.0, + "learning_rate": 0.00017073291572448183, + "loss": 0.7748, + "step": 4393 + }, + { + "epoch": 1.01, + "learning_rate": 0.00017072020937788983, + "loss": 0.6995, + "step": 4394 + }, + { + "epoch": 1.01, + "learning_rate": 0.00017070750074669804, + "loss": 0.7301, + "step": 4395 + }, + { + "epoch": 1.01, + "learning_rate": 0.00017069478983131715, + "loss": 0.7666, + "step": 4396 + }, + { + "epoch": 1.01, + "learning_rate": 0.0001706820766321577, + "loss": 0.6438, + "step": 4397 + }, + { + "epoch": 1.01, + "learning_rate": 0.00017066936114963044, + "loss": 0.7347, + "step": 4398 + }, + { + "epoch": 1.01, + "learning_rate": 0.00017065664338414608, + "loss": 0.7039, + "step": 4399 + }, + { + "epoch": 1.01, + "learning_rate": 0.0001706439233361155, + "loss": 0.7016, + "step": 4400 + }, + { + "epoch": 1.01, + "learning_rate": 0.0001706312010059496, + "loss": 0.7203, + "step": 4401 + }, + { + "epoch": 1.01, + "learning_rate": 0.00017061847639405939, + "loss": 0.7351, + "step": 4402 + }, + { + "epoch": 1.01, + "learning_rate": 0.00017060574950085596, + "loss": 0.729, + "step": 4403 + }, + { + "epoch": 1.01, + "learning_rate": 0.0001705930203267504, + "loss": 0.7687, + "step": 4404 + }, + { + "epoch": 1.01, + "learning_rate": 0.00017058028887215394, + "loss": 0.7348, + "step": 4405 + }, + { + "epoch": 1.01, + "learning_rate": 0.00017056755513747787, + "loss": 0.7007, + "step": 4406 + }, + { + "epoch": 1.01, + "learning_rate": 0.00017055481912313356, + "loss": 0.7054, + "step": 4407 + }, + { + "epoch": 1.01, + "learning_rate": 0.0001705420808295324, + "loss": 0.8184, + "step": 4408 + }, + { + "epoch": 1.01, + "learning_rate": 0.00017052934025708595, + "loss": 0.7809, + "step": 4409 + }, + { + "epoch": 1.01, + "learning_rate": 0.00017051659740620578, + "loss": 0.7911, + "step": 4410 + }, + { + "epoch": 1.01, + "learning_rate": 0.00017050385227730352, + "loss": 0.7316, + "step": 4411 + }, + { + "epoch": 1.01, + "learning_rate": 0.00017049110487079094, + "loss": 0.7751, + "step": 4412 + }, + { + "epoch": 1.01, + "learning_rate": 0.00017047835518707978, + "loss": 0.8124, + "step": 4413 + }, + { + "epoch": 1.01, + "learning_rate": 0.000170465603226582, + "loss": 0.6982, + "step": 4414 + }, + { + "epoch": 1.01, + "learning_rate": 0.00017045284898970948, + "loss": 0.7692, + "step": 4415 + }, + { + "epoch": 1.01, + "learning_rate": 0.00017044009247687428, + "loss": 0.7316, + "step": 4416 + }, + { + "epoch": 1.01, + "learning_rate": 0.00017042733368848848, + "loss": 0.6934, + "step": 4417 + }, + { + "epoch": 1.01, + "eval_loss": 0.7041641473770142, + "eval_runtime": 1184.5539, + "eval_samples_per_second": 42.264, + "eval_steps_per_second": 10.566, + "step": 4417 + }, + { + "epoch": 1.0, + "learning_rate": 0.00017041457262496424, + "loss": 0.6994, + "step": 4418 + }, + { + "epoch": 1.0, + "learning_rate": 0.0001704018092867138, + "loss": 0.7303, + "step": 4419 + }, + { + "epoch": 1.0, + "learning_rate": 0.00017038904367414949, + "loss": 0.6814, + "step": 4420 + }, + { + "epoch": 1.0, + "learning_rate": 0.0001703762757876837, + "loss": 0.7598, + "step": 4421 + }, + { + "epoch": 1.0, + "learning_rate": 0.00017036350562772893, + "loss": 0.7525, + "step": 4422 + }, + { + "epoch": 1.0, + "learning_rate": 0.00017035073319469766, + "loss": 0.7809, + "step": 4423 + }, + { + "epoch": 1.0, + "learning_rate": 0.0001703379584890025, + "loss": 0.7656, + "step": 4424 + }, + { + "epoch": 1.0, + "learning_rate": 0.0001703251815110562, + "loss": 0.7949, + "step": 4425 + }, + { + "epoch": 1.0, + "learning_rate": 0.0001703124022612714, + "loss": 0.7866, + "step": 4426 + }, + { + "epoch": 1.0, + "learning_rate": 0.00017029962074006108, + "loss": 0.7684, + "step": 4427 + }, + { + "epoch": 1.0, + "learning_rate": 0.00017028683694783804, + "loss": 0.7035, + "step": 4428 + }, + { + "epoch": 1.0, + "learning_rate": 0.00017027405088501527, + "loss": 0.7393, + "step": 4429 + }, + { + "epoch": 1.0, + "learning_rate": 0.00017026126255200583, + "loss": 0.7206, + "step": 4430 + }, + { + "epoch": 1.0, + "learning_rate": 0.00017024847194922285, + "loss": 0.7572, + "step": 4431 + }, + { + "epoch": 1.0, + "learning_rate": 0.00017023567907707956, + "loss": 0.7176, + "step": 4432 + }, + { + "epoch": 1.0, + "learning_rate": 0.00017022288393598917, + "loss": 0.7166, + "step": 4433 + }, + { + "epoch": 1.0, + "learning_rate": 0.00017021008652636508, + "loss": 0.7394, + "step": 4434 + }, + { + "epoch": 1.0, + "learning_rate": 0.00017019728684862067, + "loss": 0.7315, + "step": 4435 + }, + { + "epoch": 1.0, + "learning_rate": 0.0001701844849031694, + "loss": 0.738, + "step": 4436 + }, + { + "epoch": 1.0, + "learning_rate": 0.00017017168069042492, + "loss": 0.6766, + "step": 4437 + }, + { + "epoch": 1.0, + "learning_rate": 0.0001701588742108008, + "loss": 0.6461, + "step": 4438 + }, + { + "epoch": 1.01, + "learning_rate": 0.00017014606546471078, + "loss": 0.8207, + "step": 4439 + }, + { + "epoch": 1.01, + "learning_rate": 0.0001701332544525686, + "loss": 0.7553, + "step": 4440 + }, + { + "epoch": 1.01, + "learning_rate": 0.0001701204411747882, + "loss": 0.6906, + "step": 4441 + }, + { + "epoch": 1.01, + "learning_rate": 0.00017010762563178343, + "loss": 0.7116, + "step": 4442 + }, + { + "epoch": 1.01, + "learning_rate": 0.0001700948078239683, + "loss": 0.6754, + "step": 4443 + }, + { + "epoch": 1.01, + "learning_rate": 0.00017008198775175695, + "loss": 0.7735, + "step": 4444 + }, + { + "epoch": 1.01, + "learning_rate": 0.0001700691654155635, + "loss": 0.7259, + "step": 4445 + }, + { + "epoch": 1.01, + "learning_rate": 0.00017005634081580212, + "loss": 0.7872, + "step": 4446 + }, + { + "epoch": 1.01, + "learning_rate": 0.00017004351395288718, + "loss": 0.7034, + "step": 4447 + }, + { + "epoch": 1.01, + "learning_rate": 0.000170030684827233, + "loss": 0.7923, + "step": 4448 + }, + { + "epoch": 1.01, + "learning_rate": 0.00017001785343925406, + "loss": 0.8353, + "step": 4449 + }, + { + "epoch": 1.01, + "learning_rate": 0.00017000501978936482, + "loss": 0.6914, + "step": 4450 + }, + { + "epoch": 1.01, + "learning_rate": 0.00016999218387797992, + "loss": 0.7056, + "step": 4451 + }, + { + "epoch": 1.01, + "learning_rate": 0.00016997934570551403, + "loss": 0.6592, + "step": 4452 + }, + { + "epoch": 1.01, + "learning_rate": 0.00016996650527238181, + "loss": 0.7273, + "step": 4453 + }, + { + "epoch": 1.01, + "learning_rate": 0.00016995366257899813, + "loss": 0.7073, + "step": 4454 + }, + { + "epoch": 1.01, + "learning_rate": 0.00016994081762577786, + "loss": 0.7629, + "step": 4455 + }, + { + "epoch": 1.01, + "learning_rate": 0.00016992797041313597, + "loss": 0.7323, + "step": 4456 + }, + { + "epoch": 1.01, + "learning_rate": 0.00016991512094148743, + "loss": 0.7269, + "step": 4457 + }, + { + "epoch": 1.01, + "learning_rate": 0.00016990226921124738, + "loss": 0.7291, + "step": 4458 + }, + { + "epoch": 1.01, + "learning_rate": 0.000169889415222831, + "loss": 0.7373, + "step": 4459 + }, + { + "epoch": 1.01, + "learning_rate": 0.00016987655897665352, + "loss": 0.7431, + "step": 4460 + }, + { + "epoch": 1.01, + "learning_rate": 0.0001698637004731302, + "loss": 0.6702, + "step": 4461 + }, + { + "epoch": 1.01, + "learning_rate": 0.00016985083971267654, + "loss": 0.7688, + "step": 4462 + }, + { + "epoch": 1.01, + "learning_rate": 0.00016983797669570795, + "loss": 0.7513, + "step": 4463 + }, + { + "epoch": 1.01, + "learning_rate": 0.00016982511142263996, + "loss": 0.7799, + "step": 4464 + }, + { + "epoch": 1.01, + "learning_rate": 0.0001698122438938882, + "loss": 0.7001, + "step": 4465 + }, + { + "epoch": 1.01, + "learning_rate": 0.00016979937410986832, + "loss": 0.6744, + "step": 4466 + }, + { + "epoch": 1.01, + "learning_rate": 0.0001697865020709961, + "loss": 0.6786, + "step": 4467 + }, + { + "epoch": 1.01, + "learning_rate": 0.00016977362777768738, + "loss": 0.7162, + "step": 4468 + }, + { + "epoch": 1.01, + "learning_rate": 0.00016976075123035806, + "loss": 0.7517, + "step": 4469 + }, + { + "epoch": 1.01, + "learning_rate": 0.0001697478724294241, + "loss": 0.6957, + "step": 4470 + }, + { + "epoch": 1.01, + "learning_rate": 0.0001697349913753015, + "loss": 0.6566, + "step": 4471 + }, + { + "epoch": 1.01, + "learning_rate": 0.00016972210806840646, + "loss": 0.7075, + "step": 4472 + }, + { + "epoch": 1.01, + "learning_rate": 0.00016970922250915513, + "loss": 0.7304, + "step": 4473 + }, + { + "epoch": 1.01, + "learning_rate": 0.0001696963346979638, + "loss": 0.7078, + "step": 4474 + }, + { + "epoch": 1.01, + "learning_rate": 0.00016968344463524877, + "loss": 0.6873, + "step": 4475 + }, + { + "epoch": 1.01, + "learning_rate": 0.00016967055232142646, + "loss": 0.7399, + "step": 4476 + }, + { + "epoch": 1.01, + "learning_rate": 0.00016965765775691337, + "loss": 0.7057, + "step": 4477 + }, + { + "epoch": 1.01, + "learning_rate": 0.0001696447609421261, + "loss": 0.7093, + "step": 4478 + }, + { + "epoch": 1.01, + "learning_rate": 0.00016963186187748117, + "loss": 0.6943, + "step": 4479 + }, + { + "epoch": 1.01, + "learning_rate": 0.00016961896056339536, + "loss": 0.7757, + "step": 4480 + }, + { + "epoch": 1.01, + "learning_rate": 0.00016960605700028542, + "loss": 0.6866, + "step": 4481 + }, + { + "epoch": 1.01, + "learning_rate": 0.0001695931511885682, + "loss": 0.658, + "step": 4482 + }, + { + "epoch": 1.02, + "learning_rate": 0.00016958024312866064, + "loss": 0.7383, + "step": 4483 + }, + { + "epoch": 1.02, + "learning_rate": 0.0001695673328209797, + "loss": 0.7988, + "step": 4484 + }, + { + "epoch": 1.02, + "learning_rate": 0.00016955442026594245, + "loss": 0.7045, + "step": 4485 + }, + { + "epoch": 1.02, + "learning_rate": 0.000169541505463966, + "loss": 0.722, + "step": 4486 + }, + { + "epoch": 1.02, + "learning_rate": 0.00016952858841546764, + "loss": 0.7654, + "step": 4487 + }, + { + "epoch": 1.02, + "learning_rate": 0.00016951566912086458, + "loss": 0.7195, + "step": 4488 + }, + { + "epoch": 1.02, + "learning_rate": 0.00016950274758057422, + "loss": 0.6172, + "step": 4489 + }, + { + "epoch": 1.02, + "learning_rate": 0.00016948982379501394, + "loss": 0.7347, + "step": 4490 + }, + { + "epoch": 1.02, + "learning_rate": 0.0001694768977646013, + "loss": 0.7548, + "step": 4491 + }, + { + "epoch": 1.02, + "learning_rate": 0.00016946396948975382, + "loss": 0.6978, + "step": 4492 + }, + { + "epoch": 1.02, + "learning_rate": 0.00016945103897088917, + "loss": 0.7107, + "step": 4493 + }, + { + "epoch": 1.02, + "learning_rate": 0.00016943810620842506, + "loss": 0.7051, + "step": 4494 + }, + { + "epoch": 1.02, + "learning_rate": 0.0001694251712027793, + "loss": 0.6452, + "step": 4495 + }, + { + "epoch": 1.02, + "learning_rate": 0.00016941223395436968, + "loss": 0.7121, + "step": 4496 + }, + { + "epoch": 1.02, + "learning_rate": 0.00016939929446361423, + "loss": 0.7298, + "step": 4497 + }, + { + "epoch": 1.02, + "learning_rate": 0.0001693863527309309, + "loss": 0.7501, + "step": 4498 + }, + { + "epoch": 1.02, + "learning_rate": 0.0001693734087567378, + "loss": 0.7216, + "step": 4499 + }, + { + "epoch": 1.02, + "learning_rate": 0.00016936046254145305, + "loss": 0.7831, + "step": 4500 + }, + { + "epoch": 1.02, + "learning_rate": 0.0001693475140854949, + "loss": 0.6855, + "step": 4501 + }, + { + "epoch": 1.02, + "learning_rate": 0.00016933456338928164, + "loss": 0.7383, + "step": 4502 + }, + { + "epoch": 1.02, + "learning_rate": 0.00016932161045323164, + "loss": 0.6914, + "step": 4503 + }, + { + "epoch": 1.02, + "learning_rate": 0.00016930865527776335, + "loss": 0.7126, + "step": 4504 + }, + { + "epoch": 1.02, + "learning_rate": 0.00016929569786329525, + "loss": 0.7604, + "step": 4505 + }, + { + "epoch": 1.02, + "learning_rate": 0.00016928273821024593, + "loss": 0.7841, + "step": 4506 + }, + { + "epoch": 1.02, + "learning_rate": 0.00016926977631903412, + "loss": 0.7275, + "step": 4507 + }, + { + "epoch": 1.02, + "learning_rate": 0.00016925681219007846, + "loss": 0.7242, + "step": 4508 + }, + { + "epoch": 1.02, + "learning_rate": 0.00016924384582379782, + "loss": 0.6939, + "step": 4509 + }, + { + "epoch": 1.02, + "learning_rate": 0.00016923087722061104, + "loss": 0.6783, + "step": 4510 + }, + { + "epoch": 1.02, + "learning_rate": 0.00016921790638093706, + "loss": 0.6509, + "step": 4511 + }, + { + "epoch": 1.02, + "learning_rate": 0.00016920493330519493, + "loss": 0.7402, + "step": 4512 + }, + { + "epoch": 1.02, + "learning_rate": 0.0001691919579938037, + "loss": 0.7496, + "step": 4513 + }, + { + "epoch": 1.02, + "learning_rate": 0.0001691789804471826, + "loss": 0.7121, + "step": 4514 + }, + { + "epoch": 1.02, + "learning_rate": 0.00016916600066575082, + "loss": 0.67, + "step": 4515 + }, + { + "epoch": 1.02, + "learning_rate": 0.00016915301864992764, + "loss": 0.6564, + "step": 4516 + }, + { + "epoch": 1.02, + "learning_rate": 0.0001691400344001325, + "loss": 0.6752, + "step": 4517 + }, + { + "epoch": 1.02, + "learning_rate": 0.00016912704791678483, + "loss": 0.7225, + "step": 4518 + }, + { + "epoch": 1.02, + "learning_rate": 0.00016911405920030416, + "loss": 0.7249, + "step": 4519 + }, + { + "epoch": 1.02, + "learning_rate": 0.0001691010682511101, + "loss": 0.726, + "step": 4520 + }, + { + "epoch": 1.02, + "learning_rate": 0.0001690880750696223, + "loss": 0.7546, + "step": 4521 + }, + { + "epoch": 1.02, + "learning_rate": 0.00016907507965626049, + "loss": 0.7217, + "step": 4522 + }, + { + "epoch": 1.02, + "learning_rate": 0.0001690620820114445, + "loss": 0.6897, + "step": 4523 + }, + { + "epoch": 1.02, + "learning_rate": 0.0001690490821355942, + "loss": 0.7549, + "step": 4524 + }, + { + "epoch": 1.02, + "learning_rate": 0.00016903608002912955, + "loss": 0.6886, + "step": 4525 + }, + { + "epoch": 1.02, + "learning_rate": 0.00016902307569247065, + "loss": 0.7055, + "step": 4526 + }, + { + "epoch": 1.03, + "learning_rate": 0.0001690100691260375, + "loss": 0.7272, + "step": 4527 + }, + { + "epoch": 1.03, + "learning_rate": 0.00016899706033025034, + "loss": 0.6974, + "step": 4528 + }, + { + "epoch": 1.03, + "learning_rate": 0.0001689840493055294, + "loss": 0.7435, + "step": 4529 + }, + { + "epoch": 1.03, + "learning_rate": 0.00016897103605229496, + "loss": 0.6265, + "step": 4530 + }, + { + "epoch": 1.03, + "learning_rate": 0.00016895802057096746, + "loss": 0.731, + "step": 4531 + }, + { + "epoch": 1.03, + "learning_rate": 0.00016894500286196733, + "loss": 0.7098, + "step": 4532 + }, + { + "epoch": 1.03, + "learning_rate": 0.00016893198292571512, + "loss": 0.6377, + "step": 4533 + }, + { + "epoch": 1.03, + "learning_rate": 0.00016891896076263148, + "loss": 0.6211, + "step": 4534 + }, + { + "epoch": 1.03, + "learning_rate": 0.000168905936373137, + "loss": 0.7304, + "step": 4535 + }, + { + "epoch": 1.03, + "learning_rate": 0.00016889290975765245, + "loss": 0.7186, + "step": 4536 + }, + { + "epoch": 1.03, + "learning_rate": 0.00016887988091659873, + "loss": 0.7337, + "step": 4537 + }, + { + "epoch": 1.03, + "learning_rate": 0.00016886684985039665, + "loss": 0.7052, + "step": 4538 + }, + { + "epoch": 1.03, + "learning_rate": 0.0001688538165594672, + "loss": 0.825, + "step": 4539 + }, + { + "epoch": 1.03, + "learning_rate": 0.00016884078104423145, + "loss": 0.7154, + "step": 4540 + }, + { + "epoch": 1.03, + "learning_rate": 0.00016882774330511047, + "loss": 0.7332, + "step": 4541 + }, + { + "epoch": 1.03, + "learning_rate": 0.00016881470334252546, + "loss": 0.7093, + "step": 4542 + }, + { + "epoch": 1.03, + "learning_rate": 0.00016880166115689762, + "loss": 0.7632, + "step": 4543 + }, + { + "epoch": 1.03, + "learning_rate": 0.0001687886167486483, + "loss": 0.7251, + "step": 4544 + }, + { + "epoch": 1.03, + "learning_rate": 0.000168775570118199, + "loss": 0.7627, + "step": 4545 + }, + { + "epoch": 1.03, + "learning_rate": 0.00016876252126597104, + "loss": 0.6738, + "step": 4546 + }, + { + "epoch": 1.03, + "learning_rate": 0.00016874947019238605, + "loss": 0.668, + "step": 4547 + }, + { + "epoch": 1.03, + "learning_rate": 0.0001687364168978656, + "loss": 0.7307, + "step": 4548 + }, + { + "epoch": 1.03, + "learning_rate": 0.0001687233613828314, + "loss": 0.7016, + "step": 4549 + }, + { + "epoch": 1.03, + "learning_rate": 0.0001687103036477052, + "loss": 0.6806, + "step": 4550 + }, + { + "epoch": 1.03, + "learning_rate": 0.00016869724369290883, + "loss": 0.6595, + "step": 4551 + }, + { + "epoch": 1.03, + "learning_rate": 0.00016868418151886415, + "loss": 0.7271, + "step": 4552 + }, + { + "epoch": 1.03, + "learning_rate": 0.0001686711171259932, + "loss": 0.6875, + "step": 4553 + }, + { + "epoch": 1.03, + "learning_rate": 0.00016865805051471795, + "loss": 0.7032, + "step": 4554 + }, + { + "epoch": 1.03, + "learning_rate": 0.00016864498168546054, + "loss": 0.6236, + "step": 4555 + }, + { + "epoch": 1.03, + "learning_rate": 0.0001686319106386432, + "loss": 0.7177, + "step": 4556 + }, + { + "epoch": 1.03, + "learning_rate": 0.0001686188373746881, + "loss": 0.7782, + "step": 4557 + }, + { + "epoch": 1.03, + "learning_rate": 0.00016860576189401766, + "loss": 0.6658, + "step": 4558 + }, + { + "epoch": 1.03, + "learning_rate": 0.00016859268419705422, + "loss": 0.762, + "step": 4559 + }, + { + "epoch": 1.03, + "learning_rate": 0.0001685796042842203, + "loss": 0.6947, + "step": 4560 + }, + { + "epoch": 1.03, + "learning_rate": 0.00016856652215593835, + "loss": 0.6746, + "step": 4561 + }, + { + "epoch": 1.03, + "learning_rate": 0.00016855343781263112, + "loss": 0.6971, + "step": 4562 + }, + { + "epoch": 1.03, + "learning_rate": 0.00016854035125472117, + "loss": 0.7167, + "step": 4563 + }, + { + "epoch": 1.03, + "learning_rate": 0.00016852726248263133, + "loss": 0.7997, + "step": 4564 + }, + { + "epoch": 1.03, + "learning_rate": 0.00016851417149678444, + "loss": 0.7078, + "step": 4565 + }, + { + "epoch": 1.03, + "learning_rate": 0.00016850107829760336, + "loss": 0.7614, + "step": 4566 + }, + { + "epoch": 1.03, + "learning_rate": 0.00016848798288551103, + "loss": 0.704, + "step": 4567 + }, + { + "epoch": 1.03, + "learning_rate": 0.0001684748852609306, + "loss": 0.7294, + "step": 4568 + }, + { + "epoch": 1.03, + "learning_rate": 0.0001684617854242851, + "loss": 0.7165, + "step": 4569 + }, + { + "epoch": 1.03, + "learning_rate": 0.00016844868337599777, + "loss": 0.6535, + "step": 4570 + }, + { + "epoch": 1.04, + "learning_rate": 0.0001684355791164918, + "loss": 0.7564, + "step": 4571 + }, + { + "epoch": 1.04, + "learning_rate": 0.0001684224726461906, + "loss": 0.6719, + "step": 4572 + }, + { + "epoch": 1.04, + "learning_rate": 0.0001684093639655175, + "loss": 0.6908, + "step": 4573 + }, + { + "epoch": 1.04, + "learning_rate": 0.00016839625307489607, + "loss": 0.6948, + "step": 4574 + }, + { + "epoch": 1.04, + "learning_rate": 0.00016838313997474976, + "loss": 0.7339, + "step": 4575 + }, + { + "epoch": 1.04, + "learning_rate": 0.0001683700246655022, + "loss": 0.7143, + "step": 4576 + }, + { + "epoch": 1.04, + "learning_rate": 0.0001683569071475771, + "loss": 0.6861, + "step": 4577 + }, + { + "epoch": 1.04, + "learning_rate": 0.0001683437874213982, + "loss": 0.7691, + "step": 4578 + }, + { + "epoch": 1.04, + "learning_rate": 0.00016833066548738935, + "loss": 0.7382, + "step": 4579 + }, + { + "epoch": 1.04, + "learning_rate": 0.00016831754134597447, + "loss": 0.7852, + "step": 4580 + }, + { + "epoch": 1.04, + "learning_rate": 0.0001683044149975775, + "loss": 0.6743, + "step": 4581 + }, + { + "epoch": 1.04, + "learning_rate": 0.00016829128644262243, + "loss": 0.7501, + "step": 4582 + }, + { + "epoch": 1.04, + "learning_rate": 0.00016827815568153348, + "loss": 0.6963, + "step": 4583 + }, + { + "epoch": 1.04, + "learning_rate": 0.00016826502271473479, + "loss": 0.7082, + "step": 4584 + }, + { + "epoch": 1.04, + "learning_rate": 0.0001682518875426506, + "loss": 0.6999, + "step": 4585 + }, + { + "epoch": 1.04, + "learning_rate": 0.00016823875016570527, + "loss": 0.7034, + "step": 4586 + }, + { + "epoch": 1.04, + "learning_rate": 0.00016822561058432318, + "loss": 0.7197, + "step": 4587 + }, + { + "epoch": 1.04, + "learning_rate": 0.00016821246879892882, + "loss": 0.72, + "step": 4588 + }, + { + "epoch": 1.04, + "learning_rate": 0.00016819932480994671, + "loss": 0.7049, + "step": 4589 + }, + { + "epoch": 1.04, + "learning_rate": 0.00016818617861780148, + "loss": 0.7279, + "step": 4590 + }, + { + "epoch": 1.04, + "learning_rate": 0.0001681730302229178, + "loss": 0.788, + "step": 4591 + }, + { + "epoch": 1.04, + "learning_rate": 0.00016815987962572043, + "loss": 0.7086, + "step": 4592 + }, + { + "epoch": 1.04, + "learning_rate": 0.0001681467268266342, + "loss": 0.7429, + "step": 4593 + }, + { + "epoch": 1.04, + "learning_rate": 0.000168133571826084, + "loss": 0.6684, + "step": 4594 + }, + { + "epoch": 1.04, + "learning_rate": 0.00016812041462449485, + "loss": 0.6941, + "step": 4595 + }, + { + "epoch": 1.04, + "learning_rate": 0.00016810725522229173, + "loss": 0.6956, + "step": 4596 + }, + { + "epoch": 1.04, + "learning_rate": 0.0001680940936198997, + "loss": 0.7047, + "step": 4597 + }, + { + "epoch": 1.04, + "learning_rate": 0.0001680809298177441, + "loss": 0.7021, + "step": 4598 + }, + { + "epoch": 1.04, + "learning_rate": 0.00016806776381625004, + "loss": 0.7037, + "step": 4599 + }, + { + "epoch": 1.04, + "learning_rate": 0.00016805459561584294, + "loss": 0.7009, + "step": 4600 + }, + { + "epoch": 1.04, + "learning_rate": 0.00016804142521694812, + "loss": 0.706, + "step": 4601 + }, + { + "epoch": 1.04, + "learning_rate": 0.00016802825261999109, + "loss": 0.6287, + "step": 4602 + }, + { + "epoch": 1.04, + "learning_rate": 0.0001680150778253974, + "loss": 0.6344, + "step": 4603 + }, + { + "epoch": 1.04, + "learning_rate": 0.00016800190083359262, + "loss": 0.7687, + "step": 4604 + }, + { + "epoch": 1.04, + "learning_rate": 0.00016798872164500245, + "loss": 0.7098, + "step": 4605 + }, + { + "epoch": 1.04, + "learning_rate": 0.00016797554026005265, + "loss": 0.7146, + "step": 4606 + }, + { + "epoch": 1.04, + "learning_rate": 0.00016796235667916902, + "loss": 0.724, + "step": 4607 + }, + { + "epoch": 1.04, + "learning_rate": 0.00016794917090277747, + "loss": 0.6858, + "step": 4608 + }, + { + "epoch": 1.04, + "learning_rate": 0.00016793598293130395, + "loss": 0.7047, + "step": 4609 + }, + { + "epoch": 1.04, + "learning_rate": 0.00016792279276517448, + "loss": 0.6871, + "step": 4610 + }, + { + "epoch": 1.04, + "learning_rate": 0.00016790960040481522, + "loss": 0.6471, + "step": 4611 + }, + { + "epoch": 1.04, + "learning_rate": 0.0001678964058506523, + "loss": 0.6673, + "step": 4612 + }, + { + "epoch": 1.04, + "learning_rate": 0.00016788320910311196, + "loss": 0.7347, + "step": 4613 + }, + { + "epoch": 1.05, + "learning_rate": 0.00016787001016262053, + "loss": 0.7735, + "step": 4614 + }, + { + "epoch": 1.05, + "learning_rate": 0.00016785680902960443, + "loss": 0.7158, + "step": 4615 + }, + { + "epoch": 1.05, + "learning_rate": 0.0001678436057044901, + "loss": 0.7207, + "step": 4616 + }, + { + "epoch": 1.05, + "learning_rate": 0.00016783040018770407, + "loss": 0.7078, + "step": 4617 + }, + { + "epoch": 1.05, + "learning_rate": 0.0001678171924796729, + "loss": 0.7421, + "step": 4618 + }, + { + "epoch": 1.05, + "learning_rate": 0.0001678039825808233, + "loss": 0.7153, + "step": 4619 + }, + { + "epoch": 1.05, + "learning_rate": 0.00016779077049158204, + "loss": 0.7658, + "step": 4620 + }, + { + "epoch": 1.05, + "learning_rate": 0.00016777755621237588, + "loss": 0.7445, + "step": 4621 + }, + { + "epoch": 1.05, + "learning_rate": 0.00016776433974363173, + "loss": 0.705, + "step": 4622 + }, + { + "epoch": 1.05, + "learning_rate": 0.00016775112108577655, + "loss": 0.6745, + "step": 4623 + }, + { + "epoch": 1.05, + "learning_rate": 0.00016773790023923737, + "loss": 0.7111, + "step": 4624 + }, + { + "epoch": 1.05, + "learning_rate": 0.00016772467720444121, + "loss": 0.6843, + "step": 4625 + }, + { + "epoch": 1.05, + "learning_rate": 0.00016771145198181539, + "loss": 0.8, + "step": 4626 + }, + { + "epoch": 1.05, + "learning_rate": 0.00016769822457178704, + "loss": 0.6948, + "step": 4627 + }, + { + "epoch": 1.05, + "learning_rate": 0.00016768499497478345, + "loss": 0.7483, + "step": 4628 + }, + { + "epoch": 1.05, + "learning_rate": 0.00016767176319123207, + "loss": 0.7461, + "step": 4629 + }, + { + "epoch": 1.05, + "learning_rate": 0.0001676585292215603, + "loss": 0.7626, + "step": 4630 + }, + { + "epoch": 1.05, + "learning_rate": 0.00016764529306619567, + "loss": 0.7063, + "step": 4631 + }, + { + "epoch": 1.05, + "learning_rate": 0.0001676320547255658, + "loss": 0.693, + "step": 4632 + }, + { + "epoch": 1.05, + "learning_rate": 0.0001676188142000983, + "loss": 0.6765, + "step": 4633 + }, + { + "epoch": 1.05, + "learning_rate": 0.00016760557149022096, + "loss": 0.6984, + "step": 4634 + }, + { + "epoch": 1.05, + "learning_rate": 0.00016759232659636152, + "loss": 0.7021, + "step": 4635 + }, + { + "epoch": 1.05, + "learning_rate": 0.00016757907951894793, + "loss": 0.7517, + "step": 4636 + }, + { + "epoch": 1.05, + "learning_rate": 0.00016756583025840803, + "loss": 0.7451, + "step": 4637 + }, + { + "epoch": 1.05, + "learning_rate": 0.00016755257881516996, + "loss": 0.6922, + "step": 4638 + }, + { + "epoch": 1.05, + "learning_rate": 0.0001675393251896617, + "loss": 0.6471, + "step": 4639 + }, + { + "epoch": 1.05, + "learning_rate": 0.00016752606938231148, + "loss": 0.7302, + "step": 4640 + }, + { + "epoch": 1.05, + "learning_rate": 0.00016751281139354743, + "loss": 0.7126, + "step": 4641 + }, + { + "epoch": 1.05, + "learning_rate": 0.00016749955122379796, + "loss": 0.7872, + "step": 4642 + }, + { + "epoch": 1.05, + "learning_rate": 0.00016748628887349137, + "loss": 0.6827, + "step": 4643 + }, + { + "epoch": 1.05, + "learning_rate": 0.00016747302434305615, + "loss": 0.6267, + "step": 4644 + }, + { + "epoch": 1.05, + "learning_rate": 0.0001674597576329207, + "loss": 0.6523, + "step": 4645 + }, + { + "epoch": 1.05, + "learning_rate": 0.0001674464887435137, + "loss": 0.6794, + "step": 4646 + }, + { + "epoch": 1.05, + "learning_rate": 0.00016743321767526375, + "loss": 0.7334, + "step": 4647 + }, + { + "epoch": 1.05, + "learning_rate": 0.0001674199444285996, + "loss": 0.7147, + "step": 4648 + }, + { + "epoch": 1.05, + "learning_rate": 0.00016740666900395002, + "loss": 0.665, + "step": 4649 + }, + { + "epoch": 1.05, + "learning_rate": 0.00016739339140174386, + "loss": 0.8183, + "step": 4650 + }, + { + "epoch": 1.05, + "learning_rate": 0.00016738011162241009, + "loss": 0.7663, + "step": 4651 + }, + { + "epoch": 1.05, + "learning_rate": 0.00016736682966637766, + "loss": 0.6982, + "step": 4652 + }, + { + "epoch": 1.05, + "learning_rate": 0.00016735354553407562, + "loss": 0.719, + "step": 4653 + }, + { + "epoch": 1.05, + "learning_rate": 0.0001673402592259332, + "loss": 0.7122, + "step": 4654 + }, + { + "epoch": 1.05, + "learning_rate": 0.00016732697074237955, + "loss": 0.7002, + "step": 4655 + }, + { + "epoch": 1.05, + "learning_rate": 0.00016731368008384397, + "loss": 0.701, + "step": 4656 + }, + { + "epoch": 1.05, + "learning_rate": 0.0001673003872507558, + "loss": 0.7332, + "step": 4657 + }, + { + "epoch": 1.06, + "learning_rate": 0.0001672870922435445, + "loss": 0.7698, + "step": 4658 + }, + { + "epoch": 1.06, + "learning_rate": 0.00016727379506263949, + "loss": 0.7181, + "step": 4659 + }, + { + "epoch": 1.06, + "learning_rate": 0.0001672604957084704, + "loss": 0.7333, + "step": 4660 + }, + { + "epoch": 1.06, + "learning_rate": 0.00016724719418146684, + "loss": 0.6947, + "step": 4661 + }, + { + "epoch": 1.06, + "learning_rate": 0.00016723389048205848, + "loss": 0.7313, + "step": 4662 + }, + { + "epoch": 1.06, + "learning_rate": 0.00016722058461067512, + "loss": 0.7868, + "step": 4663 + }, + { + "epoch": 1.06, + "learning_rate": 0.00016720727656774665, + "loss": 0.6462, + "step": 4664 + }, + { + "epoch": 1.06, + "learning_rate": 0.0001671939663537029, + "loss": 0.7059, + "step": 4665 + }, + { + "epoch": 1.06, + "learning_rate": 0.0001671806539689739, + "loss": 0.6919, + "step": 4666 + }, + { + "epoch": 1.06, + "learning_rate": 0.0001671673394139897, + "loss": 0.6784, + "step": 4667 + }, + { + "epoch": 1.06, + "learning_rate": 0.0001671540226891804, + "loss": 0.7092, + "step": 4668 + }, + { + "epoch": 1.06, + "learning_rate": 0.00016714070379497624, + "loss": 0.7622, + "step": 4669 + }, + { + "epoch": 1.06, + "learning_rate": 0.00016712738273180744, + "loss": 0.6901, + "step": 4670 + }, + { + "epoch": 1.06, + "learning_rate": 0.00016711405950010433, + "loss": 0.7197, + "step": 4671 + }, + { + "epoch": 1.06, + "learning_rate": 0.00016710073410029734, + "loss": 0.6817, + "step": 4672 + }, + { + "epoch": 1.06, + "learning_rate": 0.00016708740653281694, + "loss": 0.8515, + "step": 4673 + }, + { + "epoch": 1.06, + "learning_rate": 0.00016707407679809368, + "loss": 0.6889, + "step": 4674 + }, + { + "epoch": 1.06, + "learning_rate": 0.00016706074489655816, + "loss": 0.6871, + "step": 4675 + }, + { + "epoch": 1.06, + "learning_rate": 0.00016704741082864107, + "loss": 0.715, + "step": 4676 + }, + { + "epoch": 1.06, + "learning_rate": 0.00016703407459477316, + "loss": 0.6691, + "step": 4677 + }, + { + "epoch": 1.06, + "learning_rate": 0.00016702073619538526, + "loss": 0.729, + "step": 4678 + }, + { + "epoch": 1.06, + "learning_rate": 0.00016700739563090826, + "loss": 0.734, + "step": 4679 + }, + { + "epoch": 1.06, + "learning_rate": 0.0001669940529017731, + "loss": 0.7142, + "step": 4680 + }, + { + "epoch": 1.06, + "learning_rate": 0.00016698070800841088, + "loss": 0.6852, + "step": 4681 + }, + { + "epoch": 1.06, + "learning_rate": 0.00016696736095125263, + "loss": 0.7037, + "step": 4682 + }, + { + "epoch": 1.06, + "learning_rate": 0.0001669540117307296, + "loss": 0.7073, + "step": 4683 + }, + { + "epoch": 1.06, + "learning_rate": 0.00016694066034727293, + "loss": 0.7498, + "step": 4684 + }, + { + "epoch": 1.06, + "learning_rate": 0.00016692730680131402, + "loss": 0.6475, + "step": 4685 + }, + { + "epoch": 1.06, + "learning_rate": 0.0001669139510932842, + "loss": 0.7141, + "step": 4686 + }, + { + "epoch": 1.06, + "learning_rate": 0.000166900593223615, + "loss": 0.7056, + "step": 4687 + }, + { + "epoch": 1.06, + "learning_rate": 0.00016688723319273786, + "loss": 0.7329, + "step": 4688 + }, + { + "epoch": 1.06, + "learning_rate": 0.00016687387100108438, + "loss": 0.7781, + "step": 4689 + }, + { + "epoch": 1.06, + "learning_rate": 0.00016686050664908625, + "loss": 0.6342, + "step": 4690 + }, + { + "epoch": 1.06, + "learning_rate": 0.00016684714013717522, + "loss": 0.6589, + "step": 4691 + }, + { + "epoch": 1.06, + "learning_rate": 0.0001668337714657831, + "loss": 0.7636, + "step": 4692 + }, + { + "epoch": 1.06, + "learning_rate": 0.00016682040063534167, + "loss": 0.7081, + "step": 4693 + }, + { + "epoch": 1.06, + "learning_rate": 0.00016680702764628299, + "loss": 0.6856, + "step": 4694 + }, + { + "epoch": 1.06, + "learning_rate": 0.00016679365249903894, + "loss": 0.7672, + "step": 4695 + }, + { + "epoch": 1.06, + "learning_rate": 0.00016678027519404176, + "loss": 0.6615, + "step": 4696 + }, + { + "epoch": 1.06, + "learning_rate": 0.00016676689573172346, + "loss": 0.6022, + "step": 4697 + }, + { + "epoch": 1.06, + "learning_rate": 0.00016675351411251635, + "loss": 0.6697, + "step": 4698 + }, + { + "epoch": 1.06, + "learning_rate": 0.00016674013033685266, + "loss": 0.6978, + "step": 4699 + }, + { + "epoch": 1.06, + "learning_rate": 0.00016672674440516478, + "loss": 0.7176, + "step": 4700 + }, + { + "epoch": 1.06, + "learning_rate": 0.00016671335631788514, + "loss": 0.6828, + "step": 4701 + }, + { + "epoch": 1.07, + "learning_rate": 0.00016669996607544625, + "loss": 0.7002, + "step": 4702 + }, + { + "epoch": 1.07, + "learning_rate": 0.00016668657367828064, + "loss": 0.6938, + "step": 4703 + }, + { + "epoch": 1.07, + "learning_rate": 0.00016667317912682098, + "loss": 0.7344, + "step": 4704 + }, + { + "epoch": 1.07, + "learning_rate": 0.00016665978242149998, + "loss": 0.7557, + "step": 4705 + }, + { + "epoch": 1.07, + "learning_rate": 0.00016664638356275039, + "loss": 0.7141, + "step": 4706 + }, + { + "epoch": 1.07, + "learning_rate": 0.00016663298255100508, + "loss": 0.6875, + "step": 4707 + }, + { + "epoch": 1.07, + "learning_rate": 0.00016661957938669697, + "loss": 0.7998, + "step": 4708 + }, + { + "epoch": 1.07, + "learning_rate": 0.000166606174070259, + "loss": 0.6424, + "step": 4709 + }, + { + "epoch": 1.07, + "learning_rate": 0.0001665927666021243, + "loss": 0.6972, + "step": 4710 + }, + { + "epoch": 1.07, + "learning_rate": 0.00016657935698272596, + "loss": 0.6892, + "step": 4711 + }, + { + "epoch": 1.07, + "learning_rate": 0.00016656594521249714, + "loss": 0.724, + "step": 4712 + }, + { + "epoch": 1.07, + "learning_rate": 0.0001665525312918712, + "loss": 0.7072, + "step": 4713 + }, + { + "epoch": 1.07, + "learning_rate": 0.00016653911522128133, + "loss": 0.7288, + "step": 4714 + }, + { + "epoch": 1.07, + "learning_rate": 0.00016652569700116105, + "loss": 0.7332, + "step": 4715 + }, + { + "epoch": 1.07, + "learning_rate": 0.00016651227663194379, + "loss": 0.7066, + "step": 4716 + }, + { + "epoch": 1.07, + "learning_rate": 0.00016649885411406312, + "loss": 0.6996, + "step": 4717 + }, + { + "epoch": 1.07, + "learning_rate": 0.0001664854294479526, + "loss": 0.6813, + "step": 4718 + }, + { + "epoch": 1.07, + "learning_rate": 0.00016647200263404595, + "loss": 0.7396, + "step": 4719 + }, + { + "epoch": 1.07, + "learning_rate": 0.0001664585736727769, + "loss": 0.7091, + "step": 4720 + }, + { + "epoch": 1.07, + "learning_rate": 0.0001664451425645793, + "loss": 0.678, + "step": 4721 + }, + { + "epoch": 1.07, + "learning_rate": 0.00016643170930988698, + "loss": 0.752, + "step": 4722 + }, + { + "epoch": 1.07, + "learning_rate": 0.00016641827390913397, + "loss": 0.6679, + "step": 4723 + }, + { + "epoch": 1.07, + "learning_rate": 0.00016640483636275421, + "loss": 0.7198, + "step": 4724 + }, + { + "epoch": 1.07, + "learning_rate": 0.0001663913966711819, + "loss": 0.7204, + "step": 4725 + }, + { + "epoch": 1.07, + "learning_rate": 0.00016637795483485113, + "loss": 0.6636, + "step": 4726 + }, + { + "epoch": 1.07, + "learning_rate": 0.00016636451085419617, + "loss": 0.734, + "step": 4727 + }, + { + "epoch": 1.07, + "learning_rate": 0.00016635106472965132, + "loss": 0.6984, + "step": 4728 + }, + { + "epoch": 1.07, + "learning_rate": 0.00016633761646165095, + "loss": 0.7356, + "step": 4729 + }, + { + "epoch": 1.07, + "learning_rate": 0.0001663241660506295, + "loss": 0.6628, + "step": 4730 + }, + { + "epoch": 1.07, + "learning_rate": 0.00016631071349702147, + "loss": 0.6456, + "step": 4731 + }, + { + "epoch": 1.07, + "learning_rate": 0.0001662972588012615, + "loss": 0.6591, + "step": 4732 + }, + { + "epoch": 1.07, + "learning_rate": 0.00016628380196378415, + "loss": 0.7858, + "step": 4733 + }, + { + "epoch": 1.07, + "learning_rate": 0.0001662703429850242, + "loss": 0.7137, + "step": 4734 + }, + { + "epoch": 1.07, + "learning_rate": 0.00016625688186541643, + "loss": 0.6909, + "step": 4735 + }, + { + "epoch": 1.07, + "learning_rate": 0.0001662434186053957, + "loss": 0.684, + "step": 4736 + }, + { + "epoch": 1.07, + "learning_rate": 0.0001662299532053969, + "loss": 0.6807, + "step": 4737 + }, + { + "epoch": 1.07, + "learning_rate": 0.00016621648566585512, + "loss": 0.7723, + "step": 4738 + }, + { + "epoch": 1.07, + "learning_rate": 0.00016620301598720531, + "loss": 0.7128, + "step": 4739 + }, + { + "epoch": 1.07, + "learning_rate": 0.00016618954416988268, + "loss": 0.7044, + "step": 4740 + }, + { + "epoch": 1.07, + "learning_rate": 0.00016617607021432242, + "loss": 0.6697, + "step": 4741 + }, + { + "epoch": 1.07, + "learning_rate": 0.00016616259412095976, + "loss": 0.7171, + "step": 4742 + }, + { + "epoch": 1.07, + "learning_rate": 0.0001661491158902301, + "loss": 0.7634, + "step": 4743 + }, + { + "epoch": 1.07, + "learning_rate": 0.00016613563552256882, + "loss": 0.7093, + "step": 4744 + }, + { + "epoch": 1.08, + "learning_rate": 0.00016612215301841144, + "loss": 0.6725, + "step": 4745 + }, + { + "epoch": 1.08, + "learning_rate": 0.00016610866837819347, + "loss": 0.7345, + "step": 4746 + }, + { + "epoch": 1.08, + "learning_rate": 0.0001660951816023505, + "loss": 0.7005, + "step": 4747 + }, + { + "epoch": 1.08, + "learning_rate": 0.00016608169269131828, + "loss": 0.764, + "step": 4748 + }, + { + "epoch": 1.08, + "learning_rate": 0.00016606820164553256, + "loss": 0.7221, + "step": 4749 + }, + { + "epoch": 1.08, + "learning_rate": 0.0001660547084654291, + "loss": 0.7167, + "step": 4750 + }, + { + "epoch": 1.08, + "learning_rate": 0.0001660412131514439, + "loss": 0.7315, + "step": 4751 + }, + { + "epoch": 1.08, + "learning_rate": 0.00016602771570401278, + "loss": 0.7206, + "step": 4752 + }, + { + "epoch": 1.08, + "learning_rate": 0.00016601421612357192, + "loss": 0.7276, + "step": 4753 + }, + { + "epoch": 1.08, + "learning_rate": 0.00016600071441055732, + "loss": 0.7989, + "step": 4754 + }, + { + "epoch": 1.08, + "learning_rate": 0.00016598721056540518, + "loss": 0.7231, + "step": 4755 + }, + { + "epoch": 1.08, + "learning_rate": 0.00016597370458855176, + "loss": 0.7819, + "step": 4756 + }, + { + "epoch": 1.08, + "learning_rate": 0.00016596019648043337, + "loss": 0.7889, + "step": 4757 + }, + { + "epoch": 1.08, + "learning_rate": 0.00016594668624148634, + "loss": 0.7273, + "step": 4758 + }, + { + "epoch": 1.08, + "learning_rate": 0.00016593317387214715, + "loss": 0.6926, + "step": 4759 + }, + { + "epoch": 1.08, + "learning_rate": 0.00016591965937285227, + "loss": 0.7442, + "step": 4760 + }, + { + "epoch": 1.08, + "learning_rate": 0.00016590614274403836, + "loss": 0.6939, + "step": 4761 + }, + { + "epoch": 1.08, + "learning_rate": 0.000165892623986142, + "loss": 0.6874, + "step": 4762 + }, + { + "epoch": 1.08, + "learning_rate": 0.00016587910309959995, + "loss": 0.6719, + "step": 4763 + }, + { + "epoch": 1.08, + "learning_rate": 0.00016586558008484898, + "loss": 0.6933, + "step": 4764 + }, + { + "epoch": 1.08, + "learning_rate": 0.000165852054942326, + "loss": 0.7505, + "step": 4765 + }, + { + "epoch": 1.08, + "learning_rate": 0.00016583852767246783, + "loss": 0.7119, + "step": 4766 + }, + { + "epoch": 1.08, + "learning_rate": 0.0001658249982757116, + "loss": 0.6781, + "step": 4767 + }, + { + "epoch": 1.08, + "learning_rate": 0.00016581146675249424, + "loss": 0.7399, + "step": 4768 + }, + { + "epoch": 1.08, + "learning_rate": 0.00016579793310325295, + "loss": 0.7521, + "step": 4769 + }, + { + "epoch": 1.08, + "learning_rate": 0.00016578439732842495, + "loss": 0.7989, + "step": 4770 + }, + { + "epoch": 1.08, + "learning_rate": 0.00016577085942844747, + "loss": 0.7144, + "step": 4771 + }, + { + "epoch": 1.08, + "learning_rate": 0.00016575731940375788, + "loss": 0.6519, + "step": 4772 + }, + { + "epoch": 1.08, + "learning_rate": 0.00016574377725479358, + "loss": 0.7534, + "step": 4773 + }, + { + "epoch": 1.08, + "learning_rate": 0.00016573023298199202, + "loss": 0.6895, + "step": 4774 + }, + { + "epoch": 1.08, + "learning_rate": 0.00016571668658579077, + "loss": 0.6762, + "step": 4775 + }, + { + "epoch": 1.08, + "learning_rate": 0.00016570313806662742, + "loss": 0.7228, + "step": 4776 + }, + { + "epoch": 1.08, + "learning_rate": 0.00016568958742493967, + "loss": 0.7016, + "step": 4777 + }, + { + "epoch": 1.08, + "learning_rate": 0.00016567603466116529, + "loss": 0.7039, + "step": 4778 + }, + { + "epoch": 1.08, + "learning_rate": 0.00016566247977574204, + "loss": 0.6725, + "step": 4779 + }, + { + "epoch": 1.08, + "learning_rate": 0.00016564892276910788, + "loss": 0.7375, + "step": 4780 + }, + { + "epoch": 1.08, + "learning_rate": 0.00016563536364170073, + "loss": 0.6924, + "step": 4781 + }, + { + "epoch": 1.08, + "learning_rate": 0.0001656218023939586, + "loss": 0.7037, + "step": 4782 + }, + { + "epoch": 1.08, + "learning_rate": 0.0001656082390263196, + "loss": 0.7303, + "step": 4783 + }, + { + "epoch": 1.08, + "learning_rate": 0.0001655946735392219, + "loss": 0.7016, + "step": 4784 + }, + { + "epoch": 1.08, + "learning_rate": 0.0001655811059331037, + "loss": 0.6994, + "step": 4785 + }, + { + "epoch": 1.08, + "learning_rate": 0.00016556753620840332, + "loss": 0.7314, + "step": 4786 + }, + { + "epoch": 1.08, + "learning_rate": 0.00016555396436555912, + "loss": 0.7515, + "step": 4787 + }, + { + "epoch": 1.08, + "learning_rate": 0.00016554039040500955, + "loss": 0.7122, + "step": 4788 + }, + { + "epoch": 1.09, + "learning_rate": 0.0001655268143271931, + "loss": 0.7004, + "step": 4789 + }, + { + "epoch": 1.09, + "learning_rate": 0.00016551323613254833, + "loss": 0.6996, + "step": 4790 + }, + { + "epoch": 1.09, + "learning_rate": 0.0001654996558215139, + "loss": 0.7714, + "step": 4791 + }, + { + "epoch": 1.09, + "learning_rate": 0.00016548607339452853, + "loss": 0.7894, + "step": 4792 + }, + { + "epoch": 1.09, + "learning_rate": 0.00016547248885203096, + "loss": 0.6979, + "step": 4793 + }, + { + "epoch": 1.09, + "learning_rate": 0.00016545890219446006, + "loss": 0.768, + "step": 4794 + }, + { + "epoch": 1.09, + "learning_rate": 0.00016544531342225474, + "loss": 0.7714, + "step": 4795 + }, + { + "epoch": 1.09, + "learning_rate": 0.00016543172253585396, + "loss": 0.7589, + "step": 4796 + }, + { + "epoch": 1.09, + "learning_rate": 0.00016541812953569686, + "loss": 0.6673, + "step": 4797 + }, + { + "epoch": 1.09, + "learning_rate": 0.00016540453442222245, + "loss": 0.7562, + "step": 4798 + }, + { + "epoch": 1.09, + "learning_rate": 0.00016539093719586994, + "loss": 0.7669, + "step": 4799 + }, + { + "epoch": 1.09, + "learning_rate": 0.00016537733785707866, + "loss": 0.6993, + "step": 4800 + }, + { + "epoch": 1.09, + "learning_rate": 0.0001653637364062878, + "loss": 0.7813, + "step": 4801 + }, + { + "epoch": 1.09, + "learning_rate": 0.00016535013284393687, + "loss": 0.7702, + "step": 4802 + }, + { + "epoch": 1.09, + "learning_rate": 0.0001653365271704653, + "loss": 0.6699, + "step": 4803 + }, + { + "epoch": 1.09, + "learning_rate": 0.00016532291938631258, + "loss": 0.6634, + "step": 4804 + }, + { + "epoch": 1.09, + "learning_rate": 0.00016530930949191835, + "loss": 0.6621, + "step": 4805 + }, + { + "epoch": 1.09, + "learning_rate": 0.00016529569748772224, + "loss": 0.73, + "step": 4806 + }, + { + "epoch": 1.09, + "learning_rate": 0.000165282083374164, + "loss": 0.71, + "step": 4807 + }, + { + "epoch": 1.09, + "learning_rate": 0.00016526846715168346, + "loss": 0.6652, + "step": 4808 + }, + { + "epoch": 1.09, + "learning_rate": 0.00016525484882072043, + "loss": 0.7153, + "step": 4809 + }, + { + "epoch": 1.09, + "learning_rate": 0.0001652412283817149, + "loss": 0.7208, + "step": 4810 + }, + { + "epoch": 1.09, + "learning_rate": 0.00016522760583510682, + "loss": 0.7331, + "step": 4811 + }, + { + "epoch": 1.09, + "learning_rate": 0.00016521398118133628, + "loss": 0.7141, + "step": 4812 + }, + { + "epoch": 1.09, + "learning_rate": 0.0001652003544208435, + "loss": 0.6897, + "step": 4813 + }, + { + "epoch": 1.09, + "learning_rate": 0.00016518672555406857, + "loss": 0.6774, + "step": 4814 + }, + { + "epoch": 1.09, + "learning_rate": 0.00016517309458145184, + "loss": 0.6695, + "step": 4815 + }, + { + "epoch": 1.09, + "learning_rate": 0.00016515946150343365, + "loss": 0.7138, + "step": 4816 + }, + { + "epoch": 1.09, + "learning_rate": 0.0001651458263204544, + "loss": 0.6195, + "step": 4817 + }, + { + "epoch": 1.09, + "learning_rate": 0.00016513218903295456, + "loss": 0.6753, + "step": 4818 + }, + { + "epoch": 1.09, + "learning_rate": 0.0001651185496413747, + "loss": 0.7009, + "step": 4819 + }, + { + "epoch": 1.09, + "learning_rate": 0.0001651049081461554, + "loss": 0.7544, + "step": 4820 + }, + { + "epoch": 1.09, + "learning_rate": 0.0001650912645477374, + "loss": 0.7383, + "step": 4821 + }, + { + "epoch": 1.09, + "learning_rate": 0.00016507761884656144, + "loss": 0.7099, + "step": 4822 + }, + { + "epoch": 1.09, + "learning_rate": 0.0001650639710430683, + "loss": 0.7044, + "step": 4823 + }, + { + "epoch": 1.09, + "learning_rate": 0.0001650503211376989, + "loss": 0.6749, + "step": 4824 + }, + { + "epoch": 1.09, + "learning_rate": 0.00016503666913089422, + "loss": 0.737, + "step": 4825 + }, + { + "epoch": 1.09, + "learning_rate": 0.00016502301502309522, + "loss": 0.7983, + "step": 4826 + }, + { + "epoch": 1.09, + "learning_rate": 0.0001650093588147431, + "loss": 0.7251, + "step": 4827 + }, + { + "epoch": 1.09, + "learning_rate": 0.00016499570050627888, + "loss": 0.7462, + "step": 4828 + }, + { + "epoch": 1.09, + "learning_rate": 0.0001649820400981439, + "loss": 0.7405, + "step": 4829 + }, + { + "epoch": 1.09, + "learning_rate": 0.0001649683775907794, + "loss": 0.7187, + "step": 4830 + }, + { + "epoch": 1.09, + "learning_rate": 0.00016495471298462678, + "loss": 0.7184, + "step": 4831 + }, + { + "epoch": 1.09, + "learning_rate": 0.00016494104628012743, + "loss": 0.7356, + "step": 4832 + }, + { + "epoch": 1.1, + "learning_rate": 0.00016492737747772287, + "loss": 0.7318, + "step": 4833 + }, + { + "epoch": 1.1, + "learning_rate": 0.0001649137065778547, + "loss": 0.7213, + "step": 4834 + }, + { + "epoch": 1.1, + "learning_rate": 0.0001649000335809645, + "loss": 0.6772, + "step": 4835 + }, + { + "epoch": 1.1, + "learning_rate": 0.000164886358487494, + "loss": 0.6934, + "step": 4836 + }, + { + "epoch": 1.1, + "learning_rate": 0.00016487268129788501, + "loss": 0.7773, + "step": 4837 + }, + { + "epoch": 1.1, + "learning_rate": 0.0001648590020125793, + "loss": 0.691, + "step": 4838 + }, + { + "epoch": 1.1, + "learning_rate": 0.0001648453206320188, + "loss": 0.7202, + "step": 4839 + }, + { + "epoch": 1.1, + "learning_rate": 0.0001648316371566455, + "loss": 0.741, + "step": 4840 + }, + { + "epoch": 1.1, + "learning_rate": 0.00016481795158690142, + "loss": 0.656, + "step": 4841 + }, + { + "epoch": 1.1, + "learning_rate": 0.00016480426392322865, + "loss": 0.7017, + "step": 4842 + }, + { + "epoch": 1.1, + "learning_rate": 0.00016479057416606945, + "loss": 0.6782, + "step": 4843 + }, + { + "epoch": 1.1, + "learning_rate": 0.00016477688231586595, + "loss": 0.6878, + "step": 4844 + }, + { + "epoch": 1.12, + "learning_rate": 0.00016396470087619462, + "loss": 0.6767, + "step": 4845 + }, + { + "epoch": 1.12, + "learning_rate": 0.00016395071774281524, + "loss": 0.7063, + "step": 4846 + }, + { + "epoch": 1.12, + "learning_rate": 0.0001639367324934625, + "loss": 0.689, + "step": 4847 + }, + { + "epoch": 1.12, + "learning_rate": 0.00016392274512859914, + "loss": 0.6851, + "step": 4848 + }, + { + "epoch": 1.12, + "learning_rate": 0.00016390875564868787, + "loss": 0.7232, + "step": 4849 + }, + { + "epoch": 1.12, + "learning_rate": 0.00016389476405419173, + "loss": 0.6756, + "step": 4850 + }, + { + "epoch": 1.12, + "learning_rate": 0.00016388077034557355, + "loss": 0.7767, + "step": 4851 + }, + { + "epoch": 1.12, + "learning_rate": 0.0001638667745232964, + "loss": 0.7715, + "step": 4852 + }, + { + "epoch": 1.12, + "learning_rate": 0.00016385277658782332, + "loss": 0.6857, + "step": 4853 + }, + { + "epoch": 1.12, + "learning_rate": 0.00016383877653961752, + "loss": 0.6884, + "step": 4854 + }, + { + "epoch": 1.12, + "learning_rate": 0.0001638247743791422, + "loss": 0.6448, + "step": 4855 + }, + { + "epoch": 1.12, + "learning_rate": 0.00016381077010686068, + "loss": 0.6969, + "step": 4856 + }, + { + "epoch": 1.12, + "learning_rate": 0.00016379676372323628, + "loss": 0.7312, + "step": 4857 + }, + { + "epoch": 1.12, + "learning_rate": 0.0001637827552287325, + "loss": 0.7356, + "step": 4858 + }, + { + "epoch": 1.13, + "learning_rate": 0.00016376874462381279, + "loss": 0.6286, + "step": 4859 + }, + { + "epoch": 1.13, + "learning_rate": 0.00016375473190894074, + "loss": 0.6425, + "step": 4860 + }, + { + "epoch": 1.13, + "learning_rate": 0.00016374071708458003, + "loss": 0.7228, + "step": 4861 + }, + { + "epoch": 1.13, + "learning_rate": 0.00016372670015119434, + "loss": 0.7349, + "step": 4862 + }, + { + "epoch": 1.13, + "learning_rate": 0.0001637126811092475, + "loss": 0.7657, + "step": 4863 + }, + { + "epoch": 1.13, + "learning_rate": 0.00016369865995920332, + "loss": 0.7099, + "step": 4864 + }, + { + "epoch": 1.13, + "learning_rate": 0.00016368463670152572, + "loss": 0.6672, + "step": 4865 + }, + { + "epoch": 1.13, + "learning_rate": 0.00016367061133667875, + "loss": 0.7146, + "step": 4866 + }, + { + "epoch": 1.13, + "learning_rate": 0.00016365658386512642, + "loss": 0.6883, + "step": 4867 + }, + { + "epoch": 1.13, + "learning_rate": 0.0001636425542873329, + "loss": 0.7156, + "step": 4868 + }, + { + "epoch": 1.13, + "learning_rate": 0.00016362852260376237, + "loss": 0.6752, + "step": 4869 + }, + { + "epoch": 1.13, + "learning_rate": 0.00016361448881487914, + "loss": 0.7177, + "step": 4870 + }, + { + "epoch": 1.13, + "learning_rate": 0.0001636004529211475, + "loss": 0.6808, + "step": 4871 + }, + { + "epoch": 1.13, + "learning_rate": 0.0001635864149230319, + "loss": 0.6523, + "step": 4872 + }, + { + "epoch": 1.13, + "learning_rate": 0.00016357237482099684, + "loss": 0.67, + "step": 4873 + }, + { + "epoch": 1.13, + "learning_rate": 0.0001635583326155068, + "loss": 0.7223, + "step": 4874 + }, + { + "epoch": 1.13, + "learning_rate": 0.00016354428830702648, + "loss": 0.7343, + "step": 4875 + }, + { + "epoch": 1.13, + "learning_rate": 0.00016353024189602055, + "loss": 0.634, + "step": 4876 + }, + { + "epoch": 1.13, + "learning_rate": 0.00016351619338295375, + "loss": 0.7267, + "step": 4877 + }, + { + "epoch": 1.13, + "learning_rate": 0.00016350214276829094, + "loss": 0.6076, + "step": 4878 + }, + { + "epoch": 1.13, + "learning_rate": 0.000163488090052497, + "loss": 0.7105, + "step": 4879 + }, + { + "epoch": 1.13, + "learning_rate": 0.0001634740352360369, + "loss": 0.6441, + "step": 4880 + }, + { + "epoch": 1.13, + "learning_rate": 0.0001634599783193757, + "loss": 0.6678, + "step": 4881 + }, + { + "epoch": 1.13, + "learning_rate": 0.00016344591930297849, + "loss": 0.6645, + "step": 4882 + }, + { + "epoch": 1.13, + "learning_rate": 0.00016343185818731046, + "loss": 0.7744, + "step": 4883 + }, + { + "epoch": 1.13, + "learning_rate": 0.00016341779497283684, + "loss": 0.7541, + "step": 4884 + }, + { + "epoch": 1.13, + "learning_rate": 0.00016340372966002295, + "loss": 0.7607, + "step": 4885 + }, + { + "epoch": 1.13, + "learning_rate": 0.00016338966224933423, + "loss": 0.6787, + "step": 4886 + }, + { + "epoch": 1.13, + "learning_rate": 0.00016337559274123606, + "loss": 0.6753, + "step": 4887 + }, + { + "epoch": 1.13, + "learning_rate": 0.000163361521136194, + "loss": 0.7324, + "step": 4888 + }, + { + "epoch": 1.13, + "learning_rate": 0.00016334744743467364, + "loss": 0.7214, + "step": 4889 + }, + { + "epoch": 1.13, + "learning_rate": 0.00016333337163714067, + "loss": 0.7296, + "step": 4890 + }, + { + "epoch": 1.13, + "learning_rate": 0.00016331929374406077, + "loss": 0.7356, + "step": 4891 + }, + { + "epoch": 1.13, + "learning_rate": 0.0001633052137558998, + "loss": 0.7163, + "step": 4892 + }, + { + "epoch": 1.13, + "learning_rate": 0.0001632911316731236, + "loss": 0.7057, + "step": 4893 + }, + { + "epoch": 1.13, + "learning_rate": 0.0001632770474961981, + "loss": 0.7125, + "step": 4894 + }, + { + "epoch": 1.13, + "learning_rate": 0.00016326296122558937, + "loss": 0.7692, + "step": 4895 + }, + { + "epoch": 1.13, + "learning_rate": 0.00016324887286176342, + "loss": 0.7276, + "step": 4896 + }, + { + "epoch": 1.13, + "learning_rate": 0.00016323478240518643, + "loss": 0.7724, + "step": 4897 + }, + { + "epoch": 1.13, + "learning_rate": 0.00016322068985632462, + "loss": 0.7055, + "step": 4898 + }, + { + "epoch": 1.13, + "learning_rate": 0.00016320659521564432, + "loss": 0.6612, + "step": 4899 + }, + { + "epoch": 1.13, + "learning_rate": 0.00016319249848361183, + "loss": 0.6913, + "step": 4900 + }, + { + "epoch": 1.13, + "learning_rate": 0.00016317839966069355, + "loss": 0.6931, + "step": 4901 + }, + { + "epoch": 1.13, + "learning_rate": 0.00016316429874735605, + "loss": 0.7262, + "step": 4902 + }, + { + "epoch": 1.14, + "learning_rate": 0.00016315019574406585, + "loss": 0.7034, + "step": 4903 + }, + { + "epoch": 1.14, + "learning_rate": 0.0001631360906512896, + "loss": 0.6597, + "step": 4904 + }, + { + "epoch": 1.14, + "learning_rate": 0.00016312198346949398, + "loss": 0.7341, + "step": 4905 + }, + { + "epoch": 1.14, + "learning_rate": 0.0001631078741991458, + "loss": 0.7197, + "step": 4906 + }, + { + "epoch": 1.14, + "learning_rate": 0.00016309376284071188, + "loss": 0.7013, + "step": 4907 + }, + { + "epoch": 1.14, + "learning_rate": 0.00016307964939465914, + "loss": 0.7509, + "step": 4908 + }, + { + "epoch": 1.14, + "learning_rate": 0.00016306553386145453, + "loss": 0.7007, + "step": 4909 + }, + { + "epoch": 1.14, + "learning_rate": 0.00016305141624156514, + "loss": 0.709, + "step": 4910 + }, + { + "epoch": 1.14, + "learning_rate": 0.00016303729653545807, + "loss": 0.6722, + "step": 4911 + }, + { + "epoch": 1.14, + "learning_rate": 0.0001630231747436005, + "loss": 0.7089, + "step": 4912 + }, + { + "epoch": 1.14, + "learning_rate": 0.0001630090508664597, + "loss": 0.6099, + "step": 4913 + }, + { + "epoch": 1.14, + "learning_rate": 0.00016299492490450297, + "loss": 0.656, + "step": 4914 + }, + { + "epoch": 1.14, + "learning_rate": 0.00016298079685819774, + "loss": 0.6955, + "step": 4915 + }, + { + "epoch": 1.14, + "learning_rate": 0.0001629666667280114, + "loss": 0.7347, + "step": 4916 + }, + { + "epoch": 1.14, + "learning_rate": 0.0001629525345144116, + "loss": 0.6868, + "step": 4917 + }, + { + "epoch": 1.14, + "learning_rate": 0.00016293840021786585, + "loss": 0.7249, + "step": 4918 + }, + { + "epoch": 1.14, + "learning_rate": 0.00016292426383884183, + "loss": 0.7746, + "step": 4919 + }, + { + "epoch": 1.14, + "learning_rate": 0.0001629101253778073, + "loss": 0.6645, + "step": 4920 + }, + { + "epoch": 1.14, + "learning_rate": 0.00016289598483523008, + "loss": 0.6745, + "step": 4921 + }, + { + "epoch": 1.14, + "learning_rate": 0.000162881842211578, + "loss": 0.6812, + "step": 4922 + }, + { + "epoch": 1.14, + "learning_rate": 0.00016286769750731902, + "loss": 0.7217, + "step": 4923 + }, + { + "epoch": 1.14, + "learning_rate": 0.0001628535507229212, + "loss": 0.6865, + "step": 4924 + }, + { + "epoch": 1.14, + "learning_rate": 0.00016283940185885255, + "loss": 0.7031, + "step": 4925 + }, + { + "epoch": 1.14, + "learning_rate": 0.0001628252509155813, + "loss": 0.6997, + "step": 4926 + }, + { + "epoch": 1.14, + "learning_rate": 0.0001628110978935756, + "loss": 0.7621, + "step": 4927 + }, + { + "epoch": 1.14, + "learning_rate": 0.00016279694279330376, + "loss": 0.7119, + "step": 4928 + }, + { + "epoch": 1.14, + "learning_rate": 0.00016278278561523415, + "loss": 0.7202, + "step": 4929 + }, + { + "epoch": 1.14, + "learning_rate": 0.00016276862635983518, + "loss": 0.753, + "step": 4930 + }, + { + "epoch": 1.14, + "learning_rate": 0.0001627544650275754, + "loss": 0.6849, + "step": 4931 + }, + { + "epoch": 1.14, + "learning_rate": 0.00016274030161892328, + "loss": 0.7155, + "step": 4932 + }, + { + "epoch": 1.14, + "learning_rate": 0.00016272613613434756, + "loss": 0.6993, + "step": 4933 + }, + { + "epoch": 1.14, + "learning_rate": 0.00016271196857431684, + "loss": 0.7003, + "step": 4934 + }, + { + "epoch": 1.14, + "learning_rate": 0.00016269779893929992, + "loss": 0.7397, + "step": 4935 + }, + { + "epoch": 1.14, + "learning_rate": 0.00016268362722976569, + "loss": 0.685, + "step": 4936 + }, + { + "epoch": 1.14, + "learning_rate": 0.000162669453446183, + "loss": 0.7012, + "step": 4937 + }, + { + "epoch": 1.14, + "learning_rate": 0.00016265527758902088, + "loss": 0.7341, + "step": 4938 + }, + { + "epoch": 1.14, + "learning_rate": 0.0001626410996587483, + "loss": 0.727, + "step": 4939 + }, + { + "epoch": 1.14, + "learning_rate": 0.00016262691965583445, + "loss": 0.6624, + "step": 4940 + }, + { + "epoch": 1.14, + "learning_rate": 0.00016261273758074845, + "loss": 0.7152, + "step": 4941 + }, + { + "epoch": 1.14, + "learning_rate": 0.00016259855343395957, + "loss": 0.7383, + "step": 4942 + }, + { + "epoch": 1.14, + "learning_rate": 0.00016258436721593714, + "loss": 0.6973, + "step": 4943 + }, + { + "epoch": 1.14, + "learning_rate": 0.00016257017892715059, + "loss": 0.6731, + "step": 4944 + }, + { + "epoch": 1.14, + "learning_rate": 0.00016255598856806927, + "loss": 0.7559, + "step": 4945 + }, + { + "epoch": 1.15, + "learning_rate": 0.00016254179613916278, + "loss": 0.7351, + "step": 4946 + }, + { + "epoch": 1.15, + "learning_rate": 0.0001625276016409007, + "loss": 0.7269, + "step": 4947 + }, + { + "epoch": 1.15, + "learning_rate": 0.0001625134050737527, + "loss": 0.6718, + "step": 4948 + }, + { + "epoch": 1.15, + "learning_rate": 0.0001624992064381885, + "loss": 0.7901, + "step": 4949 + }, + { + "epoch": 1.15, + "learning_rate": 0.00016248500573467784, + "loss": 0.7393, + "step": 4950 + }, + { + "epoch": 1.15, + "learning_rate": 0.0001624708029636907, + "loss": 0.7094, + "step": 4951 + }, + { + "epoch": 1.15, + "learning_rate": 0.00016245659812569692, + "loss": 0.764, + "step": 4952 + }, + { + "epoch": 1.15, + "learning_rate": 0.00016244239122116655, + "loss": 0.6714, + "step": 4953 + }, + { + "epoch": 1.15, + "learning_rate": 0.0001624281822505697, + "loss": 0.7573, + "step": 4954 + }, + { + "epoch": 1.15, + "learning_rate": 0.0001624139712143764, + "loss": 0.7032, + "step": 4955 + }, + { + "epoch": 1.15, + "learning_rate": 0.00016239975811305692, + "loss": 0.6907, + "step": 4956 + }, + { + "epoch": 1.15, + "learning_rate": 0.00016238554294708158, + "loss": 0.6691, + "step": 4957 + }, + { + "epoch": 1.15, + "learning_rate": 0.0001623713257169207, + "loss": 0.7128, + "step": 4958 + }, + { + "epoch": 1.15, + "learning_rate": 0.0001623571064230446, + "loss": 0.6998, + "step": 4959 + }, + { + "epoch": 1.15, + "learning_rate": 0.00016234288506592387, + "loss": 0.6842, + "step": 4960 + }, + { + "epoch": 1.15, + "learning_rate": 0.00016232866164602906, + "loss": 0.7049, + "step": 4961 + }, + { + "epoch": 1.15, + "learning_rate": 0.00016231443616383074, + "loss": 0.7541, + "step": 4962 + }, + { + "epoch": 1.15, + "learning_rate": 0.0001623002086197996, + "loss": 0.659, + "step": 4963 + }, + { + "epoch": 1.15, + "learning_rate": 0.0001622859790144064, + "loss": 0.7129, + "step": 4964 + }, + { + "epoch": 1.15, + "learning_rate": 0.000162271747348122, + "loss": 0.7052, + "step": 4965 + }, + { + "epoch": 1.15, + "learning_rate": 0.00016225751362141722, + "loss": 0.6795, + "step": 4966 + }, + { + "epoch": 1.15, + "learning_rate": 0.00016224327783476313, + "loss": 0.6632, + "step": 4967 + }, + { + "epoch": 1.15, + "learning_rate": 0.00016222903998863063, + "loss": 0.7449, + "step": 4968 + }, + { + "epoch": 1.15, + "learning_rate": 0.00016221480008349088, + "loss": 0.6746, + "step": 4969 + }, + { + "epoch": 1.15, + "learning_rate": 0.00016220055811981508, + "loss": 0.6817, + "step": 4970 + }, + { + "epoch": 1.15, + "learning_rate": 0.00016218631409807438, + "loss": 0.7151, + "step": 4971 + }, + { + "epoch": 1.15, + "learning_rate": 0.00016217206801874011, + "loss": 0.7452, + "step": 4972 + }, + { + "epoch": 1.15, + "learning_rate": 0.0001621578198822837, + "loss": 0.7144, + "step": 4973 + }, + { + "epoch": 1.15, + "learning_rate": 0.00016214356968917648, + "loss": 0.6005, + "step": 4974 + }, + { + "epoch": 1.15, + "learning_rate": 0.00016212931743989003, + "loss": 0.7699, + "step": 4975 + }, + { + "epoch": 1.15, + "learning_rate": 0.00016211506313489591, + "loss": 0.7623, + "step": 4976 + }, + { + "epoch": 1.15, + "learning_rate": 0.00016210080677466575, + "loss": 0.744, + "step": 4977 + }, + { + "epoch": 1.15, + "learning_rate": 0.00016208654835967124, + "loss": 0.7207, + "step": 4978 + }, + { + "epoch": 1.15, + "learning_rate": 0.0001620722878903842, + "loss": 0.7138, + "step": 4979 + }, + { + "epoch": 1.15, + "learning_rate": 0.00016205802536727642, + "loss": 0.7454, + "step": 4980 + }, + { + "epoch": 1.15, + "learning_rate": 0.00016204376079081986, + "loss": 0.674, + "step": 4981 + }, + { + "epoch": 1.15, + "learning_rate": 0.00016202949416148647, + "loss": 0.7283, + "step": 4982 + }, + { + "epoch": 1.15, + "learning_rate": 0.00016201522547974832, + "loss": 0.6529, + "step": 4983 + }, + { + "epoch": 1.15, + "learning_rate": 0.00016200095474607753, + "loss": 0.7559, + "step": 4984 + }, + { + "epoch": 1.15, + "learning_rate": 0.00016198668196094627, + "loss": 0.7466, + "step": 4985 + }, + { + "epoch": 1.15, + "learning_rate": 0.00016197240712482674, + "loss": 0.7058, + "step": 4986 + }, + { + "epoch": 1.15, + "learning_rate": 0.00016195813023819135, + "loss": 0.7211, + "step": 4987 + }, + { + "epoch": 1.15, + "learning_rate": 0.00016194385130151244, + "loss": 0.6863, + "step": 4988 + }, + { + "epoch": 1.16, + "learning_rate": 0.0001619295703152625, + "loss": 0.7081, + "step": 4989 + }, + { + "epoch": 1.16, + "learning_rate": 0.000161915287279914, + "loss": 0.6963, + "step": 4990 + }, + { + "epoch": 1.16, + "learning_rate": 0.00016190100219593955, + "loss": 0.7649, + "step": 4991 + }, + { + "epoch": 1.16, + "learning_rate": 0.00016188671506381182, + "loss": 0.6973, + "step": 4992 + }, + { + "epoch": 1.16, + "learning_rate": 0.00016187242588400354, + "loss": 0.6272, + "step": 4993 + }, + { + "epoch": 1.16, + "learning_rate": 0.00016185813465698749, + "loss": 0.7467, + "step": 4994 + }, + { + "epoch": 1.16, + "learning_rate": 0.00016184384138323655, + "loss": 0.6663, + "step": 4995 + }, + { + "epoch": 1.16, + "learning_rate": 0.00016182954606322364, + "loss": 0.6643, + "step": 4996 + }, + { + "epoch": 1.16, + "learning_rate": 0.00016181524869742173, + "loss": 0.7302, + "step": 4997 + }, + { + "epoch": 1.16, + "learning_rate": 0.0001618009492863039, + "loss": 0.7521, + "step": 4998 + }, + { + "epoch": 1.16, + "learning_rate": 0.00016178664783034332, + "loss": 0.6421, + "step": 4999 + }, + { + "epoch": 1.16, + "learning_rate": 0.00016177234433001315, + "loss": 0.7345, + "step": 5000 + }, + { + "epoch": 1.16, + "learning_rate": 0.00016175803878578667, + "loss": 0.7065, + "step": 5001 + }, + { + "epoch": 1.16, + "learning_rate": 0.00016174373119813722, + "loss": 0.7296, + "step": 5002 + }, + { + "epoch": 1.16, + "learning_rate": 0.0001617294215675382, + "loss": 0.6596, + "step": 5003 + }, + { + "epoch": 1.16, + "learning_rate": 0.0001617151098944631, + "loss": 0.7564, + "step": 5004 + }, + { + "epoch": 1.16, + "learning_rate": 0.00016170079617938541, + "loss": 0.6629, + "step": 5005 + }, + { + "epoch": 1.16, + "learning_rate": 0.00016168648042277879, + "loss": 0.67, + "step": 5006 + }, + { + "epoch": 1.16, + "learning_rate": 0.00016167216262511687, + "loss": 0.6711, + "step": 5007 + }, + { + "epoch": 1.16, + "learning_rate": 0.0001616578427868734, + "loss": 0.747, + "step": 5008 + }, + { + "epoch": 1.16, + "learning_rate": 0.0001616435209085222, + "loss": 0.7235, + "step": 5009 + }, + { + "epoch": 1.16, + "learning_rate": 0.00016162919699053716, + "loss": 0.636, + "step": 5010 + }, + { + "epoch": 1.16, + "learning_rate": 0.00016161487103339222, + "loss": 0.7371, + "step": 5011 + }, + { + "epoch": 1.16, + "learning_rate": 0.00016160054303756137, + "loss": 0.7285, + "step": 5012 + }, + { + "epoch": 1.16, + "learning_rate": 0.00016158621300351865, + "loss": 0.6999, + "step": 5013 + }, + { + "epoch": 1.16, + "learning_rate": 0.0001615718809317383, + "loss": 0.7371, + "step": 5014 + }, + { + "epoch": 1.16, + "learning_rate": 0.00016155754682269446, + "loss": 0.704, + "step": 5015 + }, + { + "epoch": 1.16, + "learning_rate": 0.00016154321067686148, + "loss": 0.716, + "step": 5016 + }, + { + "epoch": 1.16, + "learning_rate": 0.00016152887249471363, + "loss": 0.7309, + "step": 5017 + }, + { + "epoch": 1.16, + "learning_rate": 0.00016151453227672538, + "loss": 0.7569, + "step": 5018 + }, + { + "epoch": 1.16, + "learning_rate": 0.00016150019002337117, + "loss": 0.7775, + "step": 5019 + }, + { + "epoch": 1.16, + "learning_rate": 0.00016148584573512557, + "loss": 0.6367, + "step": 5020 + }, + { + "epoch": 1.16, + "learning_rate": 0.00016147149941246323, + "loss": 0.6723, + "step": 5021 + }, + { + "epoch": 1.16, + "learning_rate": 0.0001614571510558588, + "loss": 0.7602, + "step": 5022 + }, + { + "epoch": 1.16, + "learning_rate": 0.000161442800665787, + "loss": 0.73, + "step": 5023 + }, + { + "epoch": 1.16, + "learning_rate": 0.00016142844824272272, + "loss": 0.6453, + "step": 5024 + }, + { + "epoch": 1.16, + "learning_rate": 0.0001614140937871408, + "loss": 0.7176, + "step": 5025 + }, + { + "epoch": 1.16, + "learning_rate": 0.00016139973729951623, + "loss": 0.7047, + "step": 5026 + }, + { + "epoch": 1.16, + "learning_rate": 0.000161385378780324, + "loss": 0.7218, + "step": 5027 + }, + { + "epoch": 1.16, + "learning_rate": 0.0001613710182300392, + "loss": 0.7208, + "step": 5028 + }, + { + "epoch": 1.16, + "learning_rate": 0.000161356655649137, + "loss": 0.7123, + "step": 5029 + }, + { + "epoch": 1.16, + "learning_rate": 0.0001613422910380926, + "loss": 0.7845, + "step": 5030 + }, + { + "epoch": 1.16, + "learning_rate": 0.00016132792439738132, + "loss": 0.7122, + "step": 5031 + }, + { + "epoch": 1.17, + "learning_rate": 0.00016131355572747853, + "loss": 0.6986, + "step": 5032 + }, + { + "epoch": 1.17, + "learning_rate": 0.00016129918502885956, + "loss": 0.7365, + "step": 5033 + }, + { + "epoch": 1.17, + "learning_rate": 0.000161284812302, + "loss": 0.7768, + "step": 5034 + }, + { + "epoch": 1.17, + "learning_rate": 0.0001612704375473754, + "loss": 0.6867, + "step": 5035 + }, + { + "epoch": 1.17, + "learning_rate": 0.00016125606076546135, + "loss": 0.6514, + "step": 5036 + }, + { + "epoch": 1.17, + "learning_rate": 0.00016124168195673355, + "loss": 0.7027, + "step": 5037 + }, + { + "epoch": 1.17, + "learning_rate": 0.00016122730112166778, + "loss": 0.7177, + "step": 5038 + }, + { + "epoch": 1.17, + "learning_rate": 0.00016121291826073984, + "loss": 0.7192, + "step": 5039 + }, + { + "epoch": 1.17, + "learning_rate": 0.0001611985333744256, + "loss": 0.6569, + "step": 5040 + }, + { + "epoch": 1.17, + "learning_rate": 0.0001611841464632011, + "loss": 0.7497, + "step": 5041 + }, + { + "epoch": 1.17, + "learning_rate": 0.00016116975752754234, + "loss": 0.7354, + "step": 5042 + }, + { + "epoch": 1.17, + "learning_rate": 0.00016115536656792539, + "loss": 0.6701, + "step": 5043 + }, + { + "epoch": 1.17, + "learning_rate": 0.00016114097358482638, + "loss": 0.7228, + "step": 5044 + }, + { + "epoch": 1.17, + "learning_rate": 0.00016112657857872162, + "loss": 0.7348, + "step": 5045 + }, + { + "epoch": 1.17, + "learning_rate": 0.00016111218155008734, + "loss": 0.6682, + "step": 5046 + }, + { + "epoch": 1.17, + "learning_rate": 0.00016109778249939996, + "loss": 0.7288, + "step": 5047 + }, + { + "epoch": 1.17, + "learning_rate": 0.00016108338142713585, + "loss": 0.7426, + "step": 5048 + }, + { + "epoch": 1.17, + "learning_rate": 0.00016106897833377155, + "loss": 0.6927, + "step": 5049 + }, + { + "epoch": 1.17, + "learning_rate": 0.0001610545732197836, + "loss": 0.7271, + "step": 5050 + }, + { + "epoch": 1.17, + "learning_rate": 0.00016104016608564861, + "loss": 0.6573, + "step": 5051 + }, + { + "epoch": 1.17, + "learning_rate": 0.00016102575693184332, + "loss": 0.7282, + "step": 5052 + }, + { + "epoch": 1.17, + "learning_rate": 0.0001610113457588445, + "loss": 0.7459, + "step": 5053 + }, + { + "epoch": 1.17, + "learning_rate": 0.00016099693256712896, + "loss": 0.6926, + "step": 5054 + }, + { + "epoch": 1.17, + "learning_rate": 0.00016098251735717357, + "loss": 0.7193, + "step": 5055 + }, + { + "epoch": 1.17, + "learning_rate": 0.0001609681001294553, + "loss": 0.7293, + "step": 5056 + }, + { + "epoch": 1.17, + "learning_rate": 0.00016095368088445125, + "loss": 0.6974, + "step": 5057 + }, + { + "epoch": 1.17, + "learning_rate": 0.00016093925962263846, + "loss": 0.727, + "step": 5058 + }, + { + "epoch": 1.17, + "learning_rate": 0.0001609248363444941, + "loss": 0.7567, + "step": 5059 + }, + { + "epoch": 1.17, + "learning_rate": 0.0001609104110504954, + "loss": 0.7157, + "step": 5060 + }, + { + "epoch": 1.17, + "learning_rate": 0.00016089598374111966, + "loss": 0.6979, + "step": 5061 + }, + { + "epoch": 1.17, + "learning_rate": 0.00016088155441684423, + "loss": 0.7012, + "step": 5062 + }, + { + "epoch": 1.17, + "learning_rate": 0.00016086712307814658, + "loss": 0.7571, + "step": 5063 + }, + { + "epoch": 1.17, + "learning_rate": 0.00016085268972550418, + "loss": 0.6612, + "step": 5064 + }, + { + "epoch": 1.17, + "learning_rate": 0.0001608382543593946, + "loss": 0.6884, + "step": 5065 + }, + { + "epoch": 1.17, + "learning_rate": 0.00016082381698029547, + "loss": 0.6944, + "step": 5066 + }, + { + "epoch": 1.17, + "learning_rate": 0.00016080937758868448, + "loss": 0.7226, + "step": 5067 + }, + { + "epoch": 1.17, + "learning_rate": 0.00016079493618503942, + "loss": 0.7308, + "step": 5068 + }, + { + "epoch": 1.17, + "learning_rate": 0.0001607804927698381, + "loss": 0.7843, + "step": 5069 + }, + { + "epoch": 1.17, + "learning_rate": 0.00016076604734355842, + "loss": 0.7666, + "step": 5070 + }, + { + "epoch": 1.17, + "learning_rate": 0.00016075159990667834, + "loss": 0.6716, + "step": 5071 + }, + { + "epoch": 1.17, + "learning_rate": 0.00016073715045967588, + "loss": 0.705, + "step": 5072 + }, + { + "epoch": 1.17, + "learning_rate": 0.00016072269900302918, + "loss": 0.7271, + "step": 5073 + }, + { + "epoch": 1.17, + "learning_rate": 0.00016070824553721637, + "loss": 0.6616, + "step": 5074 + }, + { + "epoch": 1.18, + "learning_rate": 0.00016069379006271566, + "loss": 0.7037, + "step": 5075 + }, + { + "epoch": 1.18, + "learning_rate": 0.00016067933258000542, + "loss": 0.7225, + "step": 5076 + }, + { + "epoch": 1.18, + "learning_rate": 0.00016066487308956395, + "loss": 0.6553, + "step": 5077 + }, + { + "epoch": 1.18, + "learning_rate": 0.00016065041159186967, + "loss": 0.8088, + "step": 5078 + }, + { + "epoch": 1.18, + "learning_rate": 0.00016063594808740113, + "loss": 0.7518, + "step": 5079 + }, + { + "epoch": 1.18, + "learning_rate": 0.00016062148257663686, + "loss": 0.727, + "step": 5080 + }, + { + "epoch": 1.18, + "learning_rate": 0.00016060701506005547, + "loss": 0.7441, + "step": 5081 + }, + { + "epoch": 1.18, + "learning_rate": 0.00016059254553813567, + "loss": 0.6884, + "step": 5082 + }, + { + "epoch": 1.18, + "learning_rate": 0.00016057807401135624, + "loss": 0.706, + "step": 5083 + }, + { + "epoch": 1.18, + "learning_rate": 0.000160563600480196, + "loss": 0.68, + "step": 5084 + }, + { + "epoch": 1.18, + "learning_rate": 0.00016054912494513387, + "loss": 0.6836, + "step": 5085 + }, + { + "epoch": 1.18, + "learning_rate": 0.0001605346474066487, + "loss": 0.6875, + "step": 5086 + }, + { + "epoch": 1.18, + "learning_rate": 0.00016052016786521965, + "loss": 0.6808, + "step": 5087 + }, + { + "epoch": 1.18, + "learning_rate": 0.00016050568632132575, + "loss": 0.7708, + "step": 5088 + }, + { + "epoch": 1.18, + "learning_rate": 0.00016049120277544617, + "loss": 0.6545, + "step": 5089 + }, + { + "epoch": 1.18, + "learning_rate": 0.00016047671722806012, + "loss": 0.7527, + "step": 5090 + }, + { + "epoch": 1.18, + "learning_rate": 0.0001604622296796469, + "loss": 0.6664, + "step": 5091 + }, + { + "epoch": 1.18, + "learning_rate": 0.00016044774013068588, + "loss": 0.7081, + "step": 5092 + }, + { + "epoch": 1.18, + "learning_rate": 0.00016043324858165648, + "loss": 0.7379, + "step": 5093 + }, + { + "epoch": 1.18, + "learning_rate": 0.0001604187550330382, + "loss": 0.6839, + "step": 5094 + }, + { + "epoch": 1.18, + "learning_rate": 0.00016040425948531052, + "loss": 0.7928, + "step": 5095 + }, + { + "epoch": 1.18, + "learning_rate": 0.0001603897619389532, + "loss": 0.6491, + "step": 5096 + }, + { + "epoch": 1.18, + "learning_rate": 0.0001603752623944458, + "loss": 0.7441, + "step": 5097 + }, + { + "epoch": 1.18, + "learning_rate": 0.00016036076085226814, + "loss": 0.7818, + "step": 5098 + }, + { + "epoch": 1.18, + "learning_rate": 0.00016034625731290003, + "loss": 0.705, + "step": 5099 + }, + { + "epoch": 1.18, + "learning_rate": 0.00016033175177682137, + "loss": 0.7438, + "step": 5100 + }, + { + "epoch": 1.18, + "learning_rate": 0.00016031724424451208, + "loss": 0.7184, + "step": 5101 + }, + { + "epoch": 1.18, + "learning_rate": 0.0001603027347164522, + "loss": 0.7135, + "step": 5102 + }, + { + "epoch": 1.18, + "learning_rate": 0.0001602882231931218, + "loss": 0.8175, + "step": 5103 + }, + { + "epoch": 1.18, + "learning_rate": 0.00016027370967500103, + "loss": 0.739, + "step": 5104 + }, + { + "epoch": 1.18, + "learning_rate": 0.00016025919416257013, + "loss": 0.7385, + "step": 5105 + }, + { + "epoch": 1.18, + "learning_rate": 0.0001602446766563094, + "loss": 0.781, + "step": 5106 + }, + { + "epoch": 1.18, + "learning_rate": 0.00016023015715669914, + "loss": 0.7847, + "step": 5107 + }, + { + "epoch": 1.18, + "learning_rate": 0.0001602156356642198, + "loss": 0.6864, + "step": 5108 + }, + { + "epoch": 1.18, + "learning_rate": 0.00016020111217935183, + "loss": 0.7456, + "step": 5109 + }, + { + "epoch": 1.18, + "learning_rate": 0.00016018658670257578, + "loss": 0.7489, + "step": 5110 + }, + { + "epoch": 1.18, + "learning_rate": 0.00016017205923437227, + "loss": 0.7402, + "step": 5111 + }, + { + "epoch": 1.18, + "learning_rate": 0.00016015752977522203, + "loss": 0.7529, + "step": 5112 + }, + { + "epoch": 1.18, + "learning_rate": 0.00016014299832560575, + "loss": 0.7723, + "step": 5113 + }, + { + "epoch": 1.18, + "learning_rate": 0.00016012846488600423, + "loss": 0.7186, + "step": 5114 + }, + { + "epoch": 1.18, + "learning_rate": 0.00016011392945689833, + "loss": 0.7656, + "step": 5115 + }, + { + "epoch": 1.18, + "learning_rate": 0.0001600993920387691, + "loss": 0.6987, + "step": 5116 + }, + { + "epoch": 1.18, + "learning_rate": 0.00016008485263209742, + "loss": 0.6843, + "step": 5117 + }, + { + "epoch": 1.18, + "learning_rate": 0.00016007031123736444, + "loss": 0.7778, + "step": 5118 + }, + { + "epoch": 1.19, + "learning_rate": 0.00016005576785505126, + "loss": 0.7251, + "step": 5119 + }, + { + "epoch": 1.19, + "learning_rate": 0.00016004122248563913, + "loss": 0.7458, + "step": 5120 + }, + { + "epoch": 1.19, + "learning_rate": 0.00016002667512960927, + "loss": 0.7319, + "step": 5121 + }, + { + "epoch": 1.19, + "learning_rate": 0.00016001212578744306, + "loss": 0.7235, + "step": 5122 + }, + { + "epoch": 1.19, + "learning_rate": 0.00015999757445962187, + "loss": 0.7495, + "step": 5123 + }, + { + "epoch": 1.19, + "learning_rate": 0.00015998302114662716, + "loss": 0.6418, + "step": 5124 + }, + { + "epoch": 1.19, + "learning_rate": 0.0001599684658489405, + "loss": 0.7002, + "step": 5125 + }, + { + "epoch": 1.19, + "learning_rate": 0.0001599539085670435, + "loss": 0.6765, + "step": 5126 + }, + { + "epoch": 1.19, + "learning_rate": 0.00015993934930141773, + "loss": 0.6905, + "step": 5127 + }, + { + "epoch": 1.19, + "learning_rate": 0.00015992478805254505, + "loss": 0.6751, + "step": 5128 + }, + { + "epoch": 1.19, + "learning_rate": 0.00015991022482090718, + "loss": 0.6603, + "step": 5129 + }, + { + "epoch": 1.19, + "learning_rate": 0.000159895659606986, + "loss": 0.7158, + "step": 5130 + }, + { + "epoch": 1.19, + "learning_rate": 0.0001598810924112634, + "loss": 0.6301, + "step": 5131 + }, + { + "epoch": 1.19, + "learning_rate": 0.00015986652323422146, + "loss": 0.8029, + "step": 5132 + }, + { + "epoch": 1.19, + "learning_rate": 0.00015985195207634213, + "loss": 0.7254, + "step": 5133 + }, + { + "epoch": 1.19, + "learning_rate": 0.0001598373789381076, + "loss": 0.7298, + "step": 5134 + }, + { + "epoch": 1.19, + "learning_rate": 0.00015982280382000007, + "loss": 0.7201, + "step": 5135 + }, + { + "epoch": 1.19, + "learning_rate": 0.0001598082267225018, + "loss": 0.6937, + "step": 5136 + }, + { + "epoch": 1.19, + "learning_rate": 0.00015979364764609504, + "loss": 0.7175, + "step": 5137 + }, + { + "epoch": 1.19, + "learning_rate": 0.00015977906659126224, + "loss": 0.7247, + "step": 5138 + }, + { + "epoch": 1.19, + "learning_rate": 0.00015976448355848585, + "loss": 0.7242, + "step": 5139 + }, + { + "epoch": 1.19, + "learning_rate": 0.00015974989854824833, + "loss": 0.7248, + "step": 5140 + }, + { + "epoch": 1.19, + "learning_rate": 0.00015973531156103233, + "loss": 0.7022, + "step": 5141 + }, + { + "epoch": 1.19, + "learning_rate": 0.00015972072259732046, + "loss": 0.6791, + "step": 5142 + }, + { + "epoch": 1.19, + "learning_rate": 0.00015970613165759544, + "loss": 0.6983, + "step": 5143 + }, + { + "epoch": 1.19, + "learning_rate": 0.00015969153874234007, + "loss": 0.7425, + "step": 5144 + }, + { + "epoch": 1.19, + "learning_rate": 0.00015967694385203714, + "loss": 0.6901, + "step": 5145 + }, + { + "epoch": 1.19, + "learning_rate": 0.00015966234698716963, + "loss": 0.672, + "step": 5146 + }, + { + "epoch": 1.19, + "learning_rate": 0.00015964774814822044, + "loss": 0.726, + "step": 5147 + }, + { + "epoch": 1.19, + "learning_rate": 0.0001596331473356727, + "loss": 0.7205, + "step": 5148 + }, + { + "epoch": 1.19, + "learning_rate": 0.00015961854455000943, + "loss": 0.6905, + "step": 5149 + }, + { + "epoch": 1.19, + "learning_rate": 0.00015960393979171387, + "loss": 0.7092, + "step": 5150 + }, + { + "epoch": 1.19, + "learning_rate": 0.00015958933306126918, + "loss": 0.7001, + "step": 5151 + }, + { + "epoch": 1.19, + "learning_rate": 0.00015957472435915873, + "loss": 0.7004, + "step": 5152 + }, + { + "epoch": 1.19, + "learning_rate": 0.00015956011368586584, + "loss": 0.6652, + "step": 5153 + }, + { + "epoch": 1.19, + "learning_rate": 0.00015954550104187398, + "loss": 0.6514, + "step": 5154 + }, + { + "epoch": 1.19, + "learning_rate": 0.0001595308864276666, + "loss": 0.6674, + "step": 5155 + }, + { + "epoch": 1.19, + "learning_rate": 0.00015951626984372731, + "loss": 0.6928, + "step": 5156 + }, + { + "epoch": 1.19, + "learning_rate": 0.00015950165129053972, + "loss": 0.7241, + "step": 5157 + }, + { + "epoch": 1.19, + "learning_rate": 0.0001594870307685875, + "loss": 0.6749, + "step": 5158 + }, + { + "epoch": 1.19, + "learning_rate": 0.00015947240827835448, + "loss": 0.7094, + "step": 5159 + }, + { + "epoch": 1.19, + "learning_rate": 0.00015945778382032435, + "loss": 0.7237, + "step": 5160 + }, + { + "epoch": 1.19, + "learning_rate": 0.00015944315739498113, + "loss": 0.7806, + "step": 5161 + }, + { + "epoch": 1.2, + "learning_rate": 0.0001594285290028087, + "loss": 0.6632, + "step": 5162 + }, + { + "epoch": 1.2, + "learning_rate": 0.0001594138986442911, + "loss": 0.6319, + "step": 5163 + }, + { + "epoch": 1.2, + "learning_rate": 0.0001593992663199124, + "loss": 0.8023, + "step": 5164 + }, + { + "epoch": 1.2, + "learning_rate": 0.0001593846320301568, + "loss": 0.6772, + "step": 5165 + }, + { + "epoch": 1.2, + "learning_rate": 0.0001593699957755084, + "loss": 0.752, + "step": 5166 + }, + { + "epoch": 1.2, + "learning_rate": 0.00015935535755645162, + "loss": 0.6644, + "step": 5167 + }, + { + "epoch": 1.2, + "learning_rate": 0.00015934071737347066, + "loss": 0.7653, + "step": 5168 + }, + { + "epoch": 1.2, + "learning_rate": 0.00015932607522705007, + "loss": 0.7215, + "step": 5169 + }, + { + "epoch": 1.2, + "learning_rate": 0.00015931143111767423, + "loss": 0.7191, + "step": 5170 + }, + { + "epoch": 1.2, + "learning_rate": 0.00015929678504582765, + "loss": 0.6904, + "step": 5171 + }, + { + "epoch": 1.2, + "learning_rate": 0.00015928213701199505, + "loss": 0.7283, + "step": 5172 + }, + { + "epoch": 1.2, + "learning_rate": 0.000159267487016661, + "loss": 0.6525, + "step": 5173 + }, + { + "epoch": 1.2, + "learning_rate": 0.0001592528350603103, + "loss": 0.6936, + "step": 5174 + }, + { + "epoch": 1.2, + "learning_rate": 0.00015923818114342767, + "loss": 0.6603, + "step": 5175 + }, + { + "epoch": 1.2, + "learning_rate": 0.00015922352526649803, + "loss": 0.6929, + "step": 5176 + }, + { + "epoch": 1.2, + "learning_rate": 0.00015920886743000628, + "loss": 0.7357, + "step": 5177 + }, + { + "epoch": 1.2, + "learning_rate": 0.00015919420763443743, + "loss": 0.6748, + "step": 5178 + }, + { + "epoch": 1.2, + "learning_rate": 0.00015917954588027652, + "loss": 0.7238, + "step": 5179 + }, + { + "epoch": 1.2, + "learning_rate": 0.0001591648821680087, + "loss": 0.7297, + "step": 5180 + }, + { + "epoch": 1.2, + "learning_rate": 0.00015915021649811912, + "loss": 0.729, + "step": 5181 + }, + { + "epoch": 1.2, + "learning_rate": 0.00015913554887109305, + "loss": 0.7022, + "step": 5182 + }, + { + "epoch": 1.2, + "learning_rate": 0.0001591208792874158, + "loss": 0.6981, + "step": 5183 + }, + { + "epoch": 1.2, + "learning_rate": 0.00015910620774757278, + "loss": 0.7613, + "step": 5184 + }, + { + "epoch": 1.2, + "learning_rate": 0.0001590915342520494, + "loss": 0.6751, + "step": 5185 + }, + { + "epoch": 1.2, + "learning_rate": 0.0001590768588013312, + "loss": 0.6573, + "step": 5186 + }, + { + "epoch": 1.2, + "learning_rate": 0.0001590621813959037, + "loss": 0.6846, + "step": 5187 + }, + { + "epoch": 1.2, + "learning_rate": 0.0001590475020362526, + "loss": 0.6527, + "step": 5188 + }, + { + "epoch": 1.2, + "learning_rate": 0.0001590328207228636, + "loss": 0.6752, + "step": 5189 + }, + { + "epoch": 1.2, + "learning_rate": 0.00015901813745622243, + "loss": 0.6898, + "step": 5190 + }, + { + "epoch": 1.2, + "learning_rate": 0.00015900345223681495, + "loss": 0.6861, + "step": 5191 + }, + { + "epoch": 1.2, + "learning_rate": 0.00015898876506512704, + "loss": 0.6985, + "step": 5192 + }, + { + "epoch": 1.2, + "learning_rate": 0.00015897407594164467, + "loss": 0.6646, + "step": 5193 + }, + { + "epoch": 1.2, + "learning_rate": 0.00015895938486685385, + "loss": 0.753, + "step": 5194 + }, + { + "epoch": 1.2, + "learning_rate": 0.0001589446918412407, + "loss": 0.7444, + "step": 5195 + }, + { + "epoch": 1.2, + "learning_rate": 0.0001589299968652914, + "loss": 0.7217, + "step": 5196 + }, + { + "epoch": 1.2, + "learning_rate": 0.00015891529993949214, + "loss": 0.7155, + "step": 5197 + }, + { + "epoch": 1.2, + "learning_rate": 0.0001589006010643292, + "loss": 0.7103, + "step": 5198 + }, + { + "epoch": 1.2, + "learning_rate": 0.00015888590024028894, + "loss": 0.6981, + "step": 5199 + }, + { + "epoch": 1.2, + "learning_rate": 0.00015887119746785776, + "loss": 0.7209, + "step": 5200 + }, + { + "epoch": 1.2, + "learning_rate": 0.00015885649274752215, + "loss": 0.767, + "step": 5201 + }, + { + "epoch": 1.2, + "learning_rate": 0.00015884178607976864, + "loss": 0.6802, + "step": 5202 + }, + { + "epoch": 1.2, + "learning_rate": 0.00015882707746508388, + "loss": 0.8035, + "step": 5203 + }, + { + "epoch": 1.2, + "learning_rate": 0.00015881236690395447, + "loss": 0.7089, + "step": 5204 + }, + { + "epoch": 1.21, + "learning_rate": 0.00015879765439686722, + "loss": 0.6592, + "step": 5205 + }, + { + "epoch": 1.21, + "learning_rate": 0.00015878293994430888, + "loss": 0.6789, + "step": 5206 + }, + { + "epoch": 1.21, + "learning_rate": 0.00015876822354676638, + "loss": 0.7403, + "step": 5207 + }, + { + "epoch": 1.21, + "learning_rate": 0.00015875350520472655, + "loss": 0.7286, + "step": 5208 + }, + { + "epoch": 1.21, + "learning_rate": 0.00015873878491867645, + "loss": 0.702, + "step": 5209 + }, + { + "epoch": 1.21, + "learning_rate": 0.00015872406268910316, + "loss": 0.8267, + "step": 5210 + }, + { + "epoch": 1.21, + "learning_rate": 0.00015870933851649374, + "loss": 0.6578, + "step": 5211 + }, + { + "epoch": 1.21, + "learning_rate": 0.0001586946124013354, + "loss": 0.6901, + "step": 5212 + }, + { + "epoch": 1.21, + "learning_rate": 0.00015867988434411544, + "loss": 0.7255, + "step": 5213 + }, + { + "epoch": 1.21, + "learning_rate": 0.0001586651543453211, + "loss": 0.7437, + "step": 5214 + }, + { + "epoch": 1.21, + "learning_rate": 0.00015865042240543984, + "loss": 0.7497, + "step": 5215 + }, + { + "epoch": 1.21, + "learning_rate": 0.000158635688524959, + "loss": 0.6637, + "step": 5216 + }, + { + "epoch": 1.21, + "learning_rate": 0.00015862095270436616, + "loss": 0.7227, + "step": 5217 + }, + { + "epoch": 1.21, + "learning_rate": 0.00015860621494414892, + "loss": 0.6801, + "step": 5218 + }, + { + "epoch": 1.21, + "learning_rate": 0.00015859147524479483, + "loss": 0.7069, + "step": 5219 + }, + { + "epoch": 1.21, + "learning_rate": 0.00015857673360679164, + "loss": 0.6542, + "step": 5220 + }, + { + "epoch": 1.21, + "learning_rate": 0.0001585619900306271, + "loss": 0.6805, + "step": 5221 + }, + { + "epoch": 1.21, + "learning_rate": 0.00015854724451678907, + "loss": 0.742, + "step": 5222 + }, + { + "epoch": 1.21, + "learning_rate": 0.00015853249706576544, + "loss": 0.731, + "step": 5223 + }, + { + "epoch": 1.21, + "learning_rate": 0.00015851774767804413, + "loss": 0.7773, + "step": 5224 + }, + { + "epoch": 1.21, + "learning_rate": 0.00015850299635411315, + "loss": 0.6978, + "step": 5225 + }, + { + "epoch": 1.21, + "learning_rate": 0.00015848824309446066, + "loss": 0.6903, + "step": 5226 + }, + { + "epoch": 1.21, + "learning_rate": 0.00015847348789957476, + "loss": 0.6866, + "step": 5227 + }, + { + "epoch": 1.21, + "learning_rate": 0.00015845873076994366, + "loss": 0.6822, + "step": 5228 + }, + { + "epoch": 1.21, + "learning_rate": 0.00015844397170605565, + "loss": 0.6712, + "step": 5229 + }, + { + "epoch": 1.21, + "learning_rate": 0.00015842921070839908, + "loss": 0.7153, + "step": 5230 + }, + { + "epoch": 1.21, + "learning_rate": 0.0001584144477774623, + "loss": 0.7299, + "step": 5231 + }, + { + "epoch": 1.21, + "learning_rate": 0.00015839968291373385, + "loss": 0.6926, + "step": 5232 + }, + { + "epoch": 1.21, + "learning_rate": 0.00015838491611770225, + "loss": 0.6324, + "step": 5233 + }, + { + "epoch": 1.21, + "learning_rate": 0.0001583701473898561, + "loss": 0.7838, + "step": 5234 + }, + { + "epoch": 1.21, + "learning_rate": 0.000158355376730684, + "loss": 0.6941, + "step": 5235 + }, + { + "epoch": 1.21, + "learning_rate": 0.00015834060414067472, + "loss": 0.7214, + "step": 5236 + }, + { + "epoch": 1.21, + "learning_rate": 0.0001583258296203171, + "loss": 0.7165, + "step": 5237 + }, + { + "epoch": 1.21, + "learning_rate": 0.0001583110531700999, + "loss": 0.7171, + "step": 5238 + }, + { + "epoch": 1.21, + "learning_rate": 0.00015829627479051209, + "loss": 0.7456, + "step": 5239 + }, + { + "epoch": 1.21, + "learning_rate": 0.00015828149448204263, + "loss": 0.7388, + "step": 5240 + }, + { + "epoch": 1.21, + "learning_rate": 0.00015826671224518056, + "loss": 0.7085, + "step": 5241 + }, + { + "epoch": 1.21, + "learning_rate": 0.00015825192808041502, + "loss": 0.6297, + "step": 5242 + }, + { + "epoch": 1.21, + "learning_rate": 0.00015823714198823517, + "loss": 0.7963, + "step": 5243 + }, + { + "epoch": 1.21, + "learning_rate": 0.00015822235396913023, + "loss": 0.7147, + "step": 5244 + }, + { + "epoch": 1.21, + "learning_rate": 0.00015820756402358952, + "loss": 0.6649, + "step": 5245 + }, + { + "epoch": 1.21, + "learning_rate": 0.0001581927721521024, + "loss": 0.7713, + "step": 5246 + }, + { + "epoch": 1.21, + "learning_rate": 0.00015817797835515827, + "loss": 0.6672, + "step": 5247 + }, + { + "epoch": 1.22, + "learning_rate": 0.00015816318263324664, + "loss": 0.6915, + "step": 5248 + }, + { + "epoch": 1.22, + "learning_rate": 0.00015814838498685705, + "loss": 0.7113, + "step": 5249 + }, + { + "epoch": 1.22, + "learning_rate": 0.00015813358541647915, + "loss": 0.6665, + "step": 5250 + }, + { + "epoch": 1.22, + "learning_rate": 0.00015811878392260265, + "loss": 0.7028, + "step": 5251 + }, + { + "epoch": 1.22, + "learning_rate": 0.0001581039805057172, + "loss": 0.6986, + "step": 5252 + }, + { + "epoch": 1.22, + "learning_rate": 0.00015808917516631262, + "loss": 0.7754, + "step": 5253 + }, + { + "epoch": 1.22, + "learning_rate": 0.0001580743679048789, + "loss": 0.6665, + "step": 5254 + }, + { + "epoch": 1.22, + "learning_rate": 0.00015805955872190584, + "loss": 0.7124, + "step": 5255 + }, + { + "epoch": 1.22, + "learning_rate": 0.00015804474761788351, + "loss": 0.6771, + "step": 5256 + }, + { + "epoch": 1.22, + "learning_rate": 0.000158029934593302, + "loss": 0.724, + "step": 5257 + }, + { + "epoch": 1.22, + "learning_rate": 0.00015801511964865135, + "loss": 0.7464, + "step": 5258 + }, + { + "epoch": 1.22, + "learning_rate": 0.0001580003027844218, + "loss": 0.7309, + "step": 5259 + }, + { + "epoch": 1.22, + "learning_rate": 0.0001579854840011036, + "loss": 0.7453, + "step": 5260 + }, + { + "epoch": 1.22, + "learning_rate": 0.0001579706632991871, + "loss": 0.7762, + "step": 5261 + }, + { + "epoch": 1.22, + "learning_rate": 0.00015795584067916264, + "loss": 0.6872, + "step": 5262 + }, + { + "epoch": 1.22, + "learning_rate": 0.00015794101614152067, + "loss": 0.7001, + "step": 5263 + }, + { + "epoch": 1.22, + "learning_rate": 0.0001579261896867517, + "loss": 0.6885, + "step": 5264 + }, + { + "epoch": 1.22, + "learning_rate": 0.00015791136131534627, + "loss": 0.7172, + "step": 5265 + }, + { + "epoch": 1.22, + "learning_rate": 0.00015789653102779512, + "loss": 0.7555, + "step": 5266 + }, + { + "epoch": 1.22, + "learning_rate": 0.00015788169882458881, + "loss": 0.642, + "step": 5267 + }, + { + "epoch": 1.22, + "learning_rate": 0.00015786686470621822, + "loss": 0.6579, + "step": 5268 + }, + { + "epoch": 1.22, + "learning_rate": 0.00015785202867317407, + "loss": 0.6197, + "step": 5269 + }, + { + "epoch": 1.22, + "learning_rate": 0.00015783719072594732, + "loss": 0.6838, + "step": 5270 + }, + { + "epoch": 1.22, + "learning_rate": 0.00015782235086502893, + "loss": 0.747, + "step": 5271 + }, + { + "epoch": 1.22, + "learning_rate": 0.00015780750909090988, + "loss": 0.7075, + "step": 5272 + }, + { + "epoch": 1.22, + "learning_rate": 0.0001577926654040812, + "loss": 0.6606, + "step": 5273 + }, + { + "epoch": 1.22, + "learning_rate": 0.0001577778198050341, + "loss": 0.6944, + "step": 5274 + }, + { + "epoch": 1.22, + "learning_rate": 0.00015776297229425982, + "loss": 0.7017, + "step": 5275 + }, + { + "epoch": 1.22, + "learning_rate": 0.00015774812287224955, + "loss": 0.6453, + "step": 5276 + }, + { + "epoch": 1.22, + "learning_rate": 0.00015773327153949465, + "loss": 0.7336, + "step": 5277 + }, + { + "epoch": 1.22, + "learning_rate": 0.00015771841829648653, + "loss": 0.746, + "step": 5278 + }, + { + "epoch": 1.22, + "learning_rate": 0.0001577035631437166, + "loss": 0.7513, + "step": 5279 + }, + { + "epoch": 1.22, + "learning_rate": 0.00015768870608167643, + "loss": 0.7039, + "step": 5280 + }, + { + "epoch": 1.22, + "learning_rate": 0.0001576738471108576, + "loss": 0.7422, + "step": 5281 + }, + { + "epoch": 1.22, + "learning_rate": 0.00015765898623175176, + "loss": 0.6751, + "step": 5282 + }, + { + "epoch": 1.22, + "learning_rate": 0.00015764412344485056, + "loss": 0.7412, + "step": 5283 + }, + { + "epoch": 1.22, + "learning_rate": 0.00015762925875064583, + "loss": 0.6919, + "step": 5284 + }, + { + "epoch": 1.22, + "learning_rate": 0.0001576143921496294, + "loss": 0.6723, + "step": 5285 + }, + { + "epoch": 1.22, + "learning_rate": 0.00015759952364229318, + "loss": 0.7097, + "step": 5286 + }, + { + "epoch": 1.22, + "learning_rate": 0.0001575846532291291, + "loss": 0.6936, + "step": 5287 + }, + { + "epoch": 1.22, + "learning_rate": 0.0001575697809106292, + "loss": 0.6982, + "step": 5288 + }, + { + "epoch": 1.22, + "learning_rate": 0.00015755490668728557, + "loss": 0.6733, + "step": 5289 + }, + { + "epoch": 1.22, + "learning_rate": 0.00015754003055959035, + "loss": 0.7124, + "step": 5290 + }, + { + "epoch": 1.23, + "learning_rate": 0.00015752515252803577, + "loss": 0.6689, + "step": 5291 + }, + { + "epoch": 1.23, + "learning_rate": 0.00015751027259311413, + "loss": 0.685, + "step": 5292 + }, + { + "epoch": 1.23, + "learning_rate": 0.00015749539075531773, + "loss": 0.6766, + "step": 5293 + }, + { + "epoch": 1.23, + "learning_rate": 0.000157480507015139, + "loss": 0.6956, + "step": 5294 + }, + { + "epoch": 1.23, + "learning_rate": 0.0001574656213730704, + "loss": 0.7139, + "step": 5295 + }, + { + "epoch": 1.23, + "learning_rate": 0.00015745073382960443, + "loss": 0.7512, + "step": 5296 + }, + { + "epoch": 1.23, + "learning_rate": 0.00015743584438523373, + "loss": 0.7031, + "step": 5297 + }, + { + "epoch": 1.23, + "learning_rate": 0.00015742095304045094, + "loss": 0.7144, + "step": 5298 + }, + { + "epoch": 1.23, + "learning_rate": 0.00015740605979574877, + "loss": 0.6895, + "step": 5299 + }, + { + "epoch": 1.23, + "learning_rate": 0.00015739116465161997, + "loss": 0.7007, + "step": 5300 + }, + { + "epoch": 1.23, + "learning_rate": 0.00015737626760855747, + "loss": 0.677, + "step": 5301 + }, + { + "epoch": 1.23, + "learning_rate": 0.00015736136866705407, + "loss": 0.6635, + "step": 5302 + }, + { + "epoch": 1.23, + "learning_rate": 0.00015734646782760282, + "loss": 0.6811, + "step": 5303 + }, + { + "epoch": 1.23, + "learning_rate": 0.00015733156509069673, + "loss": 0.696, + "step": 5304 + }, + { + "epoch": 1.23, + "learning_rate": 0.00015731666045682889, + "loss": 0.6845, + "step": 5305 + }, + { + "epoch": 1.23, + "learning_rate": 0.00015730175392649243, + "loss": 0.7144, + "step": 5306 + }, + { + "epoch": 1.23, + "learning_rate": 0.00015728684550018064, + "loss": 0.7704, + "step": 5307 + }, + { + "epoch": 1.23, + "learning_rate": 0.00015727193517838678, + "loss": 0.716, + "step": 5308 + }, + { + "epoch": 1.23, + "learning_rate": 0.00015725702296160416, + "loss": 0.7301, + "step": 5309 + }, + { + "epoch": 1.23, + "learning_rate": 0.0001572421088503262, + "loss": 0.7065, + "step": 5310 + }, + { + "epoch": 1.23, + "learning_rate": 0.00015722719284504637, + "loss": 0.7663, + "step": 5311 + }, + { + "epoch": 1.23, + "learning_rate": 0.0001572122749462582, + "loss": 0.6689, + "step": 5312 + }, + { + "epoch": 1.23, + "learning_rate": 0.00015719735515445536, + "loss": 0.7112, + "step": 5313 + }, + { + "epoch": 1.23, + "learning_rate": 0.0001571824334701314, + "loss": 0.7452, + "step": 5314 + }, + { + "epoch": 1.23, + "learning_rate": 0.00015716750989378013, + "loss": 0.6958, + "step": 5315 + }, + { + "epoch": 1.23, + "learning_rate": 0.0001571525844258953, + "loss": 0.6624, + "step": 5316 + }, + { + "epoch": 1.23, + "learning_rate": 0.0001571376570669707, + "loss": 0.6827, + "step": 5317 + }, + { + "epoch": 1.23, + "learning_rate": 0.00015712272781750035, + "loss": 0.787, + "step": 5318 + }, + { + "epoch": 1.23, + "learning_rate": 0.00015710779667797814, + "loss": 0.719, + "step": 5319 + }, + { + "epoch": 1.23, + "learning_rate": 0.00015709286364889814, + "loss": 0.6357, + "step": 5320 + }, + { + "epoch": 1.23, + "learning_rate": 0.00015707792873075445, + "loss": 0.7439, + "step": 5321 + }, + { + "epoch": 1.23, + "learning_rate": 0.00015706299192404122, + "loss": 0.7221, + "step": 5322 + }, + { + "epoch": 1.23, + "learning_rate": 0.0001570480532292527, + "loss": 0.6704, + "step": 5323 + }, + { + "epoch": 1.23, + "learning_rate": 0.00015703311264688309, + "loss": 0.75, + "step": 5324 + }, + { + "epoch": 1.23, + "learning_rate": 0.00015701817017742684, + "loss": 0.7816, + "step": 5325 + }, + { + "epoch": 1.23, + "learning_rate": 0.00015700322582137827, + "loss": 0.7113, + "step": 5326 + }, + { + "epoch": 1.23, + "learning_rate": 0.00015698827957923194, + "loss": 0.704, + "step": 5327 + }, + { + "epoch": 1.23, + "learning_rate": 0.0001569733314514823, + "loss": 0.6707, + "step": 5328 + }, + { + "epoch": 1.23, + "learning_rate": 0.00015695838143862405, + "loss": 0.704, + "step": 5329 + }, + { + "epoch": 1.23, + "learning_rate": 0.00015694342954115174, + "loss": 0.6711, + "step": 5330 + }, + { + "epoch": 1.23, + "learning_rate": 0.00015692847575956016, + "loss": 0.7605, + "step": 5331 + }, + { + "epoch": 1.23, + "learning_rate": 0.0001569135200943441, + "loss": 0.7233, + "step": 5332 + }, + { + "epoch": 1.23, + "learning_rate": 0.00015689856254599832, + "loss": 0.7543, + "step": 5333 + }, + { + "epoch": 1.23, + "learning_rate": 0.00015688360311501783, + "loss": 0.6921, + "step": 5334 + }, + { + "epoch": 1.24, + "learning_rate": 0.00015686864180189757, + "loss": 0.7464, + "step": 5335 + }, + { + "epoch": 1.24, + "learning_rate": 0.00015685367860713255, + "loss": 0.7362, + "step": 5336 + }, + { + "epoch": 1.24, + "learning_rate": 0.00015683871353121788, + "loss": 0.7159, + "step": 5337 + }, + { + "epoch": 1.24, + "learning_rate": 0.0001568237465746487, + "loss": 0.7613, + "step": 5338 + }, + { + "epoch": 1.24, + "learning_rate": 0.0001568087777379203, + "loss": 0.6609, + "step": 5339 + }, + { + "epoch": 1.24, + "learning_rate": 0.00015679380702152788, + "loss": 0.6487, + "step": 5340 + }, + { + "epoch": 1.24, + "learning_rate": 0.00015677883442596682, + "loss": 0.6534, + "step": 5341 + }, + { + "epoch": 1.24, + "learning_rate": 0.00015676385995173252, + "loss": 0.7037, + "step": 5342 + }, + { + "epoch": 1.24, + "learning_rate": 0.00015674888359932043, + "loss": 0.6933, + "step": 5343 + }, + { + "epoch": 1.24, + "learning_rate": 0.00015673390536922611, + "loss": 0.7115, + "step": 5344 + }, + { + "epoch": 1.24, + "learning_rate": 0.00015671892526194516, + "loss": 0.7013, + "step": 5345 + }, + { + "epoch": 1.24, + "learning_rate": 0.00015670394327797323, + "loss": 0.7082, + "step": 5346 + }, + { + "epoch": 1.24, + "learning_rate": 0.000156688959417806, + "loss": 0.648, + "step": 5347 + }, + { + "epoch": 1.24, + "learning_rate": 0.00015667397368193932, + "loss": 0.7009, + "step": 5348 + }, + { + "epoch": 1.24, + "learning_rate": 0.00015665898607086892, + "loss": 0.6999, + "step": 5349 + }, + { + "epoch": 1.24, + "learning_rate": 0.00015664399658509083, + "loss": 0.7247, + "step": 5350 + }, + { + "epoch": 1.24, + "learning_rate": 0.00015662900522510096, + "loss": 0.7366, + "step": 5351 + }, + { + "epoch": 1.24, + "learning_rate": 0.0001566140119913953, + "loss": 0.6563, + "step": 5352 + }, + { + "epoch": 1.24, + "learning_rate": 0.00015659901688446997, + "loss": 0.7142, + "step": 5353 + }, + { + "epoch": 1.24, + "learning_rate": 0.00015658401990482118, + "loss": 0.7295, + "step": 5354 + }, + { + "epoch": 1.24, + "learning_rate": 0.00015656902105294504, + "loss": 0.7254, + "step": 5355 + }, + { + "epoch": 1.24, + "learning_rate": 0.0001565540203293379, + "loss": 0.7472, + "step": 5356 + }, + { + "epoch": 1.24, + "learning_rate": 0.00015653901773449605, + "loss": 0.6438, + "step": 5357 + }, + { + "epoch": 1.24, + "learning_rate": 0.00015652401326891595, + "loss": 0.6673, + "step": 5358 + }, + { + "epoch": 1.24, + "learning_rate": 0.00015650900693309398, + "loss": 0.7681, + "step": 5359 + }, + { + "epoch": 1.24, + "learning_rate": 0.0001564939987275267, + "loss": 0.6815, + "step": 5360 + }, + { + "epoch": 1.24, + "learning_rate": 0.00015647898865271077, + "loss": 0.7082, + "step": 5361 + }, + { + "epoch": 1.24, + "learning_rate": 0.0001564639767091427, + "loss": 0.721, + "step": 5362 + }, + { + "epoch": 1.24, + "learning_rate": 0.00015644896289731928, + "loss": 0.6493, + "step": 5363 + }, + { + "epoch": 1.24, + "learning_rate": 0.0001564339472177373, + "loss": 0.7687, + "step": 5364 + }, + { + "epoch": 1.24, + "learning_rate": 0.00015641892967089354, + "loss": 0.6792, + "step": 5365 + }, + { + "epoch": 1.24, + "learning_rate": 0.00015640391025728488, + "loss": 0.6657, + "step": 5366 + }, + { + "epoch": 1.24, + "learning_rate": 0.00015638888897740838, + "loss": 0.6801, + "step": 5367 + }, + { + "epoch": 1.24, + "learning_rate": 0.00015637386583176093, + "loss": 0.6697, + "step": 5368 + }, + { + "epoch": 1.24, + "learning_rate": 0.00015635884082083968, + "loss": 0.7151, + "step": 5369 + }, + { + "epoch": 1.24, + "learning_rate": 0.00015634381394514176, + "loss": 0.7314, + "step": 5370 + }, + { + "epoch": 1.24, + "learning_rate": 0.00015632878520516436, + "loss": 0.723, + "step": 5371 + }, + { + "epoch": 1.24, + "learning_rate": 0.00015631375460140476, + "loss": 0.7339, + "step": 5372 + }, + { + "epoch": 1.24, + "learning_rate": 0.00015629872213436028, + "loss": 0.7379, + "step": 5373 + }, + { + "epoch": 1.24, + "learning_rate": 0.00015628368780452832, + "loss": 0.7265, + "step": 5374 + }, + { + "epoch": 1.24, + "learning_rate": 0.0001562686516124063, + "loss": 0.7533, + "step": 5375 + }, + { + "epoch": 1.24, + "learning_rate": 0.00015625361355849177, + "loss": 0.7242, + "step": 5376 + }, + { + "epoch": 1.24, + "learning_rate": 0.00015623857364328227, + "loss": 0.6795, + "step": 5377 + }, + { + "epoch": 1.25, + "learning_rate": 0.00015622353186727544, + "loss": 0.711, + "step": 5378 + }, + { + "epoch": 1.25, + "learning_rate": 0.00015620848823096897, + "loss": 0.7575, + "step": 5379 + }, + { + "epoch": 1.25, + "learning_rate": 0.00015619344273486063, + "loss": 0.696, + "step": 5380 + }, + { + "epoch": 1.25, + "learning_rate": 0.00015617839537944824, + "loss": 0.7155, + "step": 5381 + }, + { + "epoch": 1.25, + "learning_rate": 0.0001561633461652297, + "loss": 0.7998, + "step": 5382 + }, + { + "epoch": 1.25, + "learning_rate": 0.0001561482950927029, + "loss": 0.7523, + "step": 5383 + }, + { + "epoch": 1.25, + "learning_rate": 0.0001561332421623659, + "loss": 0.7423, + "step": 5384 + }, + { + "epoch": 1.25, + "learning_rate": 0.0001561181873747167, + "loss": 0.6893, + "step": 5385 + }, + { + "epoch": 1.25, + "learning_rate": 0.00015610313073025347, + "loss": 0.732, + "step": 5386 + }, + { + "epoch": 1.25, + "learning_rate": 0.0001560880722294744, + "loss": 0.7088, + "step": 5387 + }, + { + "epoch": 1.25, + "learning_rate": 0.00015607301187287774, + "loss": 0.7274, + "step": 5388 + }, + { + "epoch": 1.25, + "learning_rate": 0.00015605794966096177, + "loss": 0.6843, + "step": 5389 + }, + { + "epoch": 1.25, + "learning_rate": 0.00015604288559422491, + "loss": 0.6603, + "step": 5390 + }, + { + "epoch": 1.25, + "learning_rate": 0.00015602781967316554, + "loss": 0.6858, + "step": 5391 + }, + { + "epoch": 1.25, + "learning_rate": 0.00015601275189828219, + "loss": 0.769, + "step": 5392 + }, + { + "epoch": 1.25, + "learning_rate": 0.0001559976822700734, + "loss": 0.6851, + "step": 5393 + }, + { + "epoch": 1.25, + "learning_rate": 0.0001559826107890378, + "loss": 0.6809, + "step": 5394 + }, + { + "epoch": 1.25, + "learning_rate": 0.00015596753745567406, + "loss": 0.7161, + "step": 5395 + }, + { + "epoch": 1.25, + "learning_rate": 0.0001559524622704809, + "loss": 0.6185, + "step": 5396 + }, + { + "epoch": 1.25, + "learning_rate": 0.00015593738523395716, + "loss": 0.6461, + "step": 5397 + }, + { + "epoch": 1.25, + "learning_rate": 0.00015592230634660167, + "loss": 0.7389, + "step": 5398 + }, + { + "epoch": 1.25, + "learning_rate": 0.0001559072256089134, + "loss": 0.7177, + "step": 5399 + }, + { + "epoch": 1.25, + "learning_rate": 0.0001558921430213913, + "loss": 0.7615, + "step": 5400 + }, + { + "epoch": 1.25, + "learning_rate": 0.0001558770585845344, + "loss": 0.745, + "step": 5401 + }, + { + "epoch": 1.25, + "learning_rate": 0.00015586197229884184, + "loss": 0.781, + "step": 5402 + }, + { + "epoch": 1.25, + "learning_rate": 0.00015584688416481277, + "loss": 0.6849, + "step": 5403 + }, + { + "epoch": 1.25, + "learning_rate": 0.00015583179418294642, + "loss": 0.7185, + "step": 5404 + }, + { + "epoch": 1.25, + "learning_rate": 0.00015581670235374211, + "loss": 0.7088, + "step": 5405 + }, + { + "epoch": 1.25, + "learning_rate": 0.00015580160867769913, + "loss": 0.7041, + "step": 5406 + }, + { + "epoch": 1.25, + "learning_rate": 0.00015578651315531695, + "loss": 0.68, + "step": 5407 + }, + { + "epoch": 1.25, + "learning_rate": 0.000155771415787095, + "loss": 0.7677, + "step": 5408 + }, + { + "epoch": 1.25, + "learning_rate": 0.00015575631657353288, + "loss": 0.6686, + "step": 5409 + }, + { + "epoch": 1.25, + "learning_rate": 0.00015574121551513011, + "loss": 0.7174, + "step": 5410 + }, + { + "epoch": 1.25, + "learning_rate": 0.00015572611261238642, + "loss": 0.7535, + "step": 5411 + }, + { + "epoch": 1.25, + "learning_rate": 0.00015571100786580146, + "loss": 0.6805, + "step": 5412 + }, + { + "epoch": 1.25, + "learning_rate": 0.00015569590127587505, + "loss": 0.7128, + "step": 5413 + }, + { + "epoch": 1.25, + "learning_rate": 0.00015568079284310703, + "loss": 0.7669, + "step": 5414 + }, + { + "epoch": 1.25, + "learning_rate": 0.0001556656825679973, + "loss": 0.7198, + "step": 5415 + }, + { + "epoch": 1.25, + "learning_rate": 0.0001556505704510458, + "loss": 0.652, + "step": 5416 + }, + { + "epoch": 1.25, + "learning_rate": 0.00015563545649275256, + "loss": 0.6928, + "step": 5417 + }, + { + "epoch": 1.25, + "learning_rate": 0.00015562034069361766, + "loss": 0.6619, + "step": 5418 + }, + { + "epoch": 1.25, + "learning_rate": 0.00015560522305414125, + "loss": 0.6715, + "step": 5419 + }, + { + "epoch": 1.25, + "learning_rate": 0.0001555901035748236, + "loss": 0.6871, + "step": 5420 + }, + { + "epoch": 1.26, + "learning_rate": 0.00015557498225616487, + "loss": 0.7593, + "step": 5421 + }, + { + "epoch": 1.26, + "learning_rate": 0.00015555985909866544, + "loss": 0.6912, + "step": 5422 + }, + { + "epoch": 1.26, + "learning_rate": 0.0001555447341028257, + "loss": 0.716, + "step": 5423 + }, + { + "epoch": 1.26, + "learning_rate": 0.0001555296072691461, + "loss": 0.7232, + "step": 5424 + }, + { + "epoch": 1.26, + "learning_rate": 0.00015551447859812716, + "loss": 0.6873, + "step": 5425 + }, + { + "epoch": 1.26, + "learning_rate": 0.00015549934809026941, + "loss": 0.6761, + "step": 5426 + }, + { + "epoch": 1.26, + "learning_rate": 0.0001554842157460735, + "loss": 0.6692, + "step": 5427 + }, + { + "epoch": 1.26, + "learning_rate": 0.00015546908156604015, + "loss": 0.6214, + "step": 5428 + }, + { + "epoch": 1.26, + "learning_rate": 0.00015545394555067005, + "loss": 0.7082, + "step": 5429 + }, + { + "epoch": 1.26, + "learning_rate": 0.0001554388077004641, + "loss": 0.7539, + "step": 5430 + }, + { + "epoch": 1.26, + "learning_rate": 0.00015542366801592311, + "loss": 0.7316, + "step": 5431 + }, + { + "epoch": 1.26, + "learning_rate": 0.00015540852649754806, + "loss": 0.7609, + "step": 5432 + }, + { + "epoch": 1.26, + "learning_rate": 0.00015539338314583988, + "loss": 0.6957, + "step": 5433 + }, + { + "epoch": 1.26, + "learning_rate": 0.00015537823796129971, + "loss": 0.7272, + "step": 5434 + }, + { + "epoch": 1.26, + "learning_rate": 0.00015536309094442863, + "loss": 0.7821, + "step": 5435 + }, + { + "epoch": 1.26, + "learning_rate": 0.00015534794209572778, + "loss": 0.6709, + "step": 5436 + }, + { + "epoch": 1.26, + "learning_rate": 0.00015533279141569843, + "loss": 0.7704, + "step": 5437 + }, + { + "epoch": 1.26, + "learning_rate": 0.0001553176389048419, + "loss": 0.703, + "step": 5438 + }, + { + "epoch": 1.26, + "learning_rate": 0.00015530248456365953, + "loss": 0.6875, + "step": 5439 + }, + { + "epoch": 1.26, + "learning_rate": 0.00015528732839265272, + "loss": 0.7356, + "step": 5440 + }, + { + "epoch": 1.26, + "learning_rate": 0.000155272170392323, + "loss": 0.7078, + "step": 5441 + }, + { + "epoch": 1.26, + "learning_rate": 0.00015525701056317187, + "loss": 0.6774, + "step": 5442 + }, + { + "epoch": 1.26, + "learning_rate": 0.00015524184890570094, + "loss": 0.7454, + "step": 5443 + }, + { + "epoch": 1.26, + "learning_rate": 0.00015522668542041187, + "loss": 0.6794, + "step": 5444 + }, + { + "epoch": 1.26, + "learning_rate": 0.00015521152010780642, + "loss": 0.7714, + "step": 5445 + }, + { + "epoch": 1.26, + "learning_rate": 0.00015519635296838634, + "loss": 0.6947, + "step": 5446 + }, + { + "epoch": 1.26, + "learning_rate": 0.00015518118400265345, + "loss": 0.7266, + "step": 5447 + }, + { + "epoch": 1.26, + "learning_rate": 0.00015516601321110968, + "loss": 0.7108, + "step": 5448 + }, + { + "epoch": 1.26, + "learning_rate": 0.000155150840594257, + "loss": 0.7489, + "step": 5449 + }, + { + "epoch": 1.26, + "learning_rate": 0.00015513566615259742, + "loss": 0.7167, + "step": 5450 + }, + { + "epoch": 1.26, + "learning_rate": 0.00015512048988663307, + "loss": 0.6687, + "step": 5451 + }, + { + "epoch": 1.26, + "learning_rate": 0.00015510531179686602, + "loss": 0.6977, + "step": 5452 + }, + { + "epoch": 1.26, + "learning_rate": 0.00015509013188379854, + "loss": 0.7126, + "step": 5453 + }, + { + "epoch": 1.26, + "learning_rate": 0.0001550749501479329, + "loss": 0.7436, + "step": 5454 + }, + { + "epoch": 1.26, + "learning_rate": 0.00015505976658977138, + "loss": 0.5818, + "step": 5455 + }, + { + "epoch": 1.26, + "learning_rate": 0.00015504458120981632, + "loss": 0.6792, + "step": 5456 + }, + { + "epoch": 1.26, + "learning_rate": 0.00015502939400857034, + "loss": 0.6662, + "step": 5457 + }, + { + "epoch": 1.26, + "learning_rate": 0.00015501420498653578, + "loss": 0.6232, + "step": 5458 + }, + { + "epoch": 1.26, + "learning_rate": 0.0001549990141442153, + "loss": 0.6668, + "step": 5459 + }, + { + "epoch": 1.26, + "learning_rate": 0.00015498382148211145, + "loss": 0.7566, + "step": 5460 + }, + { + "epoch": 1.26, + "learning_rate": 0.00015496862700072698, + "loss": 0.729, + "step": 5461 + }, + { + "epoch": 1.26, + "learning_rate": 0.00015495343070056464, + "loss": 0.7556, + "step": 5462 + }, + { + "epoch": 1.26, + "learning_rate": 0.00015493823258212722, + "loss": 0.7765, + "step": 5463 + }, + { + "epoch": 1.27, + "learning_rate": 0.0001549230326459176, + "loss": 0.7266, + "step": 5464 + }, + { + "epoch": 1.27, + "learning_rate": 0.0001549078308924387, + "loss": 0.6456, + "step": 5465 + }, + { + "epoch": 1.27, + "learning_rate": 0.00015489262732219352, + "loss": 0.7222, + "step": 5466 + }, + { + "epoch": 1.27, + "learning_rate": 0.00015487742193568503, + "loss": 0.681, + "step": 5467 + }, + { + "epoch": 1.27, + "learning_rate": 0.00015486221473341648, + "loss": 0.6887, + "step": 5468 + }, + { + "epoch": 1.27, + "learning_rate": 0.00015484700571589095, + "loss": 0.7154, + "step": 5469 + }, + { + "epoch": 1.27, + "learning_rate": 0.00015483179488361167, + "loss": 0.6497, + "step": 5470 + }, + { + "epoch": 1.27, + "learning_rate": 0.00015481658223708196, + "loss": 0.6862, + "step": 5471 + }, + { + "epoch": 1.27, + "learning_rate": 0.00015480136777680514, + "loss": 0.7414, + "step": 5472 + }, + { + "epoch": 1.27, + "learning_rate": 0.00015478615150328463, + "loss": 0.6986, + "step": 5473 + }, + { + "epoch": 1.27, + "learning_rate": 0.00015477093341702394, + "loss": 0.7183, + "step": 5474 + }, + { + "epoch": 1.27, + "learning_rate": 0.0001547557135185265, + "loss": 0.6895, + "step": 5475 + }, + { + "epoch": 1.27, + "learning_rate": 0.00015474049180829602, + "loss": 0.7242, + "step": 5476 + }, + { + "epoch": 1.27, + "learning_rate": 0.00015472526828683603, + "loss": 0.6697, + "step": 5477 + }, + { + "epoch": 1.27, + "learning_rate": 0.00015471004295465035, + "loss": 0.7585, + "step": 5478 + }, + { + "epoch": 1.27, + "learning_rate": 0.00015469481581224272, + "loss": 0.7699, + "step": 5479 + }, + { + "epoch": 1.27, + "learning_rate": 0.00015467958686011689, + "loss": 0.7497, + "step": 5480 + }, + { + "epoch": 1.27, + "learning_rate": 0.00015466435609877682, + "loss": 0.6666, + "step": 5481 + }, + { + "epoch": 1.27, + "learning_rate": 0.00015464912352872648, + "loss": 0.6701, + "step": 5482 + }, + { + "epoch": 1.27, + "learning_rate": 0.00015463388915046983, + "loss": 0.7171, + "step": 5483 + }, + { + "epoch": 1.27, + "learning_rate": 0.00015461865296451094, + "loss": 0.6837, + "step": 5484 + }, + { + "epoch": 1.27, + "learning_rate": 0.00015460341497135395, + "loss": 0.7132, + "step": 5485 + }, + { + "epoch": 1.27, + "learning_rate": 0.00015458817517150305, + "loss": 0.6877, + "step": 5486 + }, + { + "epoch": 1.27, + "learning_rate": 0.00015457293356546252, + "loss": 0.7375, + "step": 5487 + }, + { + "epoch": 1.27, + "learning_rate": 0.0001545576901537366, + "loss": 0.7164, + "step": 5488 + }, + { + "epoch": 1.27, + "learning_rate": 0.0001545424449368297, + "loss": 0.753, + "step": 5489 + }, + { + "epoch": 1.27, + "learning_rate": 0.00015452719791524624, + "loss": 0.6215, + "step": 5490 + }, + { + "epoch": 1.27, + "learning_rate": 0.00015451194908949073, + "loss": 0.7405, + "step": 5491 + }, + { + "epoch": 1.27, + "learning_rate": 0.00015449669846006765, + "loss": 0.6539, + "step": 5492 + }, + { + "epoch": 1.27, + "learning_rate": 0.00015449669846006765, + "loss": 0.7083, + "step": 5493 + }, + { + "epoch": 1.27, + "learning_rate": 0.00015448144602748167, + "loss": 0.7882, + "step": 5494 + }, + { + "epoch": 1.27, + "learning_rate": 0.00015446619179223741, + "loss": 0.657, + "step": 5495 + }, + { + "epoch": 1.27, + "learning_rate": 0.00015445093575483968, + "loss": 0.7455, + "step": 5496 + }, + { + "epoch": 1.27, + "learning_rate": 0.00015443567791579316, + "loss": 0.7543, + "step": 5497 + }, + { + "epoch": 1.27, + "learning_rate": 0.00015442041827560274, + "loss": 0.7413, + "step": 5498 + }, + { + "epoch": 1.27, + "learning_rate": 0.00015440515683477334, + "loss": 0.7048, + "step": 5499 + }, + { + "epoch": 1.27, + "learning_rate": 0.0001543898935938099, + "loss": 0.6354, + "step": 5500 + }, + { + "epoch": 1.27, + "learning_rate": 0.00015437462855321746, + "loss": 0.7136, + "step": 5501 + }, + { + "epoch": 1.27, + "learning_rate": 0.00015435936171350107, + "loss": 0.6734, + "step": 5502 + }, + { + "epoch": 1.27, + "learning_rate": 0.00015434409307516593, + "loss": 0.6775, + "step": 5503 + }, + { + "epoch": 1.27, + "learning_rate": 0.00015432882263871717, + "loss": 0.701, + "step": 5504 + }, + { + "epoch": 1.27, + "learning_rate": 0.00015431355040466013, + "loss": 0.6759, + "step": 5505 + }, + { + "epoch": 1.27, + "learning_rate": 0.00015429827637350007, + "loss": 0.6878, + "step": 5506 + }, + { + "epoch": 1.28, + "learning_rate": 0.0001542830005457424, + "loss": 0.6851, + "step": 5507 + }, + { + "epoch": 1.28, + "learning_rate": 0.00015426772292189255, + "loss": 0.7414, + "step": 5508 + }, + { + "epoch": 1.28, + "learning_rate": 0.00015425244350245602, + "loss": 0.7072, + "step": 5509 + }, + { + "epoch": 1.28, + "learning_rate": 0.00015423716228793834, + "loss": 0.652, + "step": 5510 + }, + { + "epoch": 1.28, + "learning_rate": 0.00015422187927884525, + "loss": 0.6775, + "step": 5511 + }, + { + "epoch": 1.28, + "learning_rate": 0.00015420659447568222, + "loss": 0.736, + "step": 5512 + }, + { + "epoch": 1.28, + "learning_rate": 0.00015419130787895516, + "loss": 0.6885, + "step": 5513 + }, + { + "epoch": 1.28, + "learning_rate": 0.0001541760194891698, + "loss": 0.7175, + "step": 5514 + }, + { + "epoch": 1.28, + "learning_rate": 0.00015416072930683202, + "loss": 0.7097, + "step": 5515 + }, + { + "epoch": 1.28, + "learning_rate": 0.00015414543733244772, + "loss": 0.6895, + "step": 5516 + }, + { + "epoch": 1.28, + "learning_rate": 0.00015413014356652286, + "loss": 0.7429, + "step": 5517 + }, + { + "epoch": 1.28, + "learning_rate": 0.00015411484800956348, + "loss": 0.7122, + "step": 5518 + }, + { + "epoch": 1.28, + "learning_rate": 0.0001540995506620757, + "loss": 0.6934, + "step": 5519 + }, + { + "epoch": 1.28, + "learning_rate": 0.00015408425152456563, + "loss": 0.6821, + "step": 5520 + }, + { + "epoch": 1.28, + "learning_rate": 0.0001540689505975395, + "loss": 0.6946, + "step": 5521 + }, + { + "epoch": 1.28, + "learning_rate": 0.0001540536478815036, + "loss": 0.7108, + "step": 5522 + }, + { + "epoch": 1.28, + "learning_rate": 0.00015403834337696424, + "loss": 0.6905, + "step": 5523 + }, + { + "epoch": 1.28, + "learning_rate": 0.00015402303708442783, + "loss": 0.7327, + "step": 5524 + }, + { + "epoch": 1.28, + "learning_rate": 0.00015400772900440077, + "loss": 0.7169, + "step": 5525 + }, + { + "epoch": 1.28, + "learning_rate": 0.0001539924191373896, + "loss": 0.6602, + "step": 5526 + }, + { + "epoch": 1.28, + "learning_rate": 0.0001539771074839009, + "loss": 0.6371, + "step": 5527 + }, + { + "epoch": 1.28, + "learning_rate": 0.00015396179404444124, + "loss": 0.741, + "step": 5528 + }, + { + "epoch": 1.28, + "learning_rate": 0.0001539464788195174, + "loss": 0.6989, + "step": 5529 + }, + { + "epoch": 1.28, + "learning_rate": 0.000153931161809636, + "loss": 0.7565, + "step": 5530 + }, + { + "epoch": 1.28, + "learning_rate": 0.00015391584301530396, + "loss": 0.7506, + "step": 5531 + }, + { + "epoch": 1.28, + "learning_rate": 0.00015390052243702805, + "loss": 0.6911, + "step": 5532 + }, + { + "epoch": 1.28, + "learning_rate": 0.00015388520007531524, + "loss": 0.7319, + "step": 5533 + }, + { + "epoch": 1.28, + "learning_rate": 0.00015386987593067253, + "loss": 0.6693, + "step": 5534 + }, + { + "epoch": 1.28, + "learning_rate": 0.0001538545500036069, + "loss": 0.7156, + "step": 5535 + }, + { + "epoch": 1.28, + "learning_rate": 0.00015383922229462549, + "loss": 0.7084, + "step": 5536 + }, + { + "epoch": 1.28, + "learning_rate": 0.00015382389280423542, + "loss": 0.6447, + "step": 5537 + }, + { + "epoch": 1.28, + "learning_rate": 0.00015380856153294397, + "loss": 0.6903, + "step": 5538 + }, + { + "epoch": 1.28, + "learning_rate": 0.00015379322848125832, + "loss": 0.7594, + "step": 5539 + }, + { + "epoch": 1.28, + "learning_rate": 0.00015377789364968587, + "loss": 0.7209, + "step": 5540 + }, + { + "epoch": 1.28, + "learning_rate": 0.00015376255703873404, + "loss": 0.6788, + "step": 5541 + }, + { + "epoch": 1.28, + "learning_rate": 0.00015374721864891018, + "loss": 0.7116, + "step": 5542 + }, + { + "epoch": 1.28, + "learning_rate": 0.00015373187848072192, + "loss": 0.7663, + "step": 5543 + }, + { + "epoch": 1.28, + "learning_rate": 0.0001537165365346767, + "loss": 0.6818, + "step": 5544 + }, + { + "epoch": 1.28, + "learning_rate": 0.00015370119281128228, + "loss": 0.6723, + "step": 5545 + }, + { + "epoch": 1.28, + "learning_rate": 0.00015368584731104625, + "loss": 0.6938, + "step": 5546 + }, + { + "epoch": 1.28, + "learning_rate": 0.00015367050003447637, + "loss": 0.6852, + "step": 5547 + }, + { + "epoch": 1.28, + "learning_rate": 0.0001536551509820805, + "loss": 0.6399, + "step": 5548 + }, + { + "epoch": 1.28, + "learning_rate": 0.00015363980015436644, + "loss": 0.69, + "step": 5549 + }, + { + "epoch": 1.28, + "learning_rate": 0.00015362444755184212, + "loss": 0.6819, + "step": 5550 + }, + { + "epoch": 1.29, + "learning_rate": 0.00015360909317501555, + "loss": 0.7332, + "step": 5551 + }, + { + "epoch": 1.29, + "learning_rate": 0.00015359373702439477, + "loss": 0.6889, + "step": 5552 + }, + { + "epoch": 1.29, + "learning_rate": 0.00015357837910048784, + "loss": 0.6694, + "step": 5553 + }, + { + "epoch": 1.29, + "learning_rate": 0.00015356301940380298, + "loss": 0.7817, + "step": 5554 + }, + { + "epoch": 1.29, + "learning_rate": 0.00015354765793484834, + "loss": 0.7484, + "step": 5555 + }, + { + "epoch": 1.29, + "learning_rate": 0.00015353229469413217, + "loss": 0.751, + "step": 5556 + }, + { + "epoch": 1.29, + "learning_rate": 0.0001535169296821629, + "loss": 0.673, + "step": 5557 + }, + { + "epoch": 1.29, + "learning_rate": 0.00015350156289944885, + "loss": 0.7204, + "step": 5558 + }, + { + "epoch": 1.29, + "learning_rate": 0.00015348619434649852, + "loss": 0.7315, + "step": 5559 + }, + { + "epoch": 1.29, + "learning_rate": 0.00015347082402382035, + "loss": 0.6864, + "step": 5560 + }, + { + "epoch": 1.29, + "learning_rate": 0.00015345545193192297, + "loss": 0.717, + "step": 5561 + }, + { + "epoch": 1.29, + "learning_rate": 0.00015344007807131495, + "loss": 0.6874, + "step": 5562 + }, + { + "epoch": 1.29, + "learning_rate": 0.00015342470244250503, + "loss": 0.7277, + "step": 5563 + }, + { + "epoch": 1.29, + "learning_rate": 0.00015340932504600188, + "loss": 0.7692, + "step": 5564 + }, + { + "epoch": 1.29, + "learning_rate": 0.00015339394588231438, + "loss": 0.7395, + "step": 5565 + }, + { + "epoch": 1.29, + "learning_rate": 0.00015337856495195137, + "loss": 0.7304, + "step": 5566 + }, + { + "epoch": 1.29, + "learning_rate": 0.0001533631822554217, + "loss": 0.7775, + "step": 5567 + }, + { + "epoch": 1.29, + "learning_rate": 0.00015334779779323444, + "loss": 0.6936, + "step": 5568 + }, + { + "epoch": 1.29, + "learning_rate": 0.00015333241156589855, + "loss": 0.6473, + "step": 5569 + }, + { + "epoch": 1.29, + "learning_rate": 0.00015331702357392318, + "loss": 0.6723, + "step": 5570 + }, + { + "epoch": 1.29, + "learning_rate": 0.00015330163381781749, + "loss": 0.6929, + "step": 5571 + }, + { + "epoch": 1.29, + "learning_rate": 0.0001532862422980906, + "loss": 0.7466, + "step": 5572 + }, + { + "epoch": 1.29, + "learning_rate": 0.00015327084901525182, + "loss": 0.7057, + "step": 5573 + }, + { + "epoch": 1.29, + "learning_rate": 0.0001532554539698105, + "loss": 0.6947, + "step": 5574 + }, + { + "epoch": 1.29, + "learning_rate": 0.00015324005716227607, + "loss": 0.713, + "step": 5575 + }, + { + "epoch": 1.29, + "learning_rate": 0.00015322465859315786, + "loss": 0.7134, + "step": 5576 + }, + { + "epoch": 1.29, + "learning_rate": 0.00015320925826296545, + "loss": 0.718, + "step": 5577 + }, + { + "epoch": 1.29, + "learning_rate": 0.00015319385617220836, + "loss": 0.6731, + "step": 5578 + }, + { + "epoch": 1.29, + "learning_rate": 0.0001531784523213962, + "loss": 0.6967, + "step": 5579 + }, + { + "epoch": 1.29, + "learning_rate": 0.0001531630467110387, + "loss": 0.6893, + "step": 5580 + }, + { + "epoch": 1.29, + "learning_rate": 0.0001531476393416456, + "loss": 0.7108, + "step": 5581 + }, + { + "epoch": 1.29, + "learning_rate": 0.00015313223021372663, + "loss": 0.7128, + "step": 5582 + }, + { + "epoch": 1.29, + "learning_rate": 0.00015311681932779164, + "loss": 0.7618, + "step": 5583 + }, + { + "epoch": 1.29, + "learning_rate": 0.00015310140668435058, + "loss": 0.6977, + "step": 5584 + }, + { + "epoch": 1.29, + "learning_rate": 0.00015308599228391342, + "loss": 0.6991, + "step": 5585 + }, + { + "epoch": 1.29, + "learning_rate": 0.0001530705761269902, + "loss": 0.6616, + "step": 5586 + }, + { + "epoch": 1.29, + "learning_rate": 0.00015305515821409092, + "loss": 0.6693, + "step": 5587 + }, + { + "epoch": 1.29, + "learning_rate": 0.00015303973854572577, + "loss": 0.676, + "step": 5588 + }, + { + "epoch": 1.29, + "learning_rate": 0.00015302431712240497, + "loss": 0.6724, + "step": 5589 + }, + { + "epoch": 1.29, + "learning_rate": 0.00015300889394463871, + "loss": 0.6504, + "step": 5590 + }, + { + "epoch": 1.29, + "learning_rate": 0.00015299346901293743, + "loss": 0.7938, + "step": 5591 + }, + { + "epoch": 1.29, + "learning_rate": 0.00015297804232781138, + "loss": 0.662, + "step": 5592 + }, + { + "epoch": 1.29, + "learning_rate": 0.00015296261388977108, + "loss": 0.74, + "step": 5593 + }, + { + "epoch": 1.3, + "learning_rate": 0.00015294718369932692, + "loss": 0.7233, + "step": 5594 + }, + { + "epoch": 1.3, + "learning_rate": 0.00015293175175698953, + "loss": 0.64, + "step": 5595 + }, + { + "epoch": 1.3, + "learning_rate": 0.00015291631806326947, + "loss": 0.6816, + "step": 5596 + }, + { + "epoch": 1.3, + "learning_rate": 0.00015290088261867748, + "loss": 0.7381, + "step": 5597 + }, + { + "epoch": 1.3, + "learning_rate": 0.0001528854454237242, + "loss": 0.6644, + "step": 5598 + }, + { + "epoch": 1.3, + "learning_rate": 0.0001528700064789204, + "loss": 0.6661, + "step": 5599 + }, + { + "epoch": 1.3, + "learning_rate": 0.000152854565784777, + "loss": 0.6199, + "step": 5600 + }, + { + "epoch": 1.3, + "learning_rate": 0.0001528391233418048, + "loss": 0.7011, + "step": 5601 + }, + { + "epoch": 1.3, + "learning_rate": 0.00015282367915051486, + "loss": 0.7196, + "step": 5602 + }, + { + "epoch": 1.3, + "learning_rate": 0.00015280823321141808, + "loss": 0.6804, + "step": 5603 + }, + { + "epoch": 1.3, + "learning_rate": 0.0001527927855250256, + "loss": 0.6653, + "step": 5604 + }, + { + "epoch": 1.3, + "learning_rate": 0.00015277733609184852, + "loss": 0.7461, + "step": 5605 + }, + { + "epoch": 1.3, + "learning_rate": 0.00015276188491239803, + "loss": 0.6657, + "step": 5606 + }, + { + "epoch": 1.3, + "learning_rate": 0.00015274643198718538, + "loss": 0.7566, + "step": 5607 + }, + { + "epoch": 1.3, + "learning_rate": 0.0001527309773167218, + "loss": 0.689, + "step": 5608 + }, + { + "epoch": 1.3, + "learning_rate": 0.0001527155209015188, + "loss": 0.7067, + "step": 5609 + }, + { + "epoch": 1.3, + "learning_rate": 0.00015270006274208765, + "loss": 0.667, + "step": 5610 + }, + { + "epoch": 1.3, + "learning_rate": 0.0001526846028389399, + "loss": 0.6683, + "step": 5611 + }, + { + "epoch": 1.3, + "learning_rate": 0.000152669141192587, + "loss": 0.6714, + "step": 5612 + }, + { + "epoch": 1.3, + "learning_rate": 0.00015265367780354066, + "loss": 0.7001, + "step": 5613 + }, + { + "epoch": 1.3, + "learning_rate": 0.00015263821267231243, + "loss": 0.6963, + "step": 5614 + }, + { + "epoch": 1.3, + "learning_rate": 0.00015262274579941405, + "loss": 0.7578, + "step": 5615 + }, + { + "epoch": 1.3, + "learning_rate": 0.00015260727718535727, + "loss": 0.7479, + "step": 5616 + }, + { + "epoch": 1.3, + "learning_rate": 0.0001525918068306539, + "loss": 0.6954, + "step": 5617 + }, + { + "epoch": 1.3, + "learning_rate": 0.00015257633473581583, + "loss": 0.6623, + "step": 5618 + }, + { + "epoch": 1.3, + "learning_rate": 0.00015256086090135505, + "loss": 0.6434, + "step": 5619 + }, + { + "epoch": 1.3, + "learning_rate": 0.00015254538532778343, + "loss": 0.6838, + "step": 5620 + }, + { + "epoch": 1.3, + "learning_rate": 0.0001525299080156131, + "loss": 0.7002, + "step": 5621 + }, + { + "epoch": 1.3, + "learning_rate": 0.00015251442896535618, + "loss": 0.6826, + "step": 5622 + }, + { + "epoch": 1.3, + "learning_rate": 0.00015249894817752477, + "loss": 0.7232, + "step": 5623 + }, + { + "epoch": 1.3, + "learning_rate": 0.00015248346565263114, + "loss": 0.7384, + "step": 5624 + }, + { + "epoch": 1.3, + "learning_rate": 0.00015246798139118757, + "loss": 0.7251, + "step": 5625 + }, + { + "epoch": 1.3, + "learning_rate": 0.00015245249539370636, + "loss": 0.7295, + "step": 5626 + }, + { + "epoch": 1.3, + "learning_rate": 0.00015243700766069993, + "loss": 0.6879, + "step": 5627 + }, + { + "epoch": 1.3, + "learning_rate": 0.00015242151819268074, + "loss": 0.7053, + "step": 5628 + }, + { + "epoch": 1.3, + "learning_rate": 0.00015240602699016128, + "loss": 0.6642, + "step": 5629 + }, + { + "epoch": 1.3, + "learning_rate": 0.00015239053405365408, + "loss": 0.7318, + "step": 5630 + }, + { + "epoch": 1.3, + "learning_rate": 0.00015237503938367186, + "loss": 0.7483, + "step": 5631 + }, + { + "epoch": 1.3, + "learning_rate": 0.00015235954298072724, + "loss": 0.682, + "step": 5632 + }, + { + "epoch": 1.3, + "learning_rate": 0.00015234404484533294, + "loss": 0.7201, + "step": 5633 + }, + { + "epoch": 1.3, + "learning_rate": 0.00015232854497800177, + "loss": 0.67, + "step": 5634 + }, + { + "epoch": 1.3, + "learning_rate": 0.00015231304337924662, + "loss": 0.6761, + "step": 5635 + }, + { + "epoch": 1.3, + "learning_rate": 0.00015229754004958038, + "loss": 0.6893, + "step": 5636 + }, + { + "epoch": 1.31, + "learning_rate": 0.00015228203498951598, + "loss": 0.6891, + "step": 5637 + }, + { + "epoch": 1.31, + "learning_rate": 0.0001522665281995665, + "loss": 0.6751, + "step": 5638 + }, + { + "epoch": 1.31, + "learning_rate": 0.00015225101968024504, + "loss": 0.6895, + "step": 5639 + }, + { + "epoch": 1.31, + "learning_rate": 0.00015223550943206462, + "loss": 0.6154, + "step": 5640 + }, + { + "epoch": 1.31, + "learning_rate": 0.00015221999745553854, + "loss": 0.7101, + "step": 5641 + }, + { + "epoch": 1.31, + "learning_rate": 0.00015220448375118, + "loss": 0.7441, + "step": 5642 + }, + { + "epoch": 1.31, + "learning_rate": 0.00015218896831950237, + "loss": 0.7386, + "step": 5643 + }, + { + "epoch": 1.31, + "learning_rate": 0.00015217345116101902, + "loss": 0.7068, + "step": 5644 + }, + { + "epoch": 1.31, + "learning_rate": 0.00015215793227624328, + "loss": 0.6744, + "step": 5645 + }, + { + "epoch": 1.31, + "learning_rate": 0.0001521424116656887, + "loss": 0.7385, + "step": 5646 + }, + { + "epoch": 1.31, + "learning_rate": 0.0001521268893298688, + "loss": 0.7022, + "step": 5647 + }, + { + "epoch": 1.31, + "learning_rate": 0.0001521113652692972, + "loss": 0.7673, + "step": 5648 + }, + { + "epoch": 1.31, + "learning_rate": 0.00015209583948448753, + "loss": 0.6874, + "step": 5649 + }, + { + "epoch": 1.31, + "learning_rate": 0.00015208031197595356, + "loss": 0.7132, + "step": 5650 + }, + { + "epoch": 1.31, + "learning_rate": 0.00015206478274420895, + "loss": 0.6985, + "step": 5651 + }, + { + "epoch": 1.31, + "learning_rate": 0.0001520492517897676, + "loss": 0.7084, + "step": 5652 + }, + { + "epoch": 1.31, + "learning_rate": 0.0001520337191131434, + "loss": 0.6884, + "step": 5653 + }, + { + "epoch": 1.31, + "learning_rate": 0.0001520181847148502, + "loss": 0.6769, + "step": 5654 + }, + { + "epoch": 1.31, + "learning_rate": 0.00015200264859540214, + "loss": 0.6944, + "step": 5655 + }, + { + "epoch": 1.31, + "learning_rate": 0.00015198711075531314, + "loss": 0.7186, + "step": 5656 + }, + { + "epoch": 1.31, + "learning_rate": 0.00015197157119509736, + "loss": 0.7517, + "step": 5657 + }, + { + "epoch": 1.31, + "learning_rate": 0.00015195602991526896, + "loss": 0.6427, + "step": 5658 + }, + { + "epoch": 1.31, + "learning_rate": 0.0001519404869163422, + "loss": 0.7075, + "step": 5659 + }, + { + "epoch": 1.31, + "learning_rate": 0.0001519249421988313, + "loss": 0.7226, + "step": 5660 + }, + { + "epoch": 1.31, + "learning_rate": 0.00015190939576325066, + "loss": 0.6844, + "step": 5661 + }, + { + "epoch": 1.31, + "learning_rate": 0.00015189384761011461, + "loss": 0.7503, + "step": 5662 + }, + { + "epoch": 1.31, + "learning_rate": 0.00015187829773993762, + "loss": 0.6064, + "step": 5663 + }, + { + "epoch": 1.31, + "learning_rate": 0.00015186274615323417, + "loss": 0.7497, + "step": 5664 + }, + { + "epoch": 1.31, + "learning_rate": 0.0001518471928505189, + "loss": 0.6359, + "step": 5665 + }, + { + "epoch": 1.31, + "learning_rate": 0.00015183163783230645, + "loss": 0.7164, + "step": 5666 + }, + { + "epoch": 1.31, + "learning_rate": 0.00015181608109911138, + "loss": 0.7967, + "step": 5667 + }, + { + "epoch": 1.31, + "learning_rate": 0.00015180052265144846, + "loss": 0.6336, + "step": 5668 + }, + { + "epoch": 1.31, + "learning_rate": 0.00015178496248983254, + "loss": 0.7086, + "step": 5669 + }, + { + "epoch": 1.31, + "learning_rate": 0.00015176940061477841, + "loss": 0.7549, + "step": 5670 + }, + { + "epoch": 1.31, + "learning_rate": 0.00015175383702680106, + "loss": 0.6785, + "step": 5671 + }, + { + "epoch": 1.31, + "learning_rate": 0.00015173827172641535, + "loss": 0.6738, + "step": 5672 + }, + { + "epoch": 1.31, + "learning_rate": 0.00015172270471413632, + "loss": 0.6379, + "step": 5673 + }, + { + "epoch": 1.31, + "learning_rate": 0.00015170713599047905, + "loss": 0.7746, + "step": 5674 + }, + { + "epoch": 1.31, + "learning_rate": 0.0001516915655559587, + "loss": 0.6663, + "step": 5675 + }, + { + "epoch": 1.31, + "learning_rate": 0.00015167599341109046, + "loss": 0.667, + "step": 5676 + }, + { + "epoch": 1.31, + "learning_rate": 0.00015166041955638954, + "loss": 0.7281, + "step": 5677 + }, + { + "epoch": 1.31, + "learning_rate": 0.00015164484399237123, + "loss": 0.621, + "step": 5678 + }, + { + "epoch": 1.31, + "learning_rate": 0.00015162926671955093, + "loss": 0.6725, + "step": 5679 + }, + { + "epoch": 1.32, + "learning_rate": 0.00015161368773844403, + "loss": 0.741, + "step": 5680 + }, + { + "epoch": 1.32, + "learning_rate": 0.000151598107049566, + "loss": 0.7177, + "step": 5681 + }, + { + "epoch": 1.32, + "learning_rate": 0.00015158252465343242, + "loss": 0.6734, + "step": 5682 + }, + { + "epoch": 1.32, + "learning_rate": 0.00015156694055055877, + "loss": 0.7304, + "step": 5683 + }, + { + "epoch": 1.32, + "learning_rate": 0.00015155135474146077, + "loss": 0.6733, + "step": 5684 + }, + { + "epoch": 1.32, + "learning_rate": 0.0001515357672266541, + "loss": 0.6673, + "step": 5685 + }, + { + "epoch": 1.32, + "learning_rate": 0.00015152017800665451, + "loss": 0.6996, + "step": 5686 + }, + { + "epoch": 1.32, + "learning_rate": 0.0001515045870819778, + "loss": 0.6494, + "step": 5687 + }, + { + "epoch": 1.32, + "learning_rate": 0.00015148899445313981, + "loss": 0.7323, + "step": 5688 + }, + { + "epoch": 1.32, + "learning_rate": 0.00015147340012065653, + "loss": 0.6509, + "step": 5689 + }, + { + "epoch": 1.32, + "learning_rate": 0.00015145780408504388, + "loss": 0.7218, + "step": 5690 + }, + { + "epoch": 1.32, + "learning_rate": 0.00015144220634681795, + "loss": 0.7129, + "step": 5691 + }, + { + "epoch": 1.32, + "learning_rate": 0.00015142660690649478, + "loss": 0.7788, + "step": 5692 + }, + { + "epoch": 1.32, + "learning_rate": 0.00015141100576459052, + "loss": 0.7135, + "step": 5693 + }, + { + "epoch": 1.32, + "learning_rate": 0.0001513954029216214, + "loss": 0.725, + "step": 5694 + }, + { + "epoch": 1.32, + "learning_rate": 0.0001513797983781037, + "loss": 0.6612, + "step": 5695 + }, + { + "epoch": 1.32, + "learning_rate": 0.00015136419213455365, + "loss": 0.717, + "step": 5696 + }, + { + "epoch": 1.32, + "learning_rate": 0.00015134858419148768, + "loss": 0.6983, + "step": 5697 + }, + { + "epoch": 1.32, + "learning_rate": 0.00015133297454942226, + "loss": 0.7222, + "step": 5698 + }, + { + "epoch": 1.32, + "learning_rate": 0.0001513173632088738, + "loss": 0.7416, + "step": 5699 + }, + { + "epoch": 1.32, + "learning_rate": 0.0001513017501703589, + "loss": 0.7109, + "step": 5700 + }, + { + "epoch": 1.32, + "learning_rate": 0.0001512861354343941, + "loss": 0.6976, + "step": 5701 + }, + { + "epoch": 1.32, + "learning_rate": 0.0001512705190014961, + "loss": 0.692, + "step": 5702 + }, + { + "epoch": 1.32, + "learning_rate": 0.0001512549008721816, + "loss": 0.6511, + "step": 5703 + }, + { + "epoch": 1.32, + "learning_rate": 0.00015123928104696734, + "loss": 0.683, + "step": 5704 + }, + { + "epoch": 1.32, + "learning_rate": 0.00015122365952637017, + "loss": 0.6943, + "step": 5705 + }, + { + "epoch": 1.32, + "learning_rate": 0.00015120803631090695, + "loss": 0.7531, + "step": 5706 + }, + { + "epoch": 1.32, + "learning_rate": 0.00015119241140109467, + "loss": 0.6709, + "step": 5707 + }, + { + "epoch": 1.32, + "learning_rate": 0.00015117678479745022, + "loss": 0.7227, + "step": 5708 + }, + { + "epoch": 1.32, + "learning_rate": 0.0001511611565004907, + "loss": 0.6956, + "step": 5709 + }, + { + "epoch": 1.32, + "learning_rate": 0.00015114552651073325, + "loss": 0.7029, + "step": 5710 + }, + { + "epoch": 1.32, + "learning_rate": 0.00015112989482869498, + "loss": 0.6964, + "step": 5711 + }, + { + "epoch": 1.32, + "learning_rate": 0.0001511142614548931, + "loss": 0.7074, + "step": 5712 + }, + { + "epoch": 1.32, + "learning_rate": 0.0001510986263898449, + "loss": 0.714, + "step": 5713 + }, + { + "epoch": 1.32, + "learning_rate": 0.0001510829896340677, + "loss": 0.7128, + "step": 5714 + }, + { + "epoch": 1.32, + "learning_rate": 0.0001510673511880789, + "loss": 0.7284, + "step": 5715 + }, + { + "epoch": 1.32, + "learning_rate": 0.0001510517110523959, + "loss": 0.6557, + "step": 5716 + }, + { + "epoch": 1.32, + "learning_rate": 0.0001510360692275362, + "loss": 0.7105, + "step": 5717 + }, + { + "epoch": 1.32, + "learning_rate": 0.0001510204257140174, + "loss": 0.661, + "step": 5718 + }, + { + "epoch": 1.32, + "learning_rate": 0.00015100478051235705, + "loss": 0.6846, + "step": 5719 + }, + { + "epoch": 1.32, + "learning_rate": 0.00015098913362307284, + "loss": 0.7302, + "step": 5720 + }, + { + "epoch": 1.32, + "learning_rate": 0.0001509734850466825, + "loss": 0.7429, + "step": 5721 + }, + { + "epoch": 1.32, + "learning_rate": 0.00015095783478370374, + "loss": 0.6727, + "step": 5722 + }, + { + "epoch": 1.33, + "learning_rate": 0.00015094218283465445, + "loss": 0.6994, + "step": 5723 + }, + { + "epoch": 1.33, + "learning_rate": 0.0001509265292000525, + "loss": 0.6899, + "step": 5724 + }, + { + "epoch": 1.33, + "learning_rate": 0.00015091087388041581, + "loss": 0.7679, + "step": 5725 + }, + { + "epoch": 1.33, + "learning_rate": 0.00015089521687626243, + "loss": 0.7286, + "step": 5726 + }, + { + "epoch": 1.33, + "learning_rate": 0.00015087955818811037, + "loss": 0.6608, + "step": 5727 + }, + { + "epoch": 1.33, + "learning_rate": 0.0001508638978164777, + "loss": 0.7242, + "step": 5728 + }, + { + "epoch": 1.33, + "learning_rate": 0.0001508482357618827, + "loss": 0.6788, + "step": 5729 + }, + { + "epoch": 1.33, + "learning_rate": 0.00015083257202484348, + "loss": 0.6482, + "step": 5730 + }, + { + "epoch": 1.33, + "learning_rate": 0.00015081690660587836, + "loss": 0.6766, + "step": 5731 + }, + { + "epoch": 1.33, + "learning_rate": 0.00015080123950550562, + "loss": 0.6688, + "step": 5732 + }, + { + "epoch": 1.33, + "learning_rate": 0.00015078557072424373, + "loss": 0.7271, + "step": 5733 + }, + { + "epoch": 1.33, + "learning_rate": 0.00015076990026261107, + "loss": 0.7041, + "step": 5734 + }, + { + "epoch": 1.33, + "learning_rate": 0.0001507542281211262, + "loss": 0.7208, + "step": 5735 + }, + { + "epoch": 1.33, + "learning_rate": 0.00015073855430030756, + "loss": 0.6652, + "step": 5736 + }, + { + "epoch": 1.33, + "learning_rate": 0.00015072287880067386, + "loss": 0.7469, + "step": 5737 + }, + { + "epoch": 1.33, + "learning_rate": 0.00015070720162274373, + "loss": 0.7166, + "step": 5738 + }, + { + "epoch": 1.33, + "learning_rate": 0.00015069152276703593, + "loss": 0.7184, + "step": 5739 + }, + { + "epoch": 1.33, + "learning_rate": 0.00015067584223406918, + "loss": 0.7553, + "step": 5740 + }, + { + "epoch": 1.33, + "learning_rate": 0.0001506601600243623, + "loss": 0.7287, + "step": 5741 + }, + { + "epoch": 1.33, + "learning_rate": 0.00015064447613843423, + "loss": 0.6969, + "step": 5742 + }, + { + "epoch": 1.33, + "learning_rate": 0.00015062879057680385, + "loss": 0.6912, + "step": 5743 + }, + { + "epoch": 1.33, + "learning_rate": 0.00015061310333999023, + "loss": 0.6803, + "step": 5744 + }, + { + "epoch": 1.33, + "learning_rate": 0.0001505974144285124, + "loss": 0.6983, + "step": 5745 + }, + { + "epoch": 1.33, + "learning_rate": 0.0001505817238428894, + "loss": 0.7334, + "step": 5746 + }, + { + "epoch": 1.33, + "learning_rate": 0.0001505660315836405, + "loss": 0.7337, + "step": 5747 + }, + { + "epoch": 1.33, + "learning_rate": 0.0001505503376512848, + "loss": 0.7422, + "step": 5748 + }, + { + "epoch": 1.33, + "learning_rate": 0.0001505346420463417, + "loss": 0.6568, + "step": 5749 + }, + { + "epoch": 1.33, + "learning_rate": 0.00015051894476933043, + "loss": 0.7088, + "step": 5750 + }, + { + "epoch": 1.33, + "learning_rate": 0.00015050324582077047, + "loss": 0.6751, + "step": 5751 + }, + { + "epoch": 1.33, + "learning_rate": 0.00015048754520118114, + "loss": 0.7372, + "step": 5752 + }, + { + "epoch": 1.33, + "learning_rate": 0.00015047184291108203, + "loss": 0.7518, + "step": 5753 + }, + { + "epoch": 1.33, + "learning_rate": 0.00015045613895099265, + "loss": 0.7284, + "step": 5754 + }, + { + "epoch": 1.33, + "learning_rate": 0.0001504404333214326, + "loss": 0.7324, + "step": 5755 + }, + { + "epoch": 1.33, + "learning_rate": 0.0001504247260229216, + "loss": 0.723, + "step": 5756 + }, + { + "epoch": 1.33, + "learning_rate": 0.00015040901705597927, + "loss": 0.7773, + "step": 5757 + }, + { + "epoch": 1.33, + "learning_rate": 0.00015039330642112545, + "loss": 0.627, + "step": 5758 + }, + { + "epoch": 1.33, + "learning_rate": 0.00015037759411887995, + "loss": 0.6988, + "step": 5759 + }, + { + "epoch": 1.33, + "learning_rate": 0.00015036188014976265, + "loss": 0.7303, + "step": 5760 + }, + { + "epoch": 1.33, + "learning_rate": 0.00015034616451429353, + "loss": 0.6967, + "step": 5761 + }, + { + "epoch": 1.33, + "learning_rate": 0.0001503304472129925, + "loss": 0.7388, + "step": 5762 + }, + { + "epoch": 1.33, + "learning_rate": 0.00015031472824637967, + "loss": 0.6789, + "step": 5763 + }, + { + "epoch": 1.33, + "learning_rate": 0.00015029900761497506, + "loss": 0.732, + "step": 5764 + }, + { + "epoch": 1.33, + "learning_rate": 0.00015028328531929895, + "loss": 0.6603, + "step": 5765 + }, + { + "epoch": 1.33, + "learning_rate": 0.00015026756135987145, + "loss": 0.7064, + "step": 5766 + }, + { + "epoch": 1.34, + "learning_rate": 0.00015025183573721292, + "loss": 0.7027, + "step": 5767 + }, + { + "epoch": 1.34, + "learning_rate": 0.0001502361084518436, + "loss": 0.7784, + "step": 5768 + }, + { + "epoch": 1.34, + "learning_rate": 0.00015022037950428386, + "loss": 0.7254, + "step": 5769 + }, + { + "epoch": 1.34, + "learning_rate": 0.0001502046488950542, + "loss": 0.6812, + "step": 5770 + }, + { + "epoch": 1.34, + "learning_rate": 0.00015018891662467508, + "loss": 0.6983, + "step": 5771 + }, + { + "epoch": 1.34, + "learning_rate": 0.00015017318269366708, + "loss": 0.6897, + "step": 5772 + }, + { + "epoch": 1.34, + "learning_rate": 0.0001501574471025507, + "loss": 0.6675, + "step": 5773 + }, + { + "epoch": 1.34, + "learning_rate": 0.00015014170985184667, + "loss": 0.6939, + "step": 5774 + }, + { + "epoch": 1.34, + "learning_rate": 0.00015012597094207566, + "loss": 0.664, + "step": 5775 + }, + { + "epoch": 1.34, + "learning_rate": 0.00015011023037375844, + "loss": 0.6432, + "step": 5776 + }, + { + "epoch": 1.34, + "learning_rate": 0.0001500944881474159, + "loss": 0.7147, + "step": 5777 + }, + { + "epoch": 1.34, + "learning_rate": 0.0001500787442635688, + "loss": 0.7011, + "step": 5778 + }, + { + "epoch": 1.34, + "learning_rate": 0.00015006299872273812, + "loss": 0.7045, + "step": 5779 + }, + { + "epoch": 1.34, + "learning_rate": 0.00015004725152544484, + "loss": 0.7526, + "step": 5780 + }, + { + "epoch": 1.34, + "learning_rate": 0.00015003150267220997, + "loss": 0.7562, + "step": 5781 + }, + { + "epoch": 1.34, + "learning_rate": 0.00015001575216355465, + "loss": 0.7324, + "step": 5782 + }, + { + "epoch": 1.34, + "learning_rate": 0.00015000000000000001, + "loss": 0.6731, + "step": 5783 + }, + { + "epoch": 1.34, + "learning_rate": 0.00014998424618206722, + "loss": 0.6366, + "step": 5784 + }, + { + "epoch": 1.34, + "learning_rate": 0.00014996849071027759, + "loss": 0.7265, + "step": 5785 + }, + { + "epoch": 1.34, + "learning_rate": 0.00014995273358515236, + "loss": 0.7293, + "step": 5786 + }, + { + "epoch": 1.34, + "learning_rate": 0.0001499369748072129, + "loss": 0.6169, + "step": 5787 + }, + { + "epoch": 1.34, + "learning_rate": 0.00014992121437698073, + "loss": 0.6889, + "step": 5788 + }, + { + "epoch": 1.34, + "learning_rate": 0.00014990545229497722, + "loss": 0.6897, + "step": 5789 + }, + { + "epoch": 1.34, + "learning_rate": 0.00014988968856172394, + "loss": 0.7054, + "step": 5790 + }, + { + "epoch": 1.34, + "learning_rate": 0.00014987392317774247, + "loss": 0.7071, + "step": 5791 + }, + { + "epoch": 1.34, + "learning_rate": 0.0001498581561435544, + "loss": 0.7374, + "step": 5792 + }, + { + "epoch": 1.34, + "learning_rate": 0.00014984238745968152, + "loss": 0.7247, + "step": 5793 + }, + { + "epoch": 1.34, + "learning_rate": 0.00014982661712664553, + "loss": 0.7245, + "step": 5794 + }, + { + "epoch": 1.34, + "learning_rate": 0.00014981084514496823, + "loss": 0.7421, + "step": 5795 + }, + { + "epoch": 1.34, + "learning_rate": 0.00014979507151517145, + "loss": 0.7093, + "step": 5796 + }, + { + "epoch": 1.34, + "learning_rate": 0.00014977929623777712, + "loss": 0.7565, + "step": 5797 + }, + { + "epoch": 1.34, + "learning_rate": 0.00014976351931330722, + "loss": 0.6528, + "step": 5798 + }, + { + "epoch": 1.34, + "learning_rate": 0.00014974774074228377, + "loss": 0.7436, + "step": 5799 + }, + { + "epoch": 1.34, + "learning_rate": 0.0001497319605252288, + "loss": 0.687, + "step": 5800 + }, + { + "epoch": 1.34, + "learning_rate": 0.00014971617866266448, + "loss": 0.6368, + "step": 5801 + }, + { + "epoch": 1.34, + "learning_rate": 0.00014970039515511304, + "loss": 0.7235, + "step": 5802 + }, + { + "epoch": 1.34, + "learning_rate": 0.00014968461000309662, + "loss": 0.7643, + "step": 5803 + }, + { + "epoch": 1.34, + "learning_rate": 0.00014966882320713757, + "loss": 0.6908, + "step": 5804 + }, + { + "epoch": 1.34, + "learning_rate": 0.0001496530347677582, + "loss": 0.7563, + "step": 5805 + }, + { + "epoch": 1.34, + "learning_rate": 0.00014963724468548096, + "loss": 0.6949, + "step": 5806 + }, + { + "epoch": 1.34, + "learning_rate": 0.00014962145296082828, + "loss": 0.7119, + "step": 5807 + }, + { + "epoch": 1.34, + "learning_rate": 0.00014960565959432266, + "loss": 0.7721, + "step": 5808 + }, + { + "epoch": 1.34, + "learning_rate": 0.00014958986458648668, + "loss": 0.6802, + "step": 5809 + }, + { + "epoch": 1.35, + "learning_rate": 0.00014957406793784293, + "loss": 0.7157, + "step": 5810 + }, + { + "epoch": 1.35, + "learning_rate": 0.00014955826964891414, + "loss": 0.707, + "step": 5811 + }, + { + "epoch": 1.35, + "learning_rate": 0.00014954246972022296, + "loss": 0.7236, + "step": 5812 + }, + { + "epoch": 1.35, + "learning_rate": 0.00014952666815229223, + "loss": 0.7575, + "step": 5813 + }, + { + "epoch": 1.35, + "learning_rate": 0.00014951086494564478, + "loss": 0.7226, + "step": 5814 + }, + { + "epoch": 1.35, + "learning_rate": 0.00014949506010080346, + "loss": 0.7179, + "step": 5815 + }, + { + "epoch": 1.35, + "learning_rate": 0.00014947925361829127, + "loss": 0.7269, + "step": 5816 + }, + { + "epoch": 1.35, + "learning_rate": 0.0001494634454986312, + "loss": 0.6439, + "step": 5817 + }, + { + "epoch": 1.35, + "learning_rate": 0.00014944763574234623, + "loss": 0.7114, + "step": 5818 + }, + { + "epoch": 1.35, + "learning_rate": 0.00014943182434995955, + "loss": 0.7034, + "step": 5819 + }, + { + "epoch": 1.35, + "learning_rate": 0.00014941601132199427, + "loss": 0.7273, + "step": 5820 + }, + { + "epoch": 1.35, + "learning_rate": 0.0001494001966589736, + "loss": 0.7028, + "step": 5821 + }, + { + "epoch": 1.35, + "learning_rate": 0.00014938438036142087, + "loss": 0.7548, + "step": 5822 + }, + { + "epoch": 1.35, + "learning_rate": 0.00014936856242985937, + "loss": 0.6326, + "step": 5823 + }, + { + "epoch": 1.35, + "learning_rate": 0.00014935274286481245, + "loss": 0.7072, + "step": 5824 + }, + { + "epoch": 1.35, + "learning_rate": 0.00014933692166680355, + "loss": 0.7242, + "step": 5825 + }, + { + "epoch": 1.35, + "learning_rate": 0.00014932109883635618, + "loss": 0.7017, + "step": 5826 + }, + { + "epoch": 1.35, + "learning_rate": 0.0001493052743739939, + "loss": 0.6424, + "step": 5827 + }, + { + "epoch": 1.35, + "learning_rate": 0.00014928944828024023, + "loss": 0.7062, + "step": 5828 + }, + { + "epoch": 1.35, + "learning_rate": 0.00014927362055561883, + "loss": 0.7469, + "step": 5829 + }, + { + "epoch": 1.35, + "learning_rate": 0.0001492577912006535, + "loss": 0.7117, + "step": 5830 + }, + { + "epoch": 1.35, + "learning_rate": 0.00014924196021586785, + "loss": 0.7911, + "step": 5831 + }, + { + "epoch": 1.35, + "learning_rate": 0.00014922612760178577, + "loss": 0.7231, + "step": 5832 + }, + { + "epoch": 1.35, + "learning_rate": 0.00014921029335893113, + "loss": 0.6614, + "step": 5833 + }, + { + "epoch": 1.35, + "learning_rate": 0.00014919445748782782, + "loss": 0.6983, + "step": 5834 + }, + { + "epoch": 1.35, + "learning_rate": 0.00014917861998899984, + "loss": 0.6791, + "step": 5835 + }, + { + "epoch": 1.35, + "learning_rate": 0.00014916278086297117, + "loss": 0.6506, + "step": 5836 + }, + { + "epoch": 1.35, + "learning_rate": 0.00014914694011026593, + "loss": 0.646, + "step": 5837 + }, + { + "epoch": 1.35, + "learning_rate": 0.00014913109773140822, + "loss": 0.6768, + "step": 5838 + }, + { + "epoch": 1.35, + "learning_rate": 0.00014911525372692224, + "loss": 0.6995, + "step": 5839 + }, + { + "epoch": 1.35, + "learning_rate": 0.00014909940809733222, + "loss": 0.6355, + "step": 5840 + }, + { + "epoch": 1.35, + "learning_rate": 0.0001490835608431625, + "loss": 0.7389, + "step": 5841 + }, + { + "epoch": 1.35, + "learning_rate": 0.00014906771196493735, + "loss": 0.6835, + "step": 5842 + }, + { + "epoch": 1.35, + "learning_rate": 0.0001490518614631812, + "loss": 0.7012, + "step": 5843 + }, + { + "epoch": 1.35, + "learning_rate": 0.00014903600933841853, + "loss": 0.7002, + "step": 5844 + }, + { + "epoch": 1.35, + "learning_rate": 0.00014902015559117385, + "loss": 0.6958, + "step": 5845 + }, + { + "epoch": 1.35, + "learning_rate": 0.00014900430022197168, + "loss": 0.7232, + "step": 5846 + }, + { + "epoch": 1.35, + "learning_rate": 0.0001489884432313367, + "loss": 0.7206, + "step": 5847 + }, + { + "epoch": 1.35, + "learning_rate": 0.00014897258461979348, + "loss": 0.7019, + "step": 5848 + }, + { + "epoch": 1.35, + "learning_rate": 0.0001489567243878668, + "loss": 0.7194, + "step": 5849 + }, + { + "epoch": 1.35, + "learning_rate": 0.00014894086253608148, + "loss": 0.6698, + "step": 5850 + }, + { + "epoch": 1.35, + "learning_rate": 0.00014892499906496228, + "loss": 0.7083, + "step": 5851 + }, + { + "epoch": 1.35, + "learning_rate": 0.00014890913397503413, + "loss": 0.7616, + "step": 5852 + }, + { + "epoch": 1.36, + "learning_rate": 0.00014889326726682197, + "loss": 0.6664, + "step": 5853 + }, + { + "epoch": 1.36, + "learning_rate": 0.0001488773989408507, + "loss": 0.6983, + "step": 5854 + }, + { + "epoch": 1.36, + "learning_rate": 0.00014886152899764546, + "loss": 0.7577, + "step": 5855 + }, + { + "epoch": 1.36, + "learning_rate": 0.00014884565743773136, + "loss": 0.6901, + "step": 5856 + }, + { + "epoch": 1.36, + "learning_rate": 0.0001488297842616335, + "loss": 0.7298, + "step": 5857 + }, + { + "epoch": 1.36, + "learning_rate": 0.00014881390946987707, + "loss": 0.738, + "step": 5858 + }, + { + "epoch": 1.36, + "learning_rate": 0.00014879803306298736, + "loss": 0.6938, + "step": 5859 + }, + { + "epoch": 1.36, + "learning_rate": 0.00014878215504148968, + "loss": 0.7276, + "step": 5860 + }, + { + "epoch": 1.36, + "learning_rate": 0.00014876627540590941, + "loss": 0.7702, + "step": 5861 + }, + { + "epoch": 1.36, + "learning_rate": 0.00014875039415677196, + "loss": 0.7334, + "step": 5862 + }, + { + "epoch": 1.36, + "learning_rate": 0.00014873451129460277, + "loss": 0.6758, + "step": 5863 + }, + { + "epoch": 1.36, + "learning_rate": 0.0001487186268199274, + "loss": 0.6236, + "step": 5864 + }, + { + "epoch": 1.36, + "learning_rate": 0.00014870274073327137, + "loss": 0.688, + "step": 5865 + }, + { + "epoch": 1.36, + "learning_rate": 0.0001486868530351604, + "loss": 0.6837, + "step": 5866 + }, + { + "epoch": 1.36, + "learning_rate": 0.00014867096372612012, + "loss": 0.7247, + "step": 5867 + }, + { + "epoch": 1.36, + "learning_rate": 0.00014865507280667628, + "loss": 0.6831, + "step": 5868 + }, + { + "epoch": 1.36, + "learning_rate": 0.00014863918027735467, + "loss": 0.6854, + "step": 5869 + }, + { + "epoch": 1.36, + "learning_rate": 0.00014862328613868115, + "loss": 0.7122, + "step": 5870 + }, + { + "epoch": 1.36, + "learning_rate": 0.00014860739039118158, + "loss": 0.7541, + "step": 5871 + }, + { + "epoch": 1.36, + "learning_rate": 0.00014859149303538197, + "loss": 0.7668, + "step": 5872 + }, + { + "epoch": 1.36, + "learning_rate": 0.0001485755940718083, + "loss": 0.6887, + "step": 5873 + }, + { + "epoch": 1.36, + "learning_rate": 0.00014855969350098663, + "loss": 0.6874, + "step": 5874 + }, + { + "epoch": 1.36, + "learning_rate": 0.000148543791323443, + "loss": 0.7115, + "step": 5875 + }, + { + "epoch": 1.36, + "learning_rate": 0.00014852788753970367, + "loss": 0.7424, + "step": 5876 + }, + { + "epoch": 1.36, + "learning_rate": 0.00014851198215029484, + "loss": 0.7182, + "step": 5877 + }, + { + "epoch": 1.36, + "learning_rate": 0.00014849607515574276, + "loss": 0.7542, + "step": 5878 + }, + { + "epoch": 1.36, + "learning_rate": 0.00014848016655657375, + "loss": 0.6991, + "step": 5879 + }, + { + "epoch": 1.36, + "learning_rate": 0.00014846425635331418, + "loss": 0.7424, + "step": 5880 + }, + { + "epoch": 1.36, + "learning_rate": 0.0001484483445464905, + "loss": 0.7358, + "step": 5881 + }, + { + "epoch": 1.36, + "learning_rate": 0.00014843243113662918, + "loss": 0.7429, + "step": 5882 + }, + { + "epoch": 1.36, + "learning_rate": 0.0001484165161242568, + "loss": 0.6949, + "step": 5883 + }, + { + "epoch": 1.36, + "learning_rate": 0.0001484005995098999, + "loss": 0.74, + "step": 5884 + }, + { + "epoch": 1.36, + "learning_rate": 0.0001483846812940851, + "loss": 0.8488, + "step": 5885 + }, + { + "epoch": 1.36, + "learning_rate": 0.00014836876147733918, + "loss": 0.7137, + "step": 5886 + }, + { + "epoch": 1.36, + "learning_rate": 0.00014835284006018882, + "loss": 0.6787, + "step": 5887 + }, + { + "epoch": 1.36, + "learning_rate": 0.0001483369170431608, + "loss": 0.6464, + "step": 5888 + }, + { + "epoch": 1.36, + "learning_rate": 0.00014832099242678205, + "loss": 0.6492, + "step": 5889 + }, + { + "epoch": 1.36, + "learning_rate": 0.00014830506621157946, + "loss": 0.6915, + "step": 5890 + }, + { + "epoch": 1.36, + "learning_rate": 0.00014828913839807993, + "loss": 0.7693, + "step": 5891 + }, + { + "epoch": 1.36, + "learning_rate": 0.0001482732089868105, + "loss": 0.7087, + "step": 5892 + }, + { + "epoch": 1.36, + "learning_rate": 0.00014825727797829826, + "loss": 0.7293, + "step": 5893 + }, + { + "epoch": 1.36, + "learning_rate": 0.00014824134537307033, + "loss": 0.8153, + "step": 5894 + }, + { + "epoch": 1.36, + "learning_rate": 0.00014822541117165384, + "loss": 0.7319, + "step": 5895 + }, + { + "epoch": 1.37, + "learning_rate": 0.00014820947537457608, + "loss": 0.7222, + "step": 5896 + }, + { + "epoch": 1.37, + "learning_rate": 0.00014819353798236427, + "loss": 0.7504, + "step": 5897 + }, + { + "epoch": 1.37, + "learning_rate": 0.00014817759899554572, + "loss": 0.6521, + "step": 5898 + }, + { + "epoch": 1.37, + "learning_rate": 0.00014816165841464787, + "loss": 0.7267, + "step": 5899 + }, + { + "epoch": 1.37, + "learning_rate": 0.00014814571624019814, + "loss": 0.7216, + "step": 5900 + }, + { + "epoch": 1.37, + "learning_rate": 0.00014812977247272402, + "loss": 0.7, + "step": 5901 + }, + { + "epoch": 1.37, + "learning_rate": 0.00014811382711275303, + "loss": 0.735, + "step": 5902 + }, + { + "epoch": 1.37, + "learning_rate": 0.00014809788016081275, + "loss": 0.7547, + "step": 5903 + }, + { + "epoch": 1.37, + "learning_rate": 0.0001480819316174309, + "loss": 0.811, + "step": 5904 + }, + { + "epoch": 1.37, + "learning_rate": 0.0001480659814831351, + "loss": 0.76, + "step": 5905 + }, + { + "epoch": 1.37, + "learning_rate": 0.00014805002975845313, + "loss": 0.6951, + "step": 5906 + }, + { + "epoch": 1.37, + "learning_rate": 0.0001480340764439128, + "loss": 0.7336, + "step": 5907 + }, + { + "epoch": 1.37, + "learning_rate": 0.00014801812154004192, + "loss": 0.6764, + "step": 5908 + }, + { + "epoch": 1.37, + "learning_rate": 0.00014800216504736848, + "loss": 0.7144, + "step": 5909 + }, + { + "epoch": 1.37, + "learning_rate": 0.00014798620696642042, + "loss": 0.7043, + "step": 5910 + }, + { + "epoch": 1.37, + "learning_rate": 0.00014797024729772568, + "loss": 0.7506, + "step": 5911 + }, + { + "epoch": 1.37, + "learning_rate": 0.00014795428604181242, + "loss": 0.6277, + "step": 5912 + }, + { + "epoch": 1.37, + "learning_rate": 0.00014793832319920868, + "loss": 0.7065, + "step": 5913 + }, + { + "epoch": 1.37, + "learning_rate": 0.0001479223587704427, + "loss": 0.7686, + "step": 5914 + }, + { + "epoch": 1.37, + "learning_rate": 0.00014790639275604267, + "loss": 0.7066, + "step": 5915 + }, + { + "epoch": 1.37, + "learning_rate": 0.00014789042515653687, + "loss": 0.7728, + "step": 5916 + }, + { + "epoch": 1.37, + "learning_rate": 0.00014787445597245363, + "loss": 0.7263, + "step": 5917 + }, + { + "epoch": 1.37, + "learning_rate": 0.00014785848520432133, + "loss": 0.6765, + "step": 5918 + }, + { + "epoch": 1.37, + "learning_rate": 0.0001478425128526684, + "loss": 0.6948, + "step": 5919 + }, + { + "epoch": 1.37, + "learning_rate": 0.00014782653891802334, + "loss": 0.6908, + "step": 5920 + }, + { + "epoch": 1.37, + "learning_rate": 0.0001478105634009147, + "loss": 0.7445, + "step": 5921 + }, + { + "epoch": 1.37, + "learning_rate": 0.000147794586301871, + "loss": 0.7468, + "step": 5922 + }, + { + "epoch": 1.37, + "learning_rate": 0.00014777860762142097, + "loss": 0.7001, + "step": 5923 + }, + { + "epoch": 1.37, + "learning_rate": 0.00014776262736009325, + "loss": 0.7724, + "step": 5924 + }, + { + "epoch": 1.37, + "learning_rate": 0.0001477466455184166, + "loss": 0.7378, + "step": 5925 + }, + { + "epoch": 1.37, + "learning_rate": 0.00014773066209691988, + "loss": 0.694, + "step": 5926 + }, + { + "epoch": 1.37, + "learning_rate": 0.00014771467709613184, + "loss": 0.7426, + "step": 5927 + }, + { + "epoch": 1.37, + "learning_rate": 0.00014769869051658148, + "loss": 0.7348, + "step": 5928 + }, + { + "epoch": 1.37, + "learning_rate": 0.00014768270235879767, + "loss": 0.6756, + "step": 5929 + }, + { + "epoch": 1.37, + "learning_rate": 0.00014766671262330948, + "loss": 0.7289, + "step": 5930 + }, + { + "epoch": 1.37, + "learning_rate": 0.00014765072131064593, + "loss": 0.717, + "step": 5931 + }, + { + "epoch": 1.37, + "learning_rate": 0.0001476347284213362, + "loss": 0.7583, + "step": 5932 + }, + { + "epoch": 1.37, + "learning_rate": 0.00014761873395590936, + "loss": 0.7393, + "step": 5933 + }, + { + "epoch": 1.37, + "learning_rate": 0.00014760273791489473, + "loss": 0.7225, + "step": 5934 + }, + { + "epoch": 1.37, + "learning_rate": 0.00014758674029882152, + "loss": 0.737, + "step": 5935 + }, + { + "epoch": 1.37, + "learning_rate": 0.00014757074110821904, + "loss": 0.6843, + "step": 5936 + }, + { + "epoch": 1.37, + "learning_rate": 0.00014755474034361672, + "loss": 0.7388, + "step": 5937 + }, + { + "epoch": 1.37, + "learning_rate": 0.0001475387380055439, + "loss": 0.781, + "step": 5938 + }, + { + "epoch": 1.38, + "learning_rate": 0.00014752273409453015, + "loss": 0.6595, + "step": 5939 + }, + { + "epoch": 1.38, + "learning_rate": 0.00014750672861110496, + "loss": 0.6743, + "step": 5940 + }, + { + "epoch": 1.38, + "learning_rate": 0.00014749072155579787, + "loss": 0.6835, + "step": 5941 + }, + { + "epoch": 1.38, + "learning_rate": 0.00014747471292913863, + "loss": 0.6924, + "step": 5942 + }, + { + "epoch": 1.38, + "learning_rate": 0.00014745870273165677, + "loss": 0.6698, + "step": 5943 + }, + { + "epoch": 1.38, + "learning_rate": 0.00014744269096388218, + "loss": 0.7044, + "step": 5944 + }, + { + "epoch": 1.38, + "learning_rate": 0.00014742667762634456, + "loss": 0.6533, + "step": 5945 + }, + { + "epoch": 1.38, + "learning_rate": 0.00014741066271957377, + "loss": 0.7347, + "step": 5946 + }, + { + "epoch": 1.38, + "learning_rate": 0.00014739464624409976, + "loss": 0.6451, + "step": 5947 + }, + { + "epoch": 1.38, + "learning_rate": 0.0001473786282004524, + "loss": 0.6624, + "step": 5948 + }, + { + "epoch": 1.38, + "learning_rate": 0.00014736260858916165, + "loss": 0.6967, + "step": 5949 + }, + { + "epoch": 1.38, + "learning_rate": 0.00014734658741075767, + "loss": 0.7512, + "step": 5950 + }, + { + "epoch": 1.38, + "learning_rate": 0.0001473305646657705, + "loss": 0.7577, + "step": 5951 + }, + { + "epoch": 1.38, + "learning_rate": 0.00014731454035473034, + "loss": 0.6889, + "step": 5952 + }, + { + "epoch": 1.38, + "learning_rate": 0.00014729851447816737, + "loss": 0.6994, + "step": 5953 + }, + { + "epoch": 1.38, + "learning_rate": 0.00014728248703661182, + "loss": 0.6983, + "step": 5954 + }, + { + "epoch": 1.38, + "learning_rate": 0.000147266458030594, + "loss": 0.6747, + "step": 5955 + }, + { + "epoch": 1.38, + "learning_rate": 0.00014725042746064433, + "loss": 0.6663, + "step": 5956 + }, + { + "epoch": 1.38, + "learning_rate": 0.00014723439532729317, + "loss": 0.72, + "step": 5957 + }, + { + "epoch": 1.38, + "learning_rate": 0.00014721836163107102, + "loss": 0.7151, + "step": 5958 + }, + { + "epoch": 1.38, + "learning_rate": 0.00014720232637250834, + "loss": 0.7233, + "step": 5959 + }, + { + "epoch": 1.38, + "learning_rate": 0.00014718628955213574, + "loss": 0.6802, + "step": 5960 + }, + { + "epoch": 1.38, + "learning_rate": 0.00014717025117048381, + "loss": 0.7075, + "step": 5961 + }, + { + "epoch": 1.38, + "learning_rate": 0.0001471542112280833, + "loss": 0.7419, + "step": 5962 + }, + { + "epoch": 1.38, + "learning_rate": 0.00014713816972546483, + "loss": 0.6998, + "step": 5963 + }, + { + "epoch": 1.38, + "learning_rate": 0.00014712212666315923, + "loss": 0.6986, + "step": 5964 + }, + { + "epoch": 1.38, + "learning_rate": 0.00014710608204169734, + "loss": 0.6324, + "step": 5965 + }, + { + "epoch": 1.38, + "learning_rate": 0.00014709003586160994, + "loss": 0.7197, + "step": 5966 + }, + { + "epoch": 1.38, + "learning_rate": 0.00014707398812342808, + "loss": 0.6438, + "step": 5967 + }, + { + "epoch": 1.38, + "learning_rate": 0.0001470579388276827, + "loss": 0.7146, + "step": 5968 + }, + { + "epoch": 1.38, + "learning_rate": 0.00014704188797490483, + "loss": 0.7217, + "step": 5969 + }, + { + "epoch": 1.38, + "learning_rate": 0.00014702583556562554, + "loss": 0.7562, + "step": 5970 + }, + { + "epoch": 1.38, + "learning_rate": 0.00014700978160037595, + "loss": 0.7289, + "step": 5971 + }, + { + "epoch": 1.38, + "learning_rate": 0.00014699372607968725, + "loss": 0.7944, + "step": 5972 + }, + { + "epoch": 1.38, + "learning_rate": 0.00014697766900409074, + "loss": 0.7116, + "step": 5973 + }, + { + "epoch": 1.38, + "learning_rate": 0.00014696161037411768, + "loss": 0.6528, + "step": 5974 + }, + { + "epoch": 1.38, + "learning_rate": 0.00014694555019029938, + "loss": 0.6887, + "step": 5975 + }, + { + "epoch": 1.38, + "learning_rate": 0.00014692948845316722, + "loss": 0.6619, + "step": 5976 + }, + { + "epoch": 1.38, + "learning_rate": 0.00014691342516325268, + "loss": 0.7516, + "step": 5977 + }, + { + "epoch": 1.38, + "learning_rate": 0.00014689736032108728, + "loss": 0.6155, + "step": 5978 + }, + { + "epoch": 1.38, + "learning_rate": 0.00014688129392720253, + "loss": 0.7902, + "step": 5979 + }, + { + "epoch": 1.38, + "learning_rate": 0.00014686522598213002, + "loss": 0.7143, + "step": 5980 + }, + { + "epoch": 1.38, + "learning_rate": 0.00014684915648640142, + "loss": 0.6633, + "step": 5981 + }, + { + "epoch": 1.38, + "learning_rate": 0.0001468330854405484, + "loss": 0.6982, + "step": 5982 + }, + { + "epoch": 1.39, + "learning_rate": 0.00014681701284510274, + "loss": 0.6571, + "step": 5983 + }, + { + "epoch": 1.39, + "learning_rate": 0.00014680093870059625, + "loss": 0.6799, + "step": 5984 + }, + { + "epoch": 1.39, + "learning_rate": 0.0001467848630075608, + "loss": 0.7502, + "step": 5985 + }, + { + "epoch": 1.39, + "learning_rate": 0.00014676878576652823, + "loss": 0.6953, + "step": 5986 + }, + { + "epoch": 1.39, + "learning_rate": 0.00014675270697803053, + "loss": 0.6634, + "step": 5987 + }, + { + "epoch": 1.39, + "learning_rate": 0.00014673662664259975, + "loss": 0.7359, + "step": 5988 + }, + { + "epoch": 1.39, + "learning_rate": 0.00014672054476076783, + "loss": 0.8505, + "step": 5989 + }, + { + "epoch": 1.39, + "learning_rate": 0.00014670446133306707, + "loss": 0.7246, + "step": 5990 + }, + { + "epoch": 1.39, + "learning_rate": 0.00014668837636002945, + "loss": 0.75, + "step": 5991 + }, + { + "epoch": 1.39, + "learning_rate": 0.0001466722898421873, + "loss": 0.7024, + "step": 5992 + }, + { + "epoch": 1.39, + "learning_rate": 0.00014665620178007277, + "loss": 0.6771, + "step": 5993 + }, + { + "epoch": 1.39, + "learning_rate": 0.00014664011217421828, + "loss": 0.7571, + "step": 5994 + }, + { + "epoch": 1.39, + "learning_rate": 0.00014662402102515618, + "loss": 0.7231, + "step": 5995 + }, + { + "epoch": 1.39, + "learning_rate": 0.00014660792833341883, + "loss": 0.6522, + "step": 5996 + }, + { + "epoch": 1.39, + "learning_rate": 0.00014659183409953877, + "loss": 0.6647, + "step": 5997 + }, + { + "epoch": 1.39, + "learning_rate": 0.00014657573832404846, + "loss": 0.7596, + "step": 5998 + }, + { + "epoch": 1.39, + "learning_rate": 0.00014655964100748048, + "loss": 0.7084, + "step": 5999 + }, + { + "epoch": 1.39, + "learning_rate": 0.0001465435421503675, + "loss": 0.7394, + "step": 6000 + }, + { + "epoch": 1.39, + "learning_rate": 0.00014652744175324217, + "loss": 0.6953, + "step": 6001 + }, + { + "epoch": 1.39, + "learning_rate": 0.00014651133981663714, + "loss": 0.7161, + "step": 6002 + }, + { + "epoch": 1.39, + "learning_rate": 0.00014649523634108525, + "loss": 0.724, + "step": 6003 + }, + { + "epoch": 1.39, + "learning_rate": 0.00014647913132711935, + "loss": 0.7388, + "step": 6004 + }, + { + "epoch": 1.39, + "learning_rate": 0.00014646302477527227, + "loss": 0.7458, + "step": 6005 + }, + { + "epoch": 1.39, + "learning_rate": 0.00014644691668607695, + "loss": 0.7913, + "step": 6006 + }, + { + "epoch": 1.39, + "learning_rate": 0.00014643080706006637, + "loss": 0.7001, + "step": 6007 + }, + { + "epoch": 1.39, + "learning_rate": 0.00014641469589777355, + "loss": 0.7269, + "step": 6008 + }, + { + "epoch": 1.39, + "learning_rate": 0.0001463985831997316, + "loss": 0.7354, + "step": 6009 + }, + { + "epoch": 1.39, + "learning_rate": 0.0001463824689664736, + "loss": 0.6775, + "step": 6010 + }, + { + "epoch": 1.39, + "learning_rate": 0.00014636635319853275, + "loss": 0.6977, + "step": 6011 + }, + { + "epoch": 1.39, + "learning_rate": 0.00014635023589644228, + "loss": 0.7233, + "step": 6012 + }, + { + "epoch": 1.39, + "learning_rate": 0.0001463341170607355, + "loss": 0.6738, + "step": 6013 + }, + { + "epoch": 1.39, + "learning_rate": 0.0001463179966919457, + "loss": 0.8296, + "step": 6014 + }, + { + "epoch": 1.39, + "learning_rate": 0.00014630187479060632, + "loss": 0.6844, + "step": 6015 + }, + { + "epoch": 1.39, + "learning_rate": 0.00014628575135725074, + "loss": 0.6772, + "step": 6016 + }, + { + "epoch": 1.39, + "learning_rate": 0.00014626962639241247, + "loss": 0.7277, + "step": 6017 + }, + { + "epoch": 1.39, + "learning_rate": 0.00014625349989662501, + "loss": 0.7232, + "step": 6018 + }, + { + "epoch": 1.39, + "learning_rate": 0.000146237371870422, + "loss": 0.7476, + "step": 6019 + }, + { + "epoch": 1.39, + "learning_rate": 0.00014622124231433706, + "loss": 0.6425, + "step": 6020 + }, + { + "epoch": 1.39, + "learning_rate": 0.00014620511122890388, + "loss": 0.6204, + "step": 6021 + }, + { + "epoch": 1.39, + "learning_rate": 0.00014618897861465617, + "loss": 0.6774, + "step": 6022 + }, + { + "epoch": 1.39, + "learning_rate": 0.00014617284447212775, + "loss": 0.7085, + "step": 6023 + }, + { + "epoch": 1.39, + "learning_rate": 0.00014615670880185243, + "loss": 0.6717, + "step": 6024 + }, + { + "epoch": 1.39, + "learning_rate": 0.00014614057160436414, + "loss": 0.7391, + "step": 6025 + }, + { + "epoch": 1.4, + "learning_rate": 0.0001461244328801968, + "loss": 0.7701, + "step": 6026 + }, + { + "epoch": 1.4, + "learning_rate": 0.0001461082926298844, + "loss": 0.714, + "step": 6027 + }, + { + "epoch": 1.4, + "learning_rate": 0.00014609215085396095, + "loss": 0.7074, + "step": 6028 + }, + { + "epoch": 1.4, + "learning_rate": 0.00014607600755296062, + "loss": 0.7072, + "step": 6029 + }, + { + "epoch": 1.4, + "learning_rate": 0.00014605986272741748, + "loss": 0.7122, + "step": 6030 + }, + { + "epoch": 1.4, + "learning_rate": 0.00014604371637786577, + "loss": 0.7186, + "step": 6031 + }, + { + "epoch": 1.4, + "learning_rate": 0.00014602756850483968, + "loss": 0.7003, + "step": 6032 + }, + { + "epoch": 1.4, + "learning_rate": 0.00014601141910887354, + "loss": 0.7549, + "step": 6033 + }, + { + "epoch": 1.4, + "learning_rate": 0.0001459952681905017, + "loss": 0.7385, + "step": 6034 + }, + { + "epoch": 1.4, + "learning_rate": 0.00014597911575025855, + "loss": 0.7601, + "step": 6035 + }, + { + "epoch": 1.4, + "learning_rate": 0.00014596296178867854, + "loss": 0.7899, + "step": 6036 + }, + { + "epoch": 1.4, + "learning_rate": 0.00014594680630629613, + "loss": 0.6542, + "step": 6037 + }, + { + "epoch": 1.4, + "learning_rate": 0.00014593064930364593, + "loss": 0.7587, + "step": 6038 + }, + { + "epoch": 1.4, + "learning_rate": 0.00014591449078126244, + "loss": 0.7573, + "step": 6039 + }, + { + "epoch": 1.4, + "learning_rate": 0.0001458983307396804, + "loss": 0.6571, + "step": 6040 + }, + { + "epoch": 1.4, + "learning_rate": 0.00014588216917943447, + "loss": 0.6958, + "step": 6041 + }, + { + "epoch": 1.4, + "learning_rate": 0.00014586600610105938, + "loss": 0.8214, + "step": 6042 + }, + { + "epoch": 1.4, + "learning_rate": 0.00014584984150508998, + "loss": 0.663, + "step": 6043 + }, + { + "epoch": 1.4, + "learning_rate": 0.00014583367539206102, + "loss": 0.6907, + "step": 6044 + }, + { + "epoch": 1.4, + "learning_rate": 0.00014581750776250749, + "loss": 0.6696, + "step": 6045 + }, + { + "epoch": 1.4, + "learning_rate": 0.0001458013386169643, + "loss": 0.7819, + "step": 6046 + }, + { + "epoch": 1.4, + "learning_rate": 0.00014578516795596644, + "loss": 0.7336, + "step": 6047 + }, + { + "epoch": 1.4, + "learning_rate": 0.000145768995780049, + "loss": 0.7442, + "step": 6048 + }, + { + "epoch": 1.4, + "learning_rate": 0.00014575282208974702, + "loss": 0.6542, + "step": 6049 + }, + { + "epoch": 1.4, + "learning_rate": 0.00014573664688559567, + "loss": 0.7158, + "step": 6050 + }, + { + "epoch": 1.4, + "learning_rate": 0.00014572047016813019, + "loss": 0.6901, + "step": 6051 + }, + { + "epoch": 1.4, + "learning_rate": 0.00014570429193788576, + "loss": 0.775, + "step": 6052 + }, + { + "epoch": 1.4, + "learning_rate": 0.0001456881121953977, + "loss": 0.7249, + "step": 6053 + }, + { + "epoch": 1.4, + "learning_rate": 0.00014567193094120143, + "loss": 0.66, + "step": 6054 + }, + { + "epoch": 1.4, + "learning_rate": 0.00014565574817583223, + "loss": 0.6439, + "step": 6055 + }, + { + "epoch": 1.4, + "learning_rate": 0.0001456395638998256, + "loss": 0.72, + "step": 6056 + }, + { + "epoch": 1.4, + "learning_rate": 0.00014562337811371707, + "loss": 0.6665, + "step": 6057 + }, + { + "epoch": 1.4, + "learning_rate": 0.00014560719081804216, + "loss": 0.7122, + "step": 6058 + }, + { + "epoch": 1.4, + "learning_rate": 0.00014559100201333647, + "loss": 0.6459, + "step": 6059 + }, + { + "epoch": 1.4, + "learning_rate": 0.00014557481170013564, + "loss": 0.6592, + "step": 6060 + }, + { + "epoch": 1.4, + "learning_rate": 0.00014555861987897536, + "loss": 0.69, + "step": 6061 + }, + { + "epoch": 1.4, + "learning_rate": 0.00014554242655039139, + "loss": 0.7288, + "step": 6062 + }, + { + "epoch": 1.4, + "learning_rate": 0.00014552623171491954, + "loss": 0.8172, + "step": 6063 + }, + { + "epoch": 1.4, + "learning_rate": 0.00014551003537309568, + "loss": 0.6955, + "step": 6064 + }, + { + "epoch": 1.4, + "learning_rate": 0.00014549383752545566, + "loss": 0.7604, + "step": 6065 + }, + { + "epoch": 1.4, + "learning_rate": 0.00014547763817253544, + "loss": 0.6631, + "step": 6066 + }, + { + "epoch": 1.4, + "learning_rate": 0.000145461437314871, + "loss": 0.7376, + "step": 6067 + }, + { + "epoch": 1.4, + "learning_rate": 0.00014544523495299842, + "loss": 0.7163, + "step": 6068 + }, + { + "epoch": 1.41, + "learning_rate": 0.00014542903108745377, + "loss": 0.7116, + "step": 6069 + }, + { + "epoch": 1.41, + "learning_rate": 0.00014541282571877326, + "loss": 0.7457, + "step": 6070 + }, + { + "epoch": 1.41, + "learning_rate": 0.000145396618847493, + "loss": 0.7392, + "step": 6071 + }, + { + "epoch": 1.41, + "learning_rate": 0.00014538041047414927, + "loss": 0.6931, + "step": 6072 + }, + { + "epoch": 1.41, + "learning_rate": 0.00014536420059927838, + "loss": 0.66, + "step": 6073 + }, + { + "epoch": 1.41, + "learning_rate": 0.00014534798922341666, + "loss": 0.7095, + "step": 6074 + }, + { + "epoch": 1.41, + "learning_rate": 0.00014533177634710052, + "loss": 0.7376, + "step": 6075 + }, + { + "epoch": 1.41, + "learning_rate": 0.00014531556197086638, + "loss": 0.767, + "step": 6076 + }, + { + "epoch": 1.41, + "learning_rate": 0.00014529934609525076, + "loss": 0.7177, + "step": 6077 + }, + { + "epoch": 1.41, + "learning_rate": 0.00014528312872079017, + "loss": 0.7036, + "step": 6078 + }, + { + "epoch": 1.41, + "learning_rate": 0.0001452669098480212, + "loss": 0.6955, + "step": 6079 + }, + { + "epoch": 1.41, + "learning_rate": 0.00014525068947748057, + "loss": 0.6893, + "step": 6080 + }, + { + "epoch": 1.41, + "learning_rate": 0.00014523446760970492, + "loss": 0.7379, + "step": 6081 + }, + { + "epoch": 1.41, + "learning_rate": 0.00014521824424523096, + "loss": 0.7407, + "step": 6082 + }, + { + "epoch": 1.41, + "learning_rate": 0.00014520201938459553, + "loss": 0.7072, + "step": 6083 + }, + { + "epoch": 1.41, + "learning_rate": 0.00014518579302833542, + "loss": 0.6929, + "step": 6084 + }, + { + "epoch": 1.41, + "learning_rate": 0.0001451695651769876, + "loss": 0.6782, + "step": 6085 + }, + { + "epoch": 1.41, + "learning_rate": 0.00014515333583108896, + "loss": 0.7194, + "step": 6086 + }, + { + "epoch": 1.41, + "learning_rate": 0.00014513710499117647, + "loss": 0.7415, + "step": 6087 + }, + { + "epoch": 1.41, + "learning_rate": 0.0001451208726577872, + "loss": 0.7028, + "step": 6088 + }, + { + "epoch": 1.41, + "learning_rate": 0.00014510463883145826, + "loss": 0.6425, + "step": 6089 + }, + { + "epoch": 1.41, + "learning_rate": 0.0001450884035127267, + "loss": 0.7448, + "step": 6090 + }, + { + "epoch": 1.41, + "learning_rate": 0.00014507216670212985, + "loss": 0.7297, + "step": 6091 + }, + { + "epoch": 1.41, + "learning_rate": 0.00014505592840020478, + "loss": 0.6732, + "step": 6092 + }, + { + "epoch": 1.41, + "learning_rate": 0.00014503968860748892, + "loss": 0.7028, + "step": 6093 + }, + { + "epoch": 1.41, + "learning_rate": 0.0001450234473245195, + "loss": 0.6965, + "step": 6094 + }, + { + "epoch": 1.41, + "learning_rate": 0.000145007204551834, + "loss": 0.7266, + "step": 6095 + }, + { + "epoch": 1.41, + "learning_rate": 0.00014499096028996978, + "loss": 0.7451, + "step": 6096 + }, + { + "epoch": 1.41, + "learning_rate": 0.00014497471453946432, + "loss": 0.6882, + "step": 6097 + }, + { + "epoch": 1.41, + "learning_rate": 0.00014495846730085517, + "loss": 0.6677, + "step": 6098 + }, + { + "epoch": 1.41, + "learning_rate": 0.00014494221857467996, + "loss": 0.6013, + "step": 6099 + }, + { + "epoch": 1.41, + "learning_rate": 0.00014492596836147628, + "loss": 0.6797, + "step": 6100 + }, + { + "epoch": 1.41, + "learning_rate": 0.00014490971666178176, + "loss": 0.6986, + "step": 6101 + }, + { + "epoch": 1.41, + "learning_rate": 0.00014489346347613423, + "loss": 0.6555, + "step": 6102 + }, + { + "epoch": 1.41, + "learning_rate": 0.0001448772088050714, + "loss": 0.7071, + "step": 6103 + }, + { + "epoch": 1.41, + "learning_rate": 0.00014486095264913112, + "loss": 0.6817, + "step": 6104 + }, + { + "epoch": 1.41, + "learning_rate": 0.00014484469500885126, + "loss": 0.7136, + "step": 6105 + }, + { + "epoch": 1.41, + "learning_rate": 0.00014482843588476974, + "loss": 0.6932, + "step": 6106 + }, + { + "epoch": 1.41, + "learning_rate": 0.00014481217527742457, + "loss": 0.6906, + "step": 6107 + }, + { + "epoch": 1.41, + "learning_rate": 0.0001447959131873537, + "loss": 0.7784, + "step": 6108 + }, + { + "epoch": 1.41, + "learning_rate": 0.00014477964961509527, + "loss": 0.7012, + "step": 6109 + }, + { + "epoch": 1.41, + "learning_rate": 0.00014476338456118737, + "loss": 0.6845, + "step": 6110 + }, + { + "epoch": 1.41, + "learning_rate": 0.0001447471180261682, + "loss": 0.7136, + "step": 6111 + }, + { + "epoch": 1.42, + "learning_rate": 0.00014473085001057596, + "loss": 0.7504, + "step": 6112 + }, + { + "epoch": 1.42, + "learning_rate": 0.00014471458051494893, + "loss": 0.6342, + "step": 6113 + }, + { + "epoch": 1.42, + "learning_rate": 0.0001446983095398254, + "loss": 0.6795, + "step": 6114 + }, + { + "epoch": 1.42, + "learning_rate": 0.00014468203708574375, + "loss": 0.661, + "step": 6115 + }, + { + "epoch": 1.42, + "learning_rate": 0.0001446657631532424, + "loss": 0.7034, + "step": 6116 + }, + { + "epoch": 1.42, + "learning_rate": 0.00014464948774285983, + "loss": 0.622, + "step": 6117 + }, + { + "epoch": 1.42, + "learning_rate": 0.00014463321085513454, + "loss": 0.747, + "step": 6118 + }, + { + "epoch": 1.42, + "learning_rate": 0.00014461693249060508, + "loss": 0.6757, + "step": 6119 + }, + { + "epoch": 1.42, + "learning_rate": 0.00014460065264981008, + "loss": 0.7277, + "step": 6120 + }, + { + "epoch": 1.42, + "learning_rate": 0.0001445843713332882, + "loss": 0.6845, + "step": 6121 + }, + { + "epoch": 1.42, + "learning_rate": 0.00014456808854157815, + "loss": 0.6985, + "step": 6122 + }, + { + "epoch": 1.42, + "learning_rate": 0.00014455180427521867, + "loss": 0.7303, + "step": 6123 + }, + { + "epoch": 1.42, + "learning_rate": 0.00014453551853474856, + "loss": 0.7602, + "step": 6124 + }, + { + "epoch": 1.42, + "learning_rate": 0.0001445192313207067, + "loss": 0.6491, + "step": 6125 + }, + { + "epoch": 1.42, + "learning_rate": 0.00014450294263363203, + "loss": 0.7106, + "step": 6126 + }, + { + "epoch": 1.42, + "learning_rate": 0.0001444866524740634, + "loss": 0.7445, + "step": 6127 + }, + { + "epoch": 1.42, + "learning_rate": 0.0001444703608425399, + "loss": 0.7221, + "step": 6128 + }, + { + "epoch": 1.42, + "learning_rate": 0.00014445406773960055, + "loss": 0.6625, + "step": 6129 + }, + { + "epoch": 1.42, + "learning_rate": 0.00014443777316578446, + "loss": 0.7524, + "step": 6130 + }, + { + "epoch": 1.42, + "learning_rate": 0.00014442147712163077, + "loss": 0.7426, + "step": 6131 + }, + { + "epoch": 1.42, + "learning_rate": 0.00014440517960767868, + "loss": 0.7427, + "step": 6132 + }, + { + "epoch": 1.42, + "learning_rate": 0.0001443888806244674, + "loss": 0.741, + "step": 6133 + }, + { + "epoch": 1.42, + "learning_rate": 0.00014437258017253627, + "loss": 0.7162, + "step": 6134 + }, + { + "epoch": 1.42, + "learning_rate": 0.00014435627825242463, + "loss": 0.7273, + "step": 6135 + }, + { + "epoch": 1.42, + "learning_rate": 0.00014433997486467185, + "loss": 0.7017, + "step": 6136 + }, + { + "epoch": 1.42, + "learning_rate": 0.00014432367000981734, + "loss": 0.646, + "step": 6137 + }, + { + "epoch": 1.42, + "learning_rate": 0.00014430736368840067, + "loss": 0.7011, + "step": 6138 + }, + { + "epoch": 1.42, + "learning_rate": 0.00014429105590096133, + "loss": 0.6885, + "step": 6139 + }, + { + "epoch": 1.42, + "learning_rate": 0.0001442747466480389, + "loss": 0.72, + "step": 6140 + }, + { + "epoch": 1.42, + "learning_rate": 0.00014425843593017298, + "loss": 0.6769, + "step": 6141 + }, + { + "epoch": 1.42, + "learning_rate": 0.00014424212374790333, + "loss": 0.7561, + "step": 6142 + }, + { + "epoch": 1.42, + "learning_rate": 0.00014422581010176962, + "loss": 0.6973, + "step": 6143 + }, + { + "epoch": 1.42, + "learning_rate": 0.00014420949499231172, + "loss": 0.733, + "step": 6144 + }, + { + "epoch": 1.42, + "learning_rate": 0.0001441931784200693, + "loss": 0.714, + "step": 6145 + }, + { + "epoch": 1.42, + "learning_rate": 0.00014417686038558234, + "loss": 0.7211, + "step": 6146 + }, + { + "epoch": 1.42, + "learning_rate": 0.00014416054088939076, + "loss": 0.7744, + "step": 6147 + }, + { + "epoch": 1.42, + "learning_rate": 0.00014414421993203452, + "loss": 0.6788, + "step": 6148 + }, + { + "epoch": 1.42, + "learning_rate": 0.00014412789751405363, + "loss": 0.6956, + "step": 6149 + }, + { + "epoch": 1.42, + "learning_rate": 0.00014411157363598823, + "loss": 0.6502, + "step": 6150 + }, + { + "epoch": 1.42, + "learning_rate": 0.0001440952482983783, + "loss": 0.6732, + "step": 6151 + }, + { + "epoch": 1.42, + "learning_rate": 0.0001440789215017641, + "loss": 0.7097, + "step": 6152 + }, + { + "epoch": 1.42, + "learning_rate": 0.00014406259324668583, + "loss": 0.6668, + "step": 6153 + }, + { + "epoch": 1.42, + "learning_rate": 0.00014404626353368375, + "loss": 0.7255, + "step": 6154 + }, + { + "epoch": 1.43, + "learning_rate": 0.0001440299323632982, + "loss": 0.6592, + "step": 6155 + }, + { + "epoch": 1.43, + "learning_rate": 0.00014401359973606947, + "loss": 0.6359, + "step": 6156 + }, + { + "epoch": 1.43, + "learning_rate": 0.00014399726565253798, + "loss": 0.6552, + "step": 6157 + }, + { + "epoch": 1.43, + "learning_rate": 0.00014398093011324425, + "loss": 0.7131, + "step": 6158 + }, + { + "epoch": 1.43, + "learning_rate": 0.00014396459311872875, + "loss": 0.6965, + "step": 6159 + }, + { + "epoch": 1.43, + "learning_rate": 0.00014394825466953202, + "loss": 0.7241, + "step": 6160 + }, + { + "epoch": 1.43, + "learning_rate": 0.00014393191476619465, + "loss": 0.7624, + "step": 6161 + }, + { + "epoch": 1.43, + "learning_rate": 0.0001439155734092573, + "loss": 0.6798, + "step": 6162 + }, + { + "epoch": 1.43, + "learning_rate": 0.00014389923059926062, + "loss": 0.7129, + "step": 6163 + }, + { + "epoch": 1.43, + "learning_rate": 0.00014388288633674544, + "loss": 0.7064, + "step": 6164 + }, + { + "epoch": 1.43, + "learning_rate": 0.00014386654062225257, + "loss": 0.7995, + "step": 6165 + }, + { + "epoch": 1.43, + "learning_rate": 0.00014385019345632274, + "loss": 0.7205, + "step": 6166 + }, + { + "epoch": 1.43, + "learning_rate": 0.00014383384483949687, + "loss": 0.7173, + "step": 6167 + }, + { + "epoch": 1.43, + "learning_rate": 0.00014381749477231591, + "loss": 0.6924, + "step": 6168 + }, + { + "epoch": 1.43, + "learning_rate": 0.00014380114325532087, + "loss": 0.7491, + "step": 6169 + }, + { + "epoch": 1.43, + "learning_rate": 0.00014378479028905278, + "loss": 0.7266, + "step": 6170 + }, + { + "epoch": 1.43, + "learning_rate": 0.0001437684358740527, + "loss": 0.7027, + "step": 6171 + }, + { + "epoch": 1.43, + "learning_rate": 0.00014375208001086173, + "loss": 0.7044, + "step": 6172 + }, + { + "epoch": 1.43, + "learning_rate": 0.0001437357227000211, + "loss": 0.7762, + "step": 6173 + }, + { + "epoch": 1.43, + "learning_rate": 0.00014371936394207198, + "loss": 0.7119, + "step": 6174 + }, + { + "epoch": 1.43, + "learning_rate": 0.00014370300373755566, + "loss": 0.6717, + "step": 6175 + }, + { + "epoch": 1.43, + "learning_rate": 0.00014368664208701352, + "loss": 0.6833, + "step": 6176 + }, + { + "epoch": 1.43, + "learning_rate": 0.00014367027899098687, + "loss": 0.6932, + "step": 6177 + }, + { + "epoch": 1.43, + "learning_rate": 0.0001436539144500171, + "loss": 0.7082, + "step": 6178 + }, + { + "epoch": 1.43, + "learning_rate": 0.0001436375484646457, + "loss": 0.7279, + "step": 6179 + }, + { + "epoch": 1.43, + "learning_rate": 0.00014362118103541417, + "loss": 0.6943, + "step": 6180 + }, + { + "epoch": 1.43, + "learning_rate": 0.00014360481216286414, + "loss": 0.7114, + "step": 6181 + }, + { + "epoch": 1.43, + "learning_rate": 0.00014358844184753712, + "loss": 0.6439, + "step": 6182 + }, + { + "epoch": 1.43, + "learning_rate": 0.00014357207008997483, + "loss": 0.7425, + "step": 6183 + }, + { + "epoch": 1.43, + "learning_rate": 0.0001435556968907189, + "loss": 0.7103, + "step": 6184 + }, + { + "epoch": 1.43, + "learning_rate": 0.00014353932225031113, + "loss": 0.7295, + "step": 6185 + }, + { + "epoch": 1.43, + "learning_rate": 0.00014352294616929334, + "loss": 0.6648, + "step": 6186 + }, + { + "epoch": 1.43, + "learning_rate": 0.00014350656864820733, + "loss": 0.7833, + "step": 6187 + }, + { + "epoch": 1.43, + "learning_rate": 0.00014349018968759503, + "loss": 0.6516, + "step": 6188 + }, + { + "epoch": 1.43, + "learning_rate": 0.00014347380928799832, + "loss": 0.6209, + "step": 6189 + }, + { + "epoch": 1.43, + "learning_rate": 0.00014345742744995924, + "loss": 0.6938, + "step": 6190 + }, + { + "epoch": 1.43, + "learning_rate": 0.00014344104417401979, + "loss": 0.6892, + "step": 6191 + }, + { + "epoch": 1.43, + "learning_rate": 0.0001434246594607221, + "loss": 0.6993, + "step": 6192 + }, + { + "epoch": 1.43, + "learning_rate": 0.00014340827331060826, + "loss": 0.7302, + "step": 6193 + }, + { + "epoch": 1.43, + "learning_rate": 0.00014339188572422046, + "loss": 0.7229, + "step": 6194 + }, + { + "epoch": 1.43, + "learning_rate": 0.00014337549670210092, + "loss": 0.7281, + "step": 6195 + }, + { + "epoch": 1.43, + "learning_rate": 0.00014335910624479193, + "loss": 0.6999, + "step": 6196 + }, + { + "epoch": 1.43, + "learning_rate": 0.0001433427143528358, + "loss": 0.6595, + "step": 6197 + }, + { + "epoch": 1.43, + "learning_rate": 0.00014332632102677487, + "loss": 0.7443, + "step": 6198 + }, + { + "epoch": 1.44, + "learning_rate": 0.0001433099262671516, + "loss": 0.7256, + "step": 6199 + }, + { + "epoch": 1.44, + "learning_rate": 0.00014329353007450844, + "loss": 0.6759, + "step": 6200 + }, + { + "epoch": 1.44, + "learning_rate": 0.0001432771324493879, + "loss": 0.7491, + "step": 6201 + }, + { + "epoch": 1.44, + "learning_rate": 0.0001432607333923325, + "loss": 0.6639, + "step": 6202 + }, + { + "epoch": 1.44, + "learning_rate": 0.0001432443329038849, + "loss": 0.6831, + "step": 6203 + }, + { + "epoch": 1.44, + "learning_rate": 0.00014322793098458774, + "loss": 0.6955, + "step": 6204 + }, + { + "epoch": 1.44, + "learning_rate": 0.0001432115276349837, + "loss": 0.724, + "step": 6205 + }, + { + "epoch": 1.44, + "learning_rate": 0.0001431951228556155, + "loss": 0.6787, + "step": 6206 + }, + { + "epoch": 1.44, + "learning_rate": 0.000143178716647026, + "loss": 0.7161, + "step": 6207 + }, + { + "epoch": 1.44, + "learning_rate": 0.000143162309009758, + "loss": 0.781, + "step": 6208 + }, + { + "epoch": 1.44, + "learning_rate": 0.0001431458999443544, + "loss": 0.7076, + "step": 6209 + }, + { + "epoch": 1.44, + "learning_rate": 0.00014312948945135814, + "loss": 0.6992, + "step": 6210 + }, + { + "epoch": 1.44, + "learning_rate": 0.0001431130775313122, + "loss": 0.7121, + "step": 6211 + }, + { + "epoch": 1.44, + "learning_rate": 0.00014309666418475963, + "loss": 0.6688, + "step": 6212 + }, + { + "epoch": 1.44, + "learning_rate": 0.00014308024941224345, + "loss": 0.6799, + "step": 6213 + }, + { + "epoch": 1.44, + "learning_rate": 0.00014306383321430682, + "loss": 0.6228, + "step": 6214 + }, + { + "epoch": 1.44, + "learning_rate": 0.00014304741559149293, + "loss": 0.702, + "step": 6215 + }, + { + "epoch": 1.44, + "learning_rate": 0.00014303099654434497, + "loss": 0.7666, + "step": 6216 + }, + { + "epoch": 1.44, + "learning_rate": 0.00014301457607340625, + "loss": 0.7167, + "step": 6217 + }, + { + "epoch": 1.44, + "learning_rate": 0.00014299815417922, + "loss": 0.6634, + "step": 6218 + }, + { + "epoch": 1.44, + "learning_rate": 0.0001429817308623297, + "loss": 0.715, + "step": 6219 + }, + { + "epoch": 1.44, + "learning_rate": 0.00014296530612327863, + "loss": 0.7199, + "step": 6220 + }, + { + "epoch": 1.44, + "learning_rate": 0.00014294887996261032, + "loss": 0.6805, + "step": 6221 + }, + { + "epoch": 1.44, + "learning_rate": 0.00014293245238086827, + "loss": 0.7034, + "step": 6222 + }, + { + "epoch": 1.44, + "learning_rate": 0.000142916023378596, + "loss": 0.6886, + "step": 6223 + }, + { + "epoch": 1.44, + "learning_rate": 0.00014289959295633712, + "loss": 0.7444, + "step": 6224 + }, + { + "epoch": 1.44, + "learning_rate": 0.00014288316111463529, + "loss": 0.6556, + "step": 6225 + }, + { + "epoch": 1.44, + "learning_rate": 0.0001428667278540342, + "loss": 0.7657, + "step": 6226 + }, + { + "epoch": 1.44, + "learning_rate": 0.00014285029317507753, + "loss": 0.6993, + "step": 6227 + }, + { + "epoch": 1.44, + "learning_rate": 0.00014283385707830913, + "loss": 0.6941, + "step": 6228 + }, + { + "epoch": 1.44, + "learning_rate": 0.00014281741956427278, + "loss": 0.7725, + "step": 6229 + }, + { + "epoch": 1.44, + "learning_rate": 0.00014280098063351238, + "loss": 0.757, + "step": 6230 + }, + { + "epoch": 1.44, + "learning_rate": 0.00014278454028657188, + "loss": 0.6956, + "step": 6231 + }, + { + "epoch": 1.44, + "learning_rate": 0.0001427680985239952, + "loss": 0.6555, + "step": 6232 + }, + { + "epoch": 1.44, + "learning_rate": 0.00014275165534632638, + "loss": 0.6513, + "step": 6233 + }, + { + "epoch": 1.44, + "learning_rate": 0.00014273521075410948, + "loss": 0.7014, + "step": 6234 + }, + { + "epoch": 1.44, + "learning_rate": 0.00014271876474788864, + "loss": 0.6531, + "step": 6235 + }, + { + "epoch": 1.44, + "learning_rate": 0.000142702317328208, + "loss": 0.6644, + "step": 6236 + }, + { + "epoch": 1.44, + "learning_rate": 0.00014268586849561175, + "loss": 0.6019, + "step": 6237 + }, + { + "epoch": 1.44, + "learning_rate": 0.00014266941825064416, + "loss": 0.7851, + "step": 6238 + }, + { + "epoch": 1.44, + "learning_rate": 0.00014265296659384956, + "loss": 0.7101, + "step": 6239 + }, + { + "epoch": 1.44, + "learning_rate": 0.00014263651352577222, + "loss": 0.6996, + "step": 6240 + }, + { + "epoch": 1.44, + "learning_rate": 0.00014262005904695657, + "loss": 0.7245, + "step": 6241 + }, + { + "epoch": 1.45, + "learning_rate": 0.00014260360315794705, + "loss": 0.7087, + "step": 6242 + }, + { + "epoch": 1.45, + "learning_rate": 0.00014258714585928814, + "loss": 0.6688, + "step": 6243 + }, + { + "epoch": 1.45, + "learning_rate": 0.0001425706871515244, + "loss": 0.767, + "step": 6244 + }, + { + "epoch": 1.45, + "learning_rate": 0.00014255422703520035, + "loss": 0.6656, + "step": 6245 + }, + { + "epoch": 1.45, + "learning_rate": 0.00014253776551086068, + "loss": 0.7173, + "step": 6246 + }, + { + "epoch": 1.45, + "learning_rate": 0.00014252130257905, + "loss": 0.7227, + "step": 6247 + }, + { + "epoch": 1.45, + "learning_rate": 0.00014250483824031307, + "loss": 0.7543, + "step": 6248 + }, + { + "epoch": 1.45, + "learning_rate": 0.00014248837249519468, + "loss": 0.6794, + "step": 6249 + }, + { + "epoch": 1.45, + "learning_rate": 0.00014247190534423954, + "loss": 0.7207, + "step": 6250 + }, + { + "epoch": 1.45, + "learning_rate": 0.00014245543678799265, + "loss": 0.7171, + "step": 6251 + }, + { + "epoch": 1.45, + "learning_rate": 0.00014243896682699877, + "loss": 0.6406, + "step": 6252 + }, + { + "epoch": 1.45, + "learning_rate": 0.00014242249546180297, + "loss": 0.6467, + "step": 6253 + }, + { + "epoch": 1.45, + "learning_rate": 0.00014240602269295017, + "loss": 0.6746, + "step": 6254 + }, + { + "epoch": 1.45, + "learning_rate": 0.00014238954852098545, + "loss": 0.7975, + "step": 6255 + }, + { + "epoch": 1.45, + "learning_rate": 0.0001423730729464539, + "loss": 0.6205, + "step": 6256 + }, + { + "epoch": 1.45, + "learning_rate": 0.00014235659596990063, + "loss": 0.7418, + "step": 6257 + }, + { + "epoch": 1.45, + "learning_rate": 0.00014234011759187083, + "loss": 0.7358, + "step": 6258 + }, + { + "epoch": 1.45, + "learning_rate": 0.00014232363781290975, + "loss": 0.6773, + "step": 6259 + }, + { + "epoch": 1.45, + "learning_rate": 0.00014230715663356264, + "loss": 0.6946, + "step": 6260 + }, + { + "epoch": 1.45, + "learning_rate": 0.0001422906740543749, + "loss": 0.6958, + "step": 6261 + }, + { + "epoch": 1.45, + "learning_rate": 0.00014227419007589175, + "loss": 0.7888, + "step": 6262 + }, + { + "epoch": 1.45, + "learning_rate": 0.00014225770469865874, + "loss": 0.6772, + "step": 6263 + }, + { + "epoch": 1.45, + "learning_rate": 0.00014224121792322125, + "loss": 0.65, + "step": 6264 + }, + { + "epoch": 1.45, + "learning_rate": 0.00014222472975012482, + "loss": 0.7172, + "step": 6265 + }, + { + "epoch": 1.45, + "learning_rate": 0.00014220824017991505, + "loss": 0.6948, + "step": 6266 + }, + { + "epoch": 1.45, + "learning_rate": 0.00014219174921313745, + "loss": 0.7707, + "step": 6267 + }, + { + "epoch": 1.45, + "learning_rate": 0.0001421752568503377, + "loss": 0.7444, + "step": 6268 + }, + { + "epoch": 1.45, + "learning_rate": 0.00014215876309206152, + "loss": 0.691, + "step": 6269 + }, + { + "epoch": 1.45, + "learning_rate": 0.0001421422679388546, + "loss": 0.6969, + "step": 6270 + }, + { + "epoch": 1.45, + "learning_rate": 0.0001421257713912628, + "loss": 0.7209, + "step": 6271 + }, + { + "epoch": 1.45, + "learning_rate": 0.0001421092734498319, + "loss": 0.6472, + "step": 6272 + }, + { + "epoch": 1.45, + "learning_rate": 0.00014209277411510773, + "loss": 0.703, + "step": 6273 + }, + { + "epoch": 1.45, + "learning_rate": 0.0001420762733876363, + "loss": 0.6845, + "step": 6274 + }, + { + "epoch": 1.45, + "learning_rate": 0.0001420597712679635, + "loss": 0.7607, + "step": 6275 + }, + { + "epoch": 1.45, + "learning_rate": 0.00014204326775663542, + "loss": 0.7943, + "step": 6276 + }, + { + "epoch": 1.45, + "learning_rate": 0.00014202676285419812, + "loss": 0.6493, + "step": 6277 + }, + { + "epoch": 1.45, + "learning_rate": 0.0001420102565611976, + "loss": 0.6645, + "step": 6278 + }, + { + "epoch": 1.45, + "learning_rate": 0.00014199374887818012, + "loss": 0.6692, + "step": 6279 + }, + { + "epoch": 1.45, + "learning_rate": 0.00014197723980569185, + "loss": 0.734, + "step": 6280 + }, + { + "epoch": 1.45, + "learning_rate": 0.000141960729344279, + "loss": 0.6963, + "step": 6281 + }, + { + "epoch": 1.45, + "learning_rate": 0.00014194421749448796, + "loss": 0.6874, + "step": 6282 + }, + { + "epoch": 1.45, + "learning_rate": 0.00014192770425686498, + "loss": 0.666, + "step": 6283 + }, + { + "epoch": 1.45, + "learning_rate": 0.00014191118963195642, + "loss": 0.7274, + "step": 6284 + }, + { + "epoch": 1.46, + "learning_rate": 0.00014189467362030875, + "loss": 0.6926, + "step": 6285 + }, + { + "epoch": 1.46, + "learning_rate": 0.00014187815622246846, + "loss": 0.6823, + "step": 6286 + }, + { + "epoch": 1.46, + "learning_rate": 0.00014186163743898207, + "loss": 0.6356, + "step": 6287 + }, + { + "epoch": 1.46, + "learning_rate": 0.00014184511727039612, + "loss": 0.7102, + "step": 6288 + }, + { + "epoch": 1.46, + "learning_rate": 0.00014182859571725725, + "loss": 0.6904, + "step": 6289 + }, + { + "epoch": 1.46, + "learning_rate": 0.00014181207278011207, + "loss": 0.6699, + "step": 6290 + }, + { + "epoch": 1.46, + "learning_rate": 0.0001417955484595073, + "loss": 0.7055, + "step": 6291 + }, + { + "epoch": 1.46, + "learning_rate": 0.00014177902275598975, + "loss": 0.7343, + "step": 6292 + }, + { + "epoch": 1.46, + "learning_rate": 0.00014176249567010614, + "loss": 0.7291, + "step": 6293 + }, + { + "epoch": 1.46, + "learning_rate": 0.00014174596720240335, + "loss": 0.6885, + "step": 6294 + }, + { + "epoch": 1.46, + "learning_rate": 0.00014172943735342826, + "loss": 0.7321, + "step": 6295 + }, + { + "epoch": 1.46, + "learning_rate": 0.0001417129061237278, + "loss": 0.7892, + "step": 6296 + }, + { + "epoch": 1.46, + "learning_rate": 0.00014169637351384897, + "loss": 0.7261, + "step": 6297 + }, + { + "epoch": 1.46, + "learning_rate": 0.00014167983952433874, + "loss": 0.6894, + "step": 6298 + }, + { + "epoch": 1.46, + "learning_rate": 0.00014166330415574423, + "loss": 0.757, + "step": 6299 + }, + { + "epoch": 1.46, + "learning_rate": 0.0001416467674086125, + "loss": 0.6634, + "step": 6300 + }, + { + "epoch": 1.46, + "learning_rate": 0.00014163022928349078, + "loss": 0.659, + "step": 6301 + }, + { + "epoch": 1.46, + "learning_rate": 0.00014161368978092624, + "loss": 0.7438, + "step": 6302 + }, + { + "epoch": 1.46, + "learning_rate": 0.00014159714890146614, + "loss": 0.6361, + "step": 6303 + }, + { + "epoch": 1.46, + "learning_rate": 0.00014158060664565776, + "loss": 0.7331, + "step": 6304 + }, + { + "epoch": 1.46, + "learning_rate": 0.00014156406301404846, + "loss": 0.7088, + "step": 6305 + }, + { + "epoch": 1.46, + "learning_rate": 0.00014154751800718563, + "loss": 0.6894, + "step": 6306 + }, + { + "epoch": 1.46, + "learning_rate": 0.00014153097162561667, + "loss": 0.738, + "step": 6307 + }, + { + "epoch": 1.46, + "learning_rate": 0.0001415144238698891, + "loss": 0.7268, + "step": 6308 + }, + { + "epoch": 1.46, + "learning_rate": 0.00014149787474055045, + "loss": 0.6966, + "step": 6309 + }, + { + "epoch": 1.46, + "learning_rate": 0.00014148132423814826, + "loss": 0.7005, + "step": 6310 + }, + { + "epoch": 1.46, + "learning_rate": 0.00014146477236323015, + "loss": 0.712, + "step": 6311 + }, + { + "epoch": 1.46, + "learning_rate": 0.0001414482191163438, + "loss": 0.7486, + "step": 6312 + }, + { + "epoch": 1.46, + "learning_rate": 0.0001414316644980369, + "loss": 0.7154, + "step": 6313 + }, + { + "epoch": 1.46, + "learning_rate": 0.00014141510850885721, + "loss": 0.6991, + "step": 6314 + }, + { + "epoch": 1.46, + "learning_rate": 0.00014139855114935252, + "loss": 0.6349, + "step": 6315 + }, + { + "epoch": 1.46, + "learning_rate": 0.00014138199242007068, + "loss": 0.6589, + "step": 6316 + }, + { + "epoch": 1.46, + "learning_rate": 0.00014136543232155959, + "loss": 0.7141, + "step": 6317 + }, + { + "epoch": 1.46, + "learning_rate": 0.00014134887085436715, + "loss": 0.7827, + "step": 6318 + }, + { + "epoch": 1.46, + "learning_rate": 0.00014133230801904135, + "loss": 0.7045, + "step": 6319 + }, + { + "epoch": 1.46, + "learning_rate": 0.00014131574381613026, + "loss": 0.6837, + "step": 6320 + }, + { + "epoch": 1.46, + "learning_rate": 0.0001412991782461819, + "loss": 0.6418, + "step": 6321 + }, + { + "epoch": 1.46, + "learning_rate": 0.00014128261130974437, + "loss": 0.7203, + "step": 6322 + }, + { + "epoch": 1.46, + "learning_rate": 0.00014126604300736584, + "loss": 0.6475, + "step": 6323 + }, + { + "epoch": 1.46, + "learning_rate": 0.00014124947333959457, + "loss": 0.6641, + "step": 6324 + }, + { + "epoch": 1.46, + "learning_rate": 0.00014123290230697876, + "loss": 0.7823, + "step": 6325 + }, + { + "epoch": 1.46, + "learning_rate": 0.0001412163299100667, + "loss": 0.7543, + "step": 6326 + }, + { + "epoch": 1.46, + "learning_rate": 0.00014119975614940673, + "loss": 0.6959, + "step": 6327 + }, + { + "epoch": 1.47, + "learning_rate": 0.00014118318102554727, + "loss": 0.7386, + "step": 6328 + }, + { + "epoch": 1.47, + "learning_rate": 0.00014116660453903677, + "loss": 0.6719, + "step": 6329 + }, + { + "epoch": 1.47, + "learning_rate": 0.00014115002669042362, + "loss": 0.663, + "step": 6330 + }, + { + "epoch": 1.47, + "learning_rate": 0.0001411334474802564, + "loss": 0.7249, + "step": 6331 + }, + { + "epoch": 1.47, + "learning_rate": 0.00014111686690908366, + "loss": 0.7121, + "step": 6332 + }, + { + "epoch": 1.47, + "learning_rate": 0.000141100284977454, + "loss": 0.6275, + "step": 6333 + }, + { + "epoch": 1.47, + "learning_rate": 0.0001410837016859161, + "loss": 0.6827, + "step": 6334 + }, + { + "epoch": 1.47, + "learning_rate": 0.00014106711703501865, + "loss": 0.6529, + "step": 6335 + }, + { + "epoch": 1.47, + "learning_rate": 0.00014105053102531038, + "loss": 0.625, + "step": 6336 + }, + { + "epoch": 1.47, + "learning_rate": 0.0001410339436573401, + "loss": 0.7342, + "step": 6337 + }, + { + "epoch": 1.47, + "learning_rate": 0.00014101735493165667, + "loss": 0.7584, + "step": 6338 + }, + { + "epoch": 1.47, + "learning_rate": 0.00014100076484880892, + "loss": 0.6781, + "step": 6339 + }, + { + "epoch": 1.47, + "learning_rate": 0.00014098417340934578, + "loss": 0.687, + "step": 6340 + }, + { + "epoch": 1.47, + "learning_rate": 0.00014096758061381628, + "loss": 0.633, + "step": 6341 + }, + { + "epoch": 1.47, + "learning_rate": 0.00014095098646276934, + "loss": 0.6642, + "step": 6342 + }, + { + "epoch": 1.47, + "learning_rate": 0.0001409343909567541, + "loss": 0.7228, + "step": 6343 + }, + { + "epoch": 1.47, + "learning_rate": 0.00014091779409631963, + "loss": 0.6827, + "step": 6344 + }, + { + "epoch": 1.47, + "learning_rate": 0.0001409011958820151, + "loss": 0.7583, + "step": 6345 + }, + { + "epoch": 1.47, + "learning_rate": 0.00014088459631438971, + "loss": 0.6696, + "step": 6346 + }, + { + "epoch": 1.47, + "learning_rate": 0.00014086799539399266, + "loss": 0.6697, + "step": 6347 + }, + { + "epoch": 1.47, + "learning_rate": 0.0001408513931213732, + "loss": 0.7302, + "step": 6348 + }, + { + "epoch": 1.47, + "learning_rate": 0.00014083478949708078, + "loss": 0.6334, + "step": 6349 + }, + { + "epoch": 1.47, + "learning_rate": 0.00014081818452166468, + "loss": 0.7318, + "step": 6350 + }, + { + "epoch": 1.47, + "learning_rate": 0.00014080157819567433, + "loss": 0.6693, + "step": 6351 + }, + { + "epoch": 1.47, + "learning_rate": 0.00014078497051965927, + "loss": 0.6558, + "step": 6352 + }, + { + "epoch": 1.47, + "learning_rate": 0.00014076836149416887, + "loss": 0.7387, + "step": 6353 + }, + { + "epoch": 1.47, + "learning_rate": 0.00014075175111975282, + "loss": 0.7243, + "step": 6354 + }, + { + "epoch": 1.47, + "learning_rate": 0.00014073513939696062, + "loss": 0.701, + "step": 6355 + }, + { + "epoch": 1.47, + "learning_rate": 0.00014071852632634196, + "loss": 0.6341, + "step": 6356 + }, + { + "epoch": 1.47, + "learning_rate": 0.0001407019119084465, + "loss": 0.733, + "step": 6357 + }, + { + "epoch": 1.47, + "learning_rate": 0.000140685296143824, + "loss": 0.7024, + "step": 6358 + }, + { + "epoch": 1.47, + "learning_rate": 0.0001406686790330242, + "loss": 0.7386, + "step": 6359 + }, + { + "epoch": 1.47, + "learning_rate": 0.00014065206057659691, + "loss": 0.7632, + "step": 6360 + }, + { + "epoch": 1.47, + "learning_rate": 0.00014063544077509206, + "loss": 0.6716, + "step": 6361 + }, + { + "epoch": 1.47, + "learning_rate": 0.00014061881962905954, + "loss": 0.6655, + "step": 6362 + }, + { + "epoch": 1.47, + "learning_rate": 0.00014060219713904926, + "loss": 0.7099, + "step": 6363 + }, + { + "epoch": 1.47, + "learning_rate": 0.00014058557330561122, + "loss": 0.7505, + "step": 6364 + }, + { + "epoch": 1.47, + "learning_rate": 0.0001405689481292955, + "loss": 0.6202, + "step": 6365 + }, + { + "epoch": 1.47, + "learning_rate": 0.00014055232161065215, + "loss": 0.7058, + "step": 6366 + }, + { + "epoch": 1.47, + "learning_rate": 0.00014053569375023134, + "loss": 0.6213, + "step": 6367 + }, + { + "epoch": 1.47, + "learning_rate": 0.00014051906454858325, + "loss": 0.7275, + "step": 6368 + }, + { + "epoch": 1.47, + "learning_rate": 0.00014050243400625805, + "loss": 0.7527, + "step": 6369 + }, + { + "epoch": 1.47, + "learning_rate": 0.00014048580212380602, + "loss": 0.6691, + "step": 6370 + }, + { + "epoch": 1.48, + "learning_rate": 0.0001404691689017775, + "loss": 0.7226, + "step": 6371 + }, + { + "epoch": 1.48, + "learning_rate": 0.0001404525343407228, + "loss": 0.7103, + "step": 6372 + }, + { + "epoch": 1.48, + "learning_rate": 0.00014043589844119235, + "loss": 0.7006, + "step": 6373 + }, + { + "epoch": 1.48, + "learning_rate": 0.0001404192612037366, + "loss": 0.6689, + "step": 6374 + }, + { + "epoch": 1.48, + "learning_rate": 0.00014040262262890598, + "loss": 0.7079, + "step": 6375 + }, + { + "epoch": 1.48, + "learning_rate": 0.00014038598271725103, + "loss": 0.7385, + "step": 6376 + }, + { + "epoch": 1.48, + "learning_rate": 0.00014036934146932238, + "loss": 0.7059, + "step": 6377 + }, + { + "epoch": 1.48, + "learning_rate": 0.00014035269888567066, + "loss": 0.7798, + "step": 6378 + }, + { + "epoch": 1.48, + "learning_rate": 0.00014033605496684644, + "loss": 0.691, + "step": 6379 + }, + { + "epoch": 1.48, + "learning_rate": 0.0001403194097134005, + "loss": 0.7505, + "step": 6380 + }, + { + "epoch": 1.48, + "learning_rate": 0.00014030276312588358, + "loss": 0.7274, + "step": 6381 + }, + { + "epoch": 1.48, + "learning_rate": 0.00014028611520484641, + "loss": 0.6194, + "step": 6382 + }, + { + "epoch": 1.48, + "learning_rate": 0.00014026946595083995, + "loss": 0.7521, + "step": 6383 + }, + { + "epoch": 1.48, + "learning_rate": 0.00014025281536441498, + "loss": 0.7444, + "step": 6384 + }, + { + "epoch": 1.48, + "learning_rate": 0.00014023616344612246, + "loss": 0.6965, + "step": 6385 + }, + { + "epoch": 1.48, + "learning_rate": 0.00014021951019651334, + "loss": 0.6863, + "step": 6386 + }, + { + "epoch": 1.48, + "learning_rate": 0.00014020285561613867, + "loss": 0.7191, + "step": 6387 + }, + { + "epoch": 1.48, + "learning_rate": 0.0001401861997055495, + "loss": 0.699, + "step": 6388 + }, + { + "epoch": 1.48, + "learning_rate": 0.00014016954246529696, + "loss": 0.743, + "step": 6389 + }, + { + "epoch": 1.48, + "learning_rate": 0.00014015288389593216, + "loss": 0.7211, + "step": 6390 + }, + { + "epoch": 1.48, + "learning_rate": 0.00014013622399800627, + "loss": 0.748, + "step": 6391 + }, + { + "epoch": 1.48, + "learning_rate": 0.00014011956277207058, + "loss": 0.7482, + "step": 6392 + }, + { + "epoch": 1.48, + "learning_rate": 0.00014010290021867634, + "loss": 0.6866, + "step": 6393 + }, + { + "epoch": 1.48, + "learning_rate": 0.0001400862363383749, + "loss": 0.753, + "step": 6394 + }, + { + "epoch": 1.48, + "learning_rate": 0.0001400695711317176, + "loss": 0.6556, + "step": 6395 + }, + { + "epoch": 1.48, + "learning_rate": 0.00014005290459925585, + "loss": 0.6926, + "step": 6396 + }, + { + "epoch": 1.48, + "learning_rate": 0.0001400362367415411, + "loss": 0.7577, + "step": 6397 + }, + { + "epoch": 1.48, + "learning_rate": 0.00014001956755912488, + "loss": 0.6828, + "step": 6398 + }, + { + "epoch": 1.48, + "learning_rate": 0.0001400028970525587, + "loss": 0.7035, + "step": 6399 + }, + { + "epoch": 1.48, + "learning_rate": 0.00013998622522239416, + "loss": 0.724, + "step": 6400 + }, + { + "epoch": 1.48, + "learning_rate": 0.0001399695520691829, + "loss": 0.6696, + "step": 6401 + }, + { + "epoch": 1.48, + "learning_rate": 0.00013995287759347659, + "loss": 0.6745, + "step": 6402 + }, + { + "epoch": 1.48, + "learning_rate": 0.0001399362017958269, + "loss": 0.6712, + "step": 6403 + }, + { + "epoch": 1.48, + "learning_rate": 0.00013991952467678567, + "loss": 0.7772, + "step": 6404 + }, + { + "epoch": 1.48, + "learning_rate": 0.00013990284623690465, + "loss": 0.7559, + "step": 6405 + }, + { + "epoch": 1.48, + "learning_rate": 0.00013988616647673573, + "loss": 0.6934, + "step": 6406 + }, + { + "epoch": 1.48, + "learning_rate": 0.0001398694853968308, + "loss": 0.7459, + "step": 6407 + }, + { + "epoch": 1.48, + "learning_rate": 0.00013985280299774172, + "loss": 0.7313, + "step": 6408 + }, + { + "epoch": 1.48, + "learning_rate": 0.00013983611928002057, + "loss": 0.6485, + "step": 6409 + }, + { + "epoch": 1.48, + "learning_rate": 0.00013981943424421932, + "loss": 0.7169, + "step": 6410 + }, + { + "epoch": 1.48, + "learning_rate": 0.00013980274789089, + "loss": 0.746, + "step": 6411 + }, + { + "epoch": 1.48, + "learning_rate": 0.00013978606022058482, + "loss": 0.6676, + "step": 6412 + }, + { + "epoch": 1.48, + "learning_rate": 0.00013976937123385587, + "loss": 0.6763, + "step": 6413 + }, + { + "epoch": 1.48, + "learning_rate": 0.00013975268093125538, + "loss": 0.6971, + "step": 6414 + }, + { + "epoch": 1.49, + "learning_rate": 0.00013973598931333553, + "loss": 0.6646, + "step": 6415 + }, + { + "epoch": 1.49, + "learning_rate": 0.00013971929638064866, + "loss": 0.7225, + "step": 6416 + }, + { + "epoch": 1.49, + "learning_rate": 0.00013970260213374712, + "loss": 0.6756, + "step": 6417 + }, + { + "epoch": 1.49, + "learning_rate": 0.0001396859065731832, + "loss": 0.5922, + "step": 6418 + }, + { + "epoch": 1.49, + "learning_rate": 0.00013966920969950942, + "loss": 0.6852, + "step": 6419 + }, + { + "epoch": 1.49, + "learning_rate": 0.00013965251151327812, + "loss": 0.7009, + "step": 6420 + }, + { + "epoch": 1.49, + "learning_rate": 0.0001396358120150419, + "loss": 0.6908, + "step": 6421 + }, + { + "epoch": 1.49, + "learning_rate": 0.00013961911120535325, + "loss": 0.7019, + "step": 6422 + }, + { + "epoch": 1.49, + "learning_rate": 0.0001396024090847648, + "loss": 0.7245, + "step": 6423 + }, + { + "epoch": 1.49, + "learning_rate": 0.00013958570565382914, + "loss": 0.7202, + "step": 6424 + }, + { + "epoch": 1.49, + "learning_rate": 0.00013956900091309895, + "loss": 0.7273, + "step": 6425 + }, + { + "epoch": 1.49, + "learning_rate": 0.000139552294863127, + "loss": 0.7035, + "step": 6426 + }, + { + "epoch": 1.49, + "learning_rate": 0.00013953558750446601, + "loss": 0.7346, + "step": 6427 + }, + { + "epoch": 1.49, + "learning_rate": 0.00013951887883766878, + "loss": 0.6666, + "step": 6428 + }, + { + "epoch": 1.49, + "learning_rate": 0.0001395021688632882, + "loss": 0.7129, + "step": 6429 + }, + { + "epoch": 1.49, + "learning_rate": 0.0001394854575818771, + "loss": 0.6569, + "step": 6430 + }, + { + "epoch": 1.49, + "learning_rate": 0.0001394687449939885, + "loss": 0.636, + "step": 6431 + }, + { + "epoch": 1.49, + "learning_rate": 0.00013945203110017526, + "loss": 0.7719, + "step": 6432 + }, + { + "epoch": 1.49, + "learning_rate": 0.00013943531590099052, + "loss": 0.7074, + "step": 6433 + }, + { + "epoch": 1.49, + "learning_rate": 0.0001394185993969873, + "loss": 0.7271, + "step": 6434 + }, + { + "epoch": 1.49, + "learning_rate": 0.00013940188158871868, + "loss": 0.7257, + "step": 6435 + }, + { + "epoch": 1.49, + "learning_rate": 0.00013938516247673786, + "loss": 0.6833, + "step": 6436 + }, + { + "epoch": 1.49, + "learning_rate": 0.00013936844206159798, + "loss": 0.7109, + "step": 6437 + }, + { + "epoch": 1.49, + "learning_rate": 0.00013935172034385234, + "loss": 0.6897, + "step": 6438 + }, + { + "epoch": 1.49, + "learning_rate": 0.00013933499732405417, + "loss": 0.7138, + "step": 6439 + }, + { + "epoch": 1.49, + "learning_rate": 0.00013931827300275683, + "loss": 0.6897, + "step": 6440 + }, + { + "epoch": 1.49, + "learning_rate": 0.00013930154738051365, + "loss": 0.749, + "step": 6441 + }, + { + "epoch": 1.49, + "learning_rate": 0.0001392848204578781, + "loss": 0.711, + "step": 6442 + }, + { + "epoch": 1.49, + "learning_rate": 0.00013926809223540357, + "loss": 0.6961, + "step": 6443 + }, + { + "epoch": 1.49, + "learning_rate": 0.00013925136271364358, + "loss": 0.707, + "step": 6444 + }, + { + "epoch": 1.49, + "learning_rate": 0.00013923463189315167, + "loss": 0.6495, + "step": 6445 + }, + { + "epoch": 1.49, + "learning_rate": 0.00013921789977448142, + "loss": 0.7342, + "step": 6446 + }, + { + "epoch": 1.49, + "learning_rate": 0.0001392011663581865, + "loss": 0.7173, + "step": 6447 + }, + { + "epoch": 1.49, + "learning_rate": 0.00013918443164482046, + "loss": 0.6757, + "step": 6448 + }, + { + "epoch": 1.49, + "learning_rate": 0.0001391676956349371, + "loss": 0.7175, + "step": 6449 + }, + { + "epoch": 1.49, + "learning_rate": 0.0001391509583290902, + "loss": 0.7372, + "step": 6450 + }, + { + "epoch": 1.49, + "learning_rate": 0.0001391342197278335, + "loss": 0.7303, + "step": 6451 + }, + { + "epoch": 1.49, + "learning_rate": 0.00013911747983172084, + "loss": 0.6366, + "step": 6452 + }, + { + "epoch": 1.49, + "learning_rate": 0.00013910073864130613, + "loss": 0.6337, + "step": 6453 + }, + { + "epoch": 1.49, + "learning_rate": 0.0001390839961571433, + "loss": 0.7604, + "step": 6454 + }, + { + "epoch": 1.49, + "learning_rate": 0.00013906725237978625, + "loss": 0.6695, + "step": 6455 + }, + { + "epoch": 1.49, + "learning_rate": 0.00013905050730978906, + "loss": 0.787, + "step": 6456 + }, + { + "epoch": 1.49, + "learning_rate": 0.0001390337609477058, + "loss": 0.6449, + "step": 6457 + }, + { + "epoch": 1.5, + "learning_rate": 0.0001390170132940905, + "loss": 0.6771, + "step": 6458 + }, + { + "epoch": 1.5, + "learning_rate": 0.00013900026434949736, + "loss": 0.7235, + "step": 6459 + }, + { + "epoch": 1.5, + "learning_rate": 0.0001389835141144805, + "loss": 0.724, + "step": 6460 + }, + { + "epoch": 1.5, + "learning_rate": 0.00013896676258959414, + "loss": 0.6564, + "step": 6461 + }, + { + "epoch": 1.5, + "learning_rate": 0.00013895000977539267, + "loss": 0.6984, + "step": 6462 + }, + { + "epoch": 1.5, + "learning_rate": 0.0001389332556724303, + "loss": 0.6874, + "step": 6463 + }, + { + "epoch": 1.5, + "learning_rate": 0.00013891650028126135, + "loss": 0.7356, + "step": 6464 + }, + { + "epoch": 1.5, + "learning_rate": 0.00013889974360244028, + "loss": 0.683, + "step": 6465 + }, + { + "epoch": 1.5, + "learning_rate": 0.0001388829856365215, + "loss": 0.7072, + "step": 6466 + }, + { + "epoch": 1.5, + "learning_rate": 0.00013886622638405952, + "loss": 0.727, + "step": 6467 + }, + { + "epoch": 1.5, + "learning_rate": 0.00013884946584560883, + "loss": 0.676, + "step": 6468 + }, + { + "epoch": 1.5, + "learning_rate": 0.00013883270402172403, + "loss": 0.6361, + "step": 6469 + }, + { + "epoch": 1.5, + "learning_rate": 0.0001388159409129597, + "loss": 0.6659, + "step": 6470 + }, + { + "epoch": 1.5, + "learning_rate": 0.00013879917651987047, + "loss": 0.7259, + "step": 6471 + }, + { + "epoch": 1.5, + "learning_rate": 0.00013878241084301103, + "loss": 0.6515, + "step": 6472 + }, + { + "epoch": 1.5, + "learning_rate": 0.0001387656438829362, + "loss": 0.6687, + "step": 6473 + }, + { + "epoch": 1.5, + "learning_rate": 0.0001387488756402007, + "loss": 0.6862, + "step": 6474 + }, + { + "epoch": 1.5, + "learning_rate": 0.00013873210611535934, + "loss": 0.6795, + "step": 6475 + }, + { + "epoch": 1.5, + "learning_rate": 0.00013871533530896698, + "loss": 0.6883, + "step": 6476 + }, + { + "epoch": 1.5, + "learning_rate": 0.00013869856322157854, + "loss": 0.7458, + "step": 6477 + }, + { + "epoch": 1.5, + "learning_rate": 0.00013868178985374898, + "loss": 0.7555, + "step": 6478 + }, + { + "epoch": 1.5, + "learning_rate": 0.0001386650152060333, + "loss": 0.6893, + "step": 6479 + }, + { + "epoch": 1.5, + "learning_rate": 0.00013864823927898647, + "loss": 0.7643, + "step": 6480 + }, + { + "epoch": 1.5, + "learning_rate": 0.00013863146207316365, + "loss": 0.6994, + "step": 6481 + }, + { + "epoch": 1.5, + "learning_rate": 0.00013861468358911986, + "loss": 0.7519, + "step": 6482 + }, + { + "epoch": 1.5, + "learning_rate": 0.0001385979038274103, + "loss": 0.722, + "step": 6483 + }, + { + "epoch": 1.5, + "learning_rate": 0.0001385811227885902, + "loss": 0.6838, + "step": 6484 + }, + { + "epoch": 1.5, + "learning_rate": 0.00013856434047321482, + "loss": 0.72, + "step": 6485 + }, + { + "epoch": 1.5, + "learning_rate": 0.0001385475568818394, + "loss": 0.7385, + "step": 6486 + }, + { + "epoch": 1.5, + "learning_rate": 0.00013853077201501929, + "loss": 0.6936, + "step": 6487 + }, + { + "epoch": 1.5, + "learning_rate": 0.00013851398587330982, + "loss": 0.6713, + "step": 6488 + }, + { + "epoch": 1.5, + "learning_rate": 0.00013849719845726642, + "loss": 0.7097, + "step": 6489 + }, + { + "epoch": 1.5, + "learning_rate": 0.00013848040976744457, + "loss": 0.7076, + "step": 6490 + }, + { + "epoch": 1.5, + "learning_rate": 0.00013846361980439977, + "loss": 0.7383, + "step": 6491 + }, + { + "epoch": 1.5, + "learning_rate": 0.0001384468285686875, + "loss": 0.6443, + "step": 6492 + }, + { + "epoch": 1.5, + "learning_rate": 0.00013843003606086343, + "loss": 0.6773, + "step": 6493 + }, + { + "epoch": 1.5, + "learning_rate": 0.0001384132422814831, + "loss": 0.703, + "step": 6494 + }, + { + "epoch": 1.5, + "learning_rate": 0.00013839644723110224, + "loss": 0.7536, + "step": 6495 + }, + { + "epoch": 1.5, + "learning_rate": 0.00013837965091027655, + "loss": 0.7157, + "step": 6496 + }, + { + "epoch": 1.5, + "learning_rate": 0.0001383628533195617, + "loss": 0.7132, + "step": 6497 + }, + { + "epoch": 1.5, + "learning_rate": 0.00013834605445951355, + "loss": 0.7117, + "step": 6498 + }, + { + "epoch": 1.5, + "learning_rate": 0.00013832925433068796, + "loss": 0.6955, + "step": 6499 + }, + { + "epoch": 1.5, + "learning_rate": 0.00013831245293364073, + "loss": 0.7903, + "step": 6500 + }, + { + "epoch": 1.51, + "learning_rate": 0.00013829565026892785, + "loss": 0.7171, + "step": 6501 + }, + { + "epoch": 1.51, + "learning_rate": 0.00013827884633710525, + "loss": 0.674, + "step": 6502 + }, + { + "epoch": 1.51, + "learning_rate": 0.0001382620411387289, + "loss": 0.7209, + "step": 6503 + }, + { + "epoch": 1.51, + "learning_rate": 0.00013824523467435487, + "loss": 0.7512, + "step": 6504 + }, + { + "epoch": 1.51, + "learning_rate": 0.00013822842694453924, + "loss": 0.7433, + "step": 6505 + }, + { + "epoch": 1.51, + "learning_rate": 0.00013821161794983815, + "loss": 0.668, + "step": 6506 + }, + { + "epoch": 1.51, + "learning_rate": 0.00013819480769080778, + "loss": 0.6601, + "step": 6507 + }, + { + "epoch": 1.51, + "learning_rate": 0.00013817799616800428, + "loss": 0.7044, + "step": 6508 + }, + { + "epoch": 1.51, + "learning_rate": 0.00013816118338198395, + "loss": 0.7104, + "step": 6509 + }, + { + "epoch": 1.51, + "learning_rate": 0.0001381443693333031, + "loss": 0.7318, + "step": 6510 + }, + { + "epoch": 1.51, + "learning_rate": 0.00013812755402251804, + "loss": 0.7035, + "step": 6511 + }, + { + "epoch": 1.51, + "learning_rate": 0.00013811073745018514, + "loss": 0.7458, + "step": 6512 + }, + { + "epoch": 1.51, + "learning_rate": 0.00013809391961686082, + "loss": 0.6667, + "step": 6513 + }, + { + "epoch": 1.51, + "learning_rate": 0.00013807710052310155, + "loss": 0.6898, + "step": 6514 + }, + { + "epoch": 1.51, + "learning_rate": 0.00013806028016946384, + "loss": 0.6943, + "step": 6515 + }, + { + "epoch": 1.51, + "learning_rate": 0.00013804345855650423, + "loss": 0.7073, + "step": 6516 + }, + { + "epoch": 1.51, + "learning_rate": 0.00013802663568477928, + "loss": 0.6951, + "step": 6517 + }, + { + "epoch": 1.51, + "learning_rate": 0.0001380098115548457, + "loss": 0.7086, + "step": 6518 + }, + { + "epoch": 1.51, + "learning_rate": 0.00013799298616726005, + "loss": 0.6936, + "step": 6519 + }, + { + "epoch": 1.51, + "learning_rate": 0.00013797615952257911, + "loss": 0.6563, + "step": 6520 + }, + { + "epoch": 1.51, + "learning_rate": 0.0001379593316213596, + "loss": 0.7246, + "step": 6521 + }, + { + "epoch": 1.51, + "learning_rate": 0.00013794250246415836, + "loss": 0.6885, + "step": 6522 + }, + { + "epoch": 1.51, + "learning_rate": 0.00013792567205153216, + "loss": 0.7131, + "step": 6523 + }, + { + "epoch": 1.51, + "learning_rate": 0.00013790884038403795, + "loss": 0.7243, + "step": 6524 + }, + { + "epoch": 1.51, + "learning_rate": 0.00013789200746223257, + "loss": 0.7704, + "step": 6525 + }, + { + "epoch": 1.51, + "learning_rate": 0.00013787517328667305, + "loss": 0.6999, + "step": 6526 + }, + { + "epoch": 1.51, + "learning_rate": 0.00013785833785791635, + "loss": 0.7372, + "step": 6527 + }, + { + "epoch": 1.51, + "learning_rate": 0.00013784150117651954, + "loss": 0.6791, + "step": 6528 + }, + { + "epoch": 1.51, + "learning_rate": 0.00013782466324303969, + "loss": 0.7332, + "step": 6529 + }, + { + "epoch": 1.51, + "learning_rate": 0.00013780782405803393, + "loss": 0.7584, + "step": 6530 + }, + { + "epoch": 1.51, + "learning_rate": 0.0001377909836220594, + "loss": 0.6994, + "step": 6531 + }, + { + "epoch": 1.51, + "learning_rate": 0.00013777414193567338, + "loss": 0.731, + "step": 6532 + }, + { + "epoch": 1.51, + "learning_rate": 0.000137757298999433, + "loss": 0.6975, + "step": 6533 + }, + { + "epoch": 1.51, + "learning_rate": 0.00013774045481389568, + "loss": 0.7852, + "step": 6534 + }, + { + "epoch": 1.51, + "learning_rate": 0.0001377236093796187, + "loss": 0.7259, + "step": 6535 + }, + { + "epoch": 1.51, + "learning_rate": 0.00013770676269715942, + "loss": 0.7221, + "step": 6536 + }, + { + "epoch": 1.51, + "learning_rate": 0.00013768991476707527, + "loss": 0.7406, + "step": 6537 + }, + { + "epoch": 1.51, + "learning_rate": 0.00013767306558992373, + "loss": 0.7038, + "step": 6538 + }, + { + "epoch": 1.51, + "learning_rate": 0.00013765621516626225, + "loss": 0.664, + "step": 6539 + }, + { + "epoch": 1.51, + "learning_rate": 0.00013763936349664836, + "loss": 0.6767, + "step": 6540 + }, + { + "epoch": 1.51, + "learning_rate": 0.00013762251058163972, + "loss": 0.7362, + "step": 6541 + }, + { + "epoch": 1.51, + "learning_rate": 0.0001376056564217939, + "loss": 0.6862, + "step": 6542 + }, + { + "epoch": 1.51, + "learning_rate": 0.0001375888010176686, + "loss": 0.6897, + "step": 6543 + }, + { + "epoch": 1.52, + "learning_rate": 0.00013757194436982143, + "loss": 0.73, + "step": 6544 + }, + { + "epoch": 1.52, + "learning_rate": 0.00013755508647881023, + "loss": 0.6905, + "step": 6545 + }, + { + "epoch": 1.52, + "learning_rate": 0.00013753822734519276, + "loss": 0.7361, + "step": 6546 + }, + { + "epoch": 1.52, + "learning_rate": 0.00013752136696952683, + "loss": 0.7105, + "step": 6547 + }, + { + "epoch": 1.52, + "learning_rate": 0.00013750450535237037, + "loss": 0.7245, + "step": 6548 + }, + { + "epoch": 1.52, + "learning_rate": 0.0001374876424942812, + "loss": 0.6723, + "step": 6549 + }, + { + "epoch": 1.52, + "learning_rate": 0.00013747077839581728, + "loss": 0.6615, + "step": 6550 + }, + { + "epoch": 1.52, + "learning_rate": 0.00013745391305753667, + "loss": 0.7073, + "step": 6551 + }, + { + "epoch": 1.52, + "learning_rate": 0.00013743704647999736, + "loss": 0.6378, + "step": 6552 + }, + { + "epoch": 1.52, + "learning_rate": 0.00013742017866375745, + "loss": 0.715, + "step": 6553 + }, + { + "epoch": 1.52, + "learning_rate": 0.00013740330960937503, + "loss": 0.6374, + "step": 6554 + }, + { + "epoch": 1.52, + "learning_rate": 0.00013738643931740827, + "loss": 0.6596, + "step": 6555 + }, + { + "epoch": 1.52, + "learning_rate": 0.00013736956778841533, + "loss": 0.6785, + "step": 6556 + }, + { + "epoch": 1.52, + "learning_rate": 0.00013735269502295448, + "loss": 0.6934, + "step": 6557 + }, + { + "epoch": 1.52, + "learning_rate": 0.000137335821021584, + "loss": 0.7343, + "step": 6558 + }, + { + "epoch": 1.52, + "learning_rate": 0.00013731894578486225, + "loss": 0.6768, + "step": 6559 + }, + { + "epoch": 1.52, + "learning_rate": 0.0001373020693133475, + "loss": 0.7113, + "step": 6560 + }, + { + "epoch": 1.52, + "learning_rate": 0.00013728519160759818, + "loss": 0.6876, + "step": 6561 + }, + { + "epoch": 1.52, + "learning_rate": 0.00013726831266817278, + "loss": 0.7046, + "step": 6562 + }, + { + "epoch": 1.52, + "learning_rate": 0.00013725143249562974, + "loss": 0.6961, + "step": 6563 + }, + { + "epoch": 1.52, + "learning_rate": 0.00013723455109052762, + "loss": 0.7006, + "step": 6564 + }, + { + "epoch": 1.52, + "learning_rate": 0.00013721766845342493, + "loss": 0.66, + "step": 6565 + }, + { + "epoch": 1.52, + "learning_rate": 0.00013720078458488036, + "loss": 0.6749, + "step": 6566 + }, + { + "epoch": 1.52, + "learning_rate": 0.0001371838994854524, + "loss": 0.6167, + "step": 6567 + }, + { + "epoch": 1.52, + "learning_rate": 0.00013716701315569991, + "loss": 0.6532, + "step": 6568 + }, + { + "epoch": 1.52, + "learning_rate": 0.00013715012559618154, + "loss": 0.7328, + "step": 6569 + }, + { + "epoch": 1.52, + "learning_rate": 0.00013713323680745608, + "loss": 0.7619, + "step": 6570 + }, + { + "epoch": 1.52, + "learning_rate": 0.0001371163467900823, + "loss": 0.7625, + "step": 6571 + }, + { + "epoch": 1.52, + "learning_rate": 0.00013709945554461907, + "loss": 0.7354, + "step": 6572 + }, + { + "epoch": 1.52, + "learning_rate": 0.00013708256307162525, + "loss": 0.7318, + "step": 6573 + }, + { + "epoch": 1.52, + "learning_rate": 0.00013706566937165984, + "loss": 0.7909, + "step": 6574 + }, + { + "epoch": 1.52, + "learning_rate": 0.00013704877444528182, + "loss": 0.7229, + "step": 6575 + }, + { + "epoch": 1.52, + "learning_rate": 0.0001370318782930501, + "loss": 0.7366, + "step": 6576 + }, + { + "epoch": 1.52, + "learning_rate": 0.0001370149809155238, + "loss": 0.7357, + "step": 6577 + }, + { + "epoch": 1.52, + "learning_rate": 0.000136998082313262, + "loss": 0.6983, + "step": 6578 + }, + { + "epoch": 1.52, + "learning_rate": 0.00013698118248682382, + "loss": 0.6655, + "step": 6579 + }, + { + "epoch": 1.52, + "learning_rate": 0.0001369642814367685, + "loss": 0.7358, + "step": 6580 + }, + { + "epoch": 1.52, + "learning_rate": 0.00013694737916365517, + "loss": 0.6901, + "step": 6581 + }, + { + "epoch": 1.52, + "learning_rate": 0.0001369304756680431, + "loss": 0.6684, + "step": 6582 + }, + { + "epoch": 1.52, + "learning_rate": 0.00013691357095049165, + "loss": 0.6977, + "step": 6583 + }, + { + "epoch": 1.52, + "learning_rate": 0.00013689666501156005, + "loss": 0.675, + "step": 6584 + }, + { + "epoch": 1.52, + "learning_rate": 0.00013687975785180777, + "loss": 0.8054, + "step": 6585 + }, + { + "epoch": 1.52, + "learning_rate": 0.00013686284947179425, + "loss": 0.6745, + "step": 6586 + }, + { + "epoch": 1.53, + "learning_rate": 0.00013684593987207881, + "loss": 0.7315, + "step": 6587 + }, + { + "epoch": 1.53, + "learning_rate": 0.00013682902905322105, + "loss": 0.7315, + "step": 6588 + }, + { + "epoch": 1.53, + "learning_rate": 0.00013681211701578052, + "loss": 0.7207, + "step": 6589 + }, + { + "epoch": 1.53, + "learning_rate": 0.00013679520376031672, + "loss": 0.6735, + "step": 6590 + }, + { + "epoch": 1.53, + "learning_rate": 0.00013677828928738934, + "loss": 0.6796, + "step": 6591 + }, + { + "epoch": 1.53, + "learning_rate": 0.000136761373597558, + "loss": 0.7345, + "step": 6592 + }, + { + "epoch": 1.53, + "learning_rate": 0.00013674445669138241, + "loss": 0.7423, + "step": 6593 + }, + { + "epoch": 1.53, + "learning_rate": 0.0001367275385694223, + "loss": 0.7169, + "step": 6594 + }, + { + "epoch": 1.53, + "learning_rate": 0.00013671061923223746, + "loss": 0.6732, + "step": 6595 + }, + { + "epoch": 1.53, + "learning_rate": 0.00013669369868038773, + "loss": 0.8212, + "step": 6596 + }, + { + "epoch": 1.53, + "learning_rate": 0.00013667677691443295, + "loss": 0.6889, + "step": 6597 + }, + { + "epoch": 1.53, + "learning_rate": 0.00013665985393493299, + "loss": 0.6718, + "step": 6598 + }, + { + "epoch": 1.53, + "learning_rate": 0.00013664292974244782, + "loss": 0.6635, + "step": 6599 + }, + { + "epoch": 1.53, + "learning_rate": 0.00013662600433753745, + "loss": 0.691, + "step": 6600 + }, + { + "epoch": 1.53, + "learning_rate": 0.00013660907772076187, + "loss": 0.7796, + "step": 6601 + }, + { + "epoch": 1.53, + "learning_rate": 0.0001365921498926811, + "loss": 0.688, + "step": 6602 + }, + { + "epoch": 1.53, + "learning_rate": 0.0001365752208538553, + "loss": 0.6879, + "step": 6603 + }, + { + "epoch": 1.53, + "learning_rate": 0.0001365582906048446, + "loss": 0.7587, + "step": 6604 + }, + { + "epoch": 1.53, + "learning_rate": 0.00013654135914620917, + "loss": 0.7267, + "step": 6605 + }, + { + "epoch": 1.53, + "learning_rate": 0.0001365244264785092, + "loss": 0.6962, + "step": 6606 + }, + { + "epoch": 1.53, + "learning_rate": 0.00013650749260230502, + "loss": 0.6695, + "step": 6607 + }, + { + "epoch": 1.53, + "learning_rate": 0.0001364905575181569, + "loss": 0.769, + "step": 6608 + }, + { + "epoch": 1.53, + "learning_rate": 0.00013647362122662515, + "loss": 0.6652, + "step": 6609 + }, + { + "epoch": 1.53, + "learning_rate": 0.00013645668372827015, + "loss": 0.6888, + "step": 6610 + }, + { + "epoch": 1.53, + "learning_rate": 0.0001364397450236524, + "loss": 0.6454, + "step": 6611 + }, + { + "epoch": 1.53, + "learning_rate": 0.0001364228051133323, + "loss": 0.6537, + "step": 6612 + }, + { + "epoch": 1.53, + "learning_rate": 0.0001364058639978703, + "loss": 0.6512, + "step": 6613 + }, + { + "epoch": 1.53, + "learning_rate": 0.00013638892167782703, + "loss": 0.7281, + "step": 6614 + }, + { + "epoch": 1.53, + "learning_rate": 0.00013637197815376305, + "loss": 0.761, + "step": 6615 + }, + { + "epoch": 1.53, + "learning_rate": 0.00013635503342623897, + "loss": 0.704, + "step": 6616 + }, + { + "epoch": 1.53, + "learning_rate": 0.00013633808749581542, + "loss": 0.74, + "step": 6617 + }, + { + "epoch": 1.53, + "learning_rate": 0.00013632114036305314, + "loss": 0.6907, + "step": 6618 + }, + { + "epoch": 1.53, + "learning_rate": 0.00013630419202851284, + "loss": 0.6816, + "step": 6619 + }, + { + "epoch": 1.53, + "learning_rate": 0.00013628724249275536, + "loss": 0.6542, + "step": 6620 + }, + { + "epoch": 1.53, + "learning_rate": 0.00013627029175634144, + "loss": 0.695, + "step": 6621 + }, + { + "epoch": 1.53, + "learning_rate": 0.00013625333981983194, + "loss": 0.7283, + "step": 6622 + }, + { + "epoch": 1.53, + "learning_rate": 0.00013623638668378783, + "loss": 0.7441, + "step": 6623 + }, + { + "epoch": 1.53, + "learning_rate": 0.00013621943234877, + "loss": 0.7719, + "step": 6624 + }, + { + "epoch": 1.53, + "learning_rate": 0.00013620247681533942, + "loss": 0.7227, + "step": 6625 + }, + { + "epoch": 1.53, + "learning_rate": 0.00013618552008405713, + "loss": 0.6764, + "step": 6626 + }, + { + "epoch": 1.53, + "learning_rate": 0.00013616856215548418, + "loss": 0.6717, + "step": 6627 + }, + { + "epoch": 1.53, + "learning_rate": 0.0001361516030301817, + "loss": 0.6907, + "step": 6628 + }, + { + "epoch": 1.53, + "learning_rate": 0.00013613464270871076, + "loss": 0.7002, + "step": 6629 + }, + { + "epoch": 1.53, + "learning_rate": 0.00013611768119163259, + "loss": 0.7168, + "step": 6630 + }, + { + "epoch": 1.54, + "learning_rate": 0.00013610071847950834, + "loss": 0.6083, + "step": 6631 + }, + { + "epoch": 1.54, + "learning_rate": 0.0001360837545728993, + "loss": 0.7579, + "step": 6632 + }, + { + "epoch": 1.54, + "learning_rate": 0.00013606678947236686, + "loss": 0.7154, + "step": 6633 + }, + { + "epoch": 1.54, + "learning_rate": 0.0001360498231784722, + "loss": 0.7349, + "step": 6634 + }, + { + "epoch": 1.54, + "learning_rate": 0.00013603285569177675, + "loss": 0.6976, + "step": 6635 + }, + { + "epoch": 1.54, + "learning_rate": 0.00013601588701284197, + "loss": 0.6974, + "step": 6636 + }, + { + "epoch": 1.54, + "learning_rate": 0.00013599891714222924, + "loss": 0.6305, + "step": 6637 + }, + { + "epoch": 1.54, + "learning_rate": 0.0001359819460805001, + "loss": 0.7136, + "step": 6638 + }, + { + "epoch": 1.54, + "learning_rate": 0.0001359649738282161, + "loss": 0.7169, + "step": 6639 + }, + { + "epoch": 1.54, + "learning_rate": 0.00013594800038593872, + "loss": 0.6983, + "step": 6640 + }, + { + "epoch": 1.54, + "learning_rate": 0.00013593102575422964, + "loss": 0.6931, + "step": 6641 + }, + { + "epoch": 1.54, + "learning_rate": 0.0001359140499336505, + "loss": 0.7623, + "step": 6642 + }, + { + "epoch": 1.54, + "learning_rate": 0.00013589707292476302, + "loss": 0.7429, + "step": 6643 + }, + { + "epoch": 1.54, + "learning_rate": 0.00013588009472812889, + "loss": 0.7415, + "step": 6644 + }, + { + "epoch": 1.54, + "learning_rate": 0.00013586311534430982, + "loss": 0.7145, + "step": 6645 + }, + { + "epoch": 1.54, + "learning_rate": 0.00013584613477386773, + "loss": 0.6447, + "step": 6646 + }, + { + "epoch": 1.54, + "learning_rate": 0.0001358291530173644, + "loss": 0.7238, + "step": 6647 + }, + { + "epoch": 1.54, + "learning_rate": 0.00013581217007536172, + "loss": 0.7002, + "step": 6648 + }, + { + "epoch": 1.54, + "learning_rate": 0.00013579518594842161, + "loss": 0.6764, + "step": 6649 + }, + { + "epoch": 1.54, + "learning_rate": 0.00013577820063710608, + "loss": 0.6509, + "step": 6650 + }, + { + "epoch": 1.54, + "learning_rate": 0.00013576121414197705, + "loss": 0.6504, + "step": 6651 + }, + { + "epoch": 1.54, + "learning_rate": 0.00013574422646359663, + "loss": 0.7739, + "step": 6652 + }, + { + "epoch": 1.54, + "learning_rate": 0.00013572723760252688, + "loss": 0.7503, + "step": 6653 + }, + { + "epoch": 1.54, + "learning_rate": 0.00013571024755932993, + "loss": 0.6799, + "step": 6654 + }, + { + "epoch": 1.54, + "learning_rate": 0.00013569325633456793, + "loss": 0.6731, + "step": 6655 + }, + { + "epoch": 1.54, + "learning_rate": 0.0001356762639288031, + "loss": 0.7218, + "step": 6656 + }, + { + "epoch": 1.54, + "learning_rate": 0.0001356592703425976, + "loss": 0.7584, + "step": 6657 + }, + { + "epoch": 1.54, + "learning_rate": 0.00013564227557651376, + "loss": 0.6731, + "step": 6658 + }, + { + "epoch": 1.54, + "learning_rate": 0.00013562527963111392, + "loss": 0.6495, + "step": 6659 + }, + { + "epoch": 1.54, + "learning_rate": 0.00013560828250696043, + "loss": 0.7048, + "step": 6660 + }, + { + "epoch": 1.54, + "learning_rate": 0.00013559128420461564, + "loss": 0.7315, + "step": 6661 + }, + { + "epoch": 1.54, + "learning_rate": 0.000135574284724642, + "loss": 0.7057, + "step": 6662 + }, + { + "epoch": 1.54, + "learning_rate": 0.000135557284067602, + "loss": 0.6461, + "step": 6663 + }, + { + "epoch": 1.54, + "learning_rate": 0.0001355402822340581, + "loss": 0.7147, + "step": 6664 + }, + { + "epoch": 1.54, + "learning_rate": 0.00013552327922457298, + "loss": 0.6975, + "step": 6665 + }, + { + "epoch": 1.54, + "learning_rate": 0.00013550627503970908, + "loss": 0.7346, + "step": 6666 + }, + { + "epoch": 1.54, + "learning_rate": 0.00013548926968002907, + "loss": 0.7197, + "step": 6667 + }, + { + "epoch": 1.54, + "learning_rate": 0.00013547226314609563, + "loss": 0.761, + "step": 6668 + }, + { + "epoch": 1.54, + "learning_rate": 0.00013545525543847146, + "loss": 0.709, + "step": 6669 + }, + { + "epoch": 1.54, + "learning_rate": 0.0001354382465577193, + "loss": 0.7637, + "step": 6670 + }, + { + "epoch": 1.54, + "learning_rate": 0.000135421236504402, + "loss": 0.7541, + "step": 6671 + }, + { + "epoch": 1.54, + "learning_rate": 0.00013540422527908227, + "loss": 0.7743, + "step": 6672 + }, + { + "epoch": 1.54, + "learning_rate": 0.00013538721288232304, + "loss": 0.6866, + "step": 6673 + }, + { + "epoch": 1.55, + "learning_rate": 0.00013537019931468715, + "loss": 0.6862, + "step": 6674 + }, + { + "epoch": 1.55, + "learning_rate": 0.0001353531845767376, + "loss": 0.6702, + "step": 6675 + }, + { + "epoch": 1.55, + "learning_rate": 0.00013533616866903735, + "loss": 0.7074, + "step": 6676 + }, + { + "epoch": 1.55, + "learning_rate": 0.0001353191515921494, + "loss": 0.6624, + "step": 6677 + }, + { + "epoch": 1.55, + "learning_rate": 0.00013530213334663683, + "loss": 0.7291, + "step": 6678 + }, + { + "epoch": 1.55, + "learning_rate": 0.00013528511393306268, + "loss": 0.799, + "step": 6679 + }, + { + "epoch": 1.55, + "learning_rate": 0.00013526809335199014, + "loss": 0.7151, + "step": 6680 + }, + { + "epoch": 1.55, + "learning_rate": 0.00013525107160398237, + "loss": 0.6973, + "step": 6681 + }, + { + "epoch": 1.55, + "learning_rate": 0.00013523404868960253, + "loss": 0.6784, + "step": 6682 + }, + { + "epoch": 1.55, + "learning_rate": 0.0001352170246094139, + "loss": 0.7246, + "step": 6683 + }, + { + "epoch": 1.55, + "learning_rate": 0.00013519999936397974, + "loss": 0.6847, + "step": 6684 + }, + { + "epoch": 1.55, + "learning_rate": 0.00013518297295386344, + "loss": 0.7443, + "step": 6685 + }, + { + "epoch": 1.55, + "learning_rate": 0.0001351659453796283, + "loss": 0.7006, + "step": 6686 + }, + { + "epoch": 1.55, + "learning_rate": 0.00013514891664183777, + "loss": 0.6812, + "step": 6687 + }, + { + "epoch": 1.55, + "learning_rate": 0.00013513188674105524, + "loss": 0.7732, + "step": 6688 + }, + { + "epoch": 1.55, + "learning_rate": 0.0001351148556778442, + "loss": 0.6404, + "step": 6689 + }, + { + "epoch": 1.55, + "learning_rate": 0.00013509782345276817, + "loss": 0.6945, + "step": 6690 + }, + { + "epoch": 1.55, + "learning_rate": 0.00013508079006639072, + "loss": 0.7154, + "step": 6691 + }, + { + "epoch": 1.55, + "learning_rate": 0.00013506375551927547, + "loss": 0.7429, + "step": 6692 + }, + { + "epoch": 1.55, + "learning_rate": 0.00013504671981198594, + "loss": 0.6607, + "step": 6693 + }, + { + "epoch": 1.55, + "learning_rate": 0.00013502968294508592, + "loss": 0.6583, + "step": 6694 + }, + { + "epoch": 1.55, + "learning_rate": 0.00013501264491913906, + "loss": 0.7211, + "step": 6695 + }, + { + "epoch": 1.55, + "learning_rate": 0.00013499560573470914, + "loss": 0.8086, + "step": 6696 + }, + { + "epoch": 1.55, + "learning_rate": 0.00013497856539235995, + "loss": 0.7364, + "step": 6697 + }, + { + "epoch": 1.55, + "learning_rate": 0.00013496152389265523, + "loss": 0.6505, + "step": 6698 + }, + { + "epoch": 1.55, + "learning_rate": 0.00013494448123615893, + "loss": 0.6882, + "step": 6699 + }, + { + "epoch": 1.55, + "learning_rate": 0.0001349274374234349, + "loss": 0.6356, + "step": 6700 + }, + { + "epoch": 1.55, + "learning_rate": 0.0001349103924550471, + "loss": 0.6603, + "step": 6701 + }, + { + "epoch": 1.55, + "learning_rate": 0.0001348933463315595, + "loss": 0.7258, + "step": 6702 + }, + { + "epoch": 1.55, + "learning_rate": 0.00013487629905353617, + "loss": 0.7595, + "step": 6703 + }, + { + "epoch": 1.55, + "learning_rate": 0.00013485925062154105, + "loss": 0.6694, + "step": 6704 + }, + { + "epoch": 1.55, + "learning_rate": 0.00013484220103613832, + "loss": 0.721, + "step": 6705 + }, + { + "epoch": 1.55, + "learning_rate": 0.0001348251502978921, + "loss": 0.7469, + "step": 6706 + }, + { + "epoch": 1.55, + "learning_rate": 0.0001348080984073665, + "loss": 0.683, + "step": 6707 + }, + { + "epoch": 1.55, + "learning_rate": 0.0001347910453651258, + "loss": 0.6913, + "step": 6708 + }, + { + "epoch": 1.55, + "learning_rate": 0.00013477399117173418, + "loss": 0.6743, + "step": 6709 + }, + { + "epoch": 1.55, + "learning_rate": 0.00013475693582775598, + "loss": 0.7008, + "step": 6710 + }, + { + "epoch": 1.55, + "learning_rate": 0.00013473987933375548, + "loss": 0.751, + "step": 6711 + }, + { + "epoch": 1.55, + "learning_rate": 0.00013472282169029704, + "loss": 0.693, + "step": 6712 + }, + { + "epoch": 1.55, + "learning_rate": 0.00013470576289794506, + "loss": 0.6732, + "step": 6713 + }, + { + "epoch": 1.55, + "learning_rate": 0.00013468870295726398, + "loss": 0.738, + "step": 6714 + }, + { + "epoch": 1.55, + "learning_rate": 0.0001346716418688183, + "loss": 0.6784, + "step": 6715 + }, + { + "epoch": 1.55, + "learning_rate": 0.00013465457963317246, + "loss": 0.6833, + "step": 6716 + }, + { + "epoch": 1.56, + "learning_rate": 0.00013463751625089105, + "loss": 0.6664, + "step": 6717 + }, + { + "epoch": 1.56, + "learning_rate": 0.00013462045172253867, + "loss": 0.7264, + "step": 6718 + }, + { + "epoch": 1.56, + "learning_rate": 0.00013460338604867993, + "loss": 0.6449, + "step": 6719 + }, + { + "epoch": 1.56, + "learning_rate": 0.00013458631922987947, + "loss": 0.6842, + "step": 6720 + }, + { + "epoch": 1.56, + "learning_rate": 0.00013456925126670204, + "loss": 0.7134, + "step": 6721 + }, + { + "epoch": 1.56, + "learning_rate": 0.0001345521821597123, + "loss": 0.7096, + "step": 6722 + }, + { + "epoch": 1.56, + "learning_rate": 0.0001345351119094751, + "loss": 0.7149, + "step": 6723 + }, + { + "epoch": 1.56, + "learning_rate": 0.00013451804051655523, + "loss": 0.7178, + "step": 6724 + }, + { + "epoch": 1.56, + "learning_rate": 0.0001345009679815175, + "loss": 0.8252, + "step": 6725 + }, + { + "epoch": 1.56, + "learning_rate": 0.00013448389430492687, + "loss": 0.7466, + "step": 6726 + }, + { + "epoch": 1.56, + "learning_rate": 0.0001344668194873482, + "loss": 0.7954, + "step": 6727 + }, + { + "epoch": 1.56, + "learning_rate": 0.0001344497435293465, + "loss": 0.6349, + "step": 6728 + }, + { + "epoch": 1.56, + "learning_rate": 0.00013443266643148674, + "loss": 0.7394, + "step": 6729 + }, + { + "epoch": 1.56, + "learning_rate": 0.00013441558819433398, + "loss": 0.6999, + "step": 6730 + }, + { + "epoch": 1.56, + "learning_rate": 0.0001343985088184533, + "loss": 0.6535, + "step": 6731 + }, + { + "epoch": 1.56, + "learning_rate": 0.0001343814283044098, + "loss": 0.652, + "step": 6732 + }, + { + "epoch": 1.56, + "learning_rate": 0.00013436434665276865, + "loss": 0.6617, + "step": 6733 + }, + { + "epoch": 1.56, + "learning_rate": 0.00013434726386409504, + "loss": 0.6516, + "step": 6734 + }, + { + "epoch": 1.56, + "learning_rate": 0.00013433017993895417, + "loss": 0.6914, + "step": 6735 + }, + { + "epoch": 1.56, + "learning_rate": 0.0001343130948779113, + "loss": 0.7524, + "step": 6736 + }, + { + "epoch": 1.56, + "learning_rate": 0.0001342960086815318, + "loss": 0.7061, + "step": 6737 + }, + { + "epoch": 1.56, + "learning_rate": 0.0001342789213503809, + "loss": 0.6328, + "step": 6738 + }, + { + "epoch": 1.56, + "learning_rate": 0.0001342618328850241, + "loss": 0.6948, + "step": 6739 + }, + { + "epoch": 1.56, + "learning_rate": 0.00013424474328602677, + "loss": 0.6882, + "step": 6740 + }, + { + "epoch": 1.56, + "learning_rate": 0.00013422765255395431, + "loss": 0.6476, + "step": 6741 + }, + { + "epoch": 1.56, + "learning_rate": 0.00013421056068937227, + "loss": 0.6883, + "step": 6742 + }, + { + "epoch": 1.56, + "learning_rate": 0.00013419346769284616, + "loss": 0.6902, + "step": 6743 + }, + { + "epoch": 1.56, + "learning_rate": 0.00013417637356494155, + "loss": 0.6982, + "step": 6744 + }, + { + "epoch": 1.56, + "learning_rate": 0.00013415927830622408, + "loss": 0.6813, + "step": 6745 + }, + { + "epoch": 1.56, + "learning_rate": 0.0001341421819172593, + "loss": 0.6758, + "step": 6746 + }, + { + "epoch": 1.56, + "learning_rate": 0.00013412508439861293, + "loss": 0.6375, + "step": 6747 + }, + { + "epoch": 1.56, + "learning_rate": 0.00013410798575085072, + "loss": 0.7294, + "step": 6748 + }, + { + "epoch": 1.56, + "learning_rate": 0.0001340908859745384, + "loss": 0.7719, + "step": 6749 + }, + { + "epoch": 1.56, + "learning_rate": 0.00013407378507024177, + "loss": 0.6911, + "step": 6750 + }, + { + "epoch": 1.56, + "learning_rate": 0.00013405668303852664, + "loss": 0.7515, + "step": 6751 + }, + { + "epoch": 1.56, + "learning_rate": 0.00013403957987995882, + "loss": 0.6601, + "step": 6752 + }, + { + "epoch": 1.56, + "learning_rate": 0.0001340224755951043, + "loss": 0.7163, + "step": 6753 + }, + { + "epoch": 1.56, + "learning_rate": 0.000134005370184529, + "loss": 0.7132, + "step": 6754 + }, + { + "epoch": 1.56, + "learning_rate": 0.0001339882636487989, + "loss": 0.6744, + "step": 6755 + }, + { + "epoch": 1.56, + "learning_rate": 0.00013397115598848004, + "loss": 0.6848, + "step": 6756 + }, + { + "epoch": 1.56, + "learning_rate": 0.00013395404720413834, + "loss": 0.7531, + "step": 6757 + }, + { + "epoch": 1.56, + "learning_rate": 0.00013393693729634002, + "loss": 0.7381, + "step": 6758 + }, + { + "epoch": 1.56, + "learning_rate": 0.00013391982626565114, + "loss": 0.7334, + "step": 6759 + }, + { + "epoch": 1.57, + "learning_rate": 0.0001339027141126379, + "loss": 0.661, + "step": 6760 + }, + { + "epoch": 1.57, + "learning_rate": 0.00013388560083786653, + "loss": 0.7552, + "step": 6761 + }, + { + "epoch": 1.57, + "learning_rate": 0.00013386848644190318, + "loss": 0.6651, + "step": 6762 + }, + { + "epoch": 1.57, + "learning_rate": 0.00013385137092531413, + "loss": 0.7383, + "step": 6763 + }, + { + "epoch": 1.57, + "learning_rate": 0.00013383425428866577, + "loss": 0.606, + "step": 6764 + }, + { + "epoch": 1.57, + "learning_rate": 0.00013381713653252438, + "loss": 0.6578, + "step": 6765 + }, + { + "epoch": 1.57, + "learning_rate": 0.0001338000176574564, + "loss": 0.6638, + "step": 6766 + }, + { + "epoch": 1.57, + "learning_rate": 0.0001337828976640282, + "loss": 0.7462, + "step": 6767 + }, + { + "epoch": 1.57, + "learning_rate": 0.00013376577655280626, + "loss": 0.6608, + "step": 6768 + }, + { + "epoch": 1.57, + "learning_rate": 0.00013374865432435706, + "loss": 0.6931, + "step": 6769 + }, + { + "epoch": 1.57, + "learning_rate": 0.00013373153097924717, + "loss": 0.713, + "step": 6770 + }, + { + "epoch": 1.57, + "learning_rate": 0.00013371440651804313, + "loss": 0.6551, + "step": 6771 + }, + { + "epoch": 1.57, + "learning_rate": 0.0001336972809413116, + "loss": 0.65, + "step": 6772 + }, + { + "epoch": 1.57, + "learning_rate": 0.0001336801542496191, + "loss": 0.6926, + "step": 6773 + }, + { + "epoch": 1.57, + "learning_rate": 0.00013366302644353243, + "loss": 0.7106, + "step": 6774 + }, + { + "epoch": 1.57, + "learning_rate": 0.00013364589752361822, + "loss": 0.7121, + "step": 6775 + }, + { + "epoch": 1.57, + "learning_rate": 0.00013362876749044333, + "loss": 0.709, + "step": 6776 + }, + { + "epoch": 1.57, + "learning_rate": 0.00013361163634457448, + "loss": 0.7622, + "step": 6777 + }, + { + "epoch": 1.57, + "learning_rate": 0.0001335945040865785, + "loss": 0.7049, + "step": 6778 + }, + { + "epoch": 1.57, + "learning_rate": 0.00013357737071702225, + "loss": 0.7634, + "step": 6779 + }, + { + "epoch": 1.57, + "learning_rate": 0.00013356023623647265, + "loss": 0.6703, + "step": 6780 + }, + { + "epoch": 1.57, + "learning_rate": 0.00013354310064549662, + "loss": 0.6673, + "step": 6781 + }, + { + "epoch": 1.57, + "learning_rate": 0.00013352596394466119, + "loss": 0.7178, + "step": 6782 + }, + { + "epoch": 1.57, + "learning_rate": 0.0001335088261345333, + "loss": 0.7506, + "step": 6783 + }, + { + "epoch": 1.57, + "learning_rate": 0.00013349168721568005, + "loss": 0.7099, + "step": 6784 + }, + { + "epoch": 1.57, + "learning_rate": 0.00013347454718866847, + "loss": 0.7226, + "step": 6785 + }, + { + "epoch": 1.57, + "learning_rate": 0.00013345740605406572, + "loss": 0.6705, + "step": 6786 + }, + { + "epoch": 1.57, + "learning_rate": 0.00013344026381243896, + "loss": 0.6955, + "step": 6787 + }, + { + "epoch": 1.57, + "learning_rate": 0.00013342312046435542, + "loss": 0.7779, + "step": 6788 + }, + { + "epoch": 1.57, + "learning_rate": 0.00013340597601038223, + "loss": 0.7194, + "step": 6789 + }, + { + "epoch": 1.57, + "learning_rate": 0.00013338883045108674, + "loss": 0.6829, + "step": 6790 + }, + { + "epoch": 1.57, + "learning_rate": 0.00013337168378703624, + "loss": 0.6837, + "step": 6791 + }, + { + "epoch": 1.57, + "learning_rate": 0.00013335453601879803, + "loss": 0.6805, + "step": 6792 + }, + { + "epoch": 1.57, + "learning_rate": 0.00013333738714693956, + "loss": 0.7422, + "step": 6793 + }, + { + "epoch": 1.57, + "learning_rate": 0.0001333202371720282, + "loss": 0.6753, + "step": 6794 + }, + { + "epoch": 1.57, + "learning_rate": 0.0001333030860946314, + "loss": 0.6927, + "step": 6795 + }, + { + "epoch": 1.57, + "learning_rate": 0.00013328593391531666, + "loss": 0.7417, + "step": 6796 + }, + { + "epoch": 1.57, + "learning_rate": 0.00013326878063465148, + "loss": 0.7522, + "step": 6797 + }, + { + "epoch": 1.57, + "learning_rate": 0.00013325162625320346, + "loss": 0.7098, + "step": 6798 + }, + { + "epoch": 1.57, + "learning_rate": 0.00013323447077154016, + "loss": 0.7056, + "step": 6799 + }, + { + "epoch": 1.57, + "learning_rate": 0.00013321731419022924, + "loss": 0.7238, + "step": 6800 + }, + { + "epoch": 1.57, + "learning_rate": 0.00013320015650983836, + "loss": 0.7119, + "step": 6801 + }, + { + "epoch": 1.57, + "learning_rate": 0.00013318299773093522, + "loss": 0.7194, + "step": 6802 + }, + { + "epoch": 1.58, + "learning_rate": 0.00013316583785408755, + "loss": 0.7378, + "step": 6803 + }, + { + "epoch": 1.58, + "learning_rate": 0.00013314867687986317, + "loss": 0.7521, + "step": 6804 + }, + { + "epoch": 1.58, + "learning_rate": 0.00013313151480882985, + "loss": 0.6933, + "step": 6805 + }, + { + "epoch": 1.58, + "learning_rate": 0.00013311435164155546, + "loss": 0.6917, + "step": 6806 + }, + { + "epoch": 1.58, + "learning_rate": 0.0001330971873786079, + "loss": 0.6962, + "step": 6807 + }, + { + "epoch": 1.58, + "learning_rate": 0.00013308002202055508, + "loss": 0.6831, + "step": 6808 + }, + { + "epoch": 1.58, + "learning_rate": 0.00013306285556796495, + "loss": 0.7213, + "step": 6809 + }, + { + "epoch": 1.58, + "learning_rate": 0.0001330456880214055, + "loss": 0.7221, + "step": 6810 + }, + { + "epoch": 1.58, + "learning_rate": 0.0001330285193814448, + "loss": 0.6712, + "step": 6811 + }, + { + "epoch": 1.58, + "learning_rate": 0.00013301134964865092, + "loss": 0.6188, + "step": 6812 + }, + { + "epoch": 1.58, + "learning_rate": 0.00013299417882359191, + "loss": 0.7011, + "step": 6813 + }, + { + "epoch": 1.58, + "learning_rate": 0.00013297700690683597, + "loss": 0.6598, + "step": 6814 + }, + { + "epoch": 1.58, + "learning_rate": 0.00013295983389895123, + "loss": 0.6346, + "step": 6815 + }, + { + "epoch": 1.58, + "learning_rate": 0.00013294265980050594, + "loss": 0.7412, + "step": 6816 + }, + { + "epoch": 1.58, + "learning_rate": 0.0001329254846120683, + "loss": 0.7053, + "step": 6817 + }, + { + "epoch": 1.58, + "learning_rate": 0.00013290830833420663, + "loss": 0.6958, + "step": 6818 + }, + { + "epoch": 1.58, + "learning_rate": 0.0001328911309674893, + "loss": 0.7649, + "step": 6819 + }, + { + "epoch": 1.58, + "learning_rate": 0.00013287395251248456, + "loss": 0.6567, + "step": 6820 + }, + { + "epoch": 1.58, + "learning_rate": 0.00013285677296976086, + "loss": 0.7409, + "step": 6821 + }, + { + "epoch": 1.58, + "learning_rate": 0.0001328395923398867, + "loss": 0.698, + "step": 6822 + }, + { + "epoch": 1.58, + "learning_rate": 0.00013282241062343038, + "loss": 0.7526, + "step": 6823 + }, + { + "epoch": 1.58, + "learning_rate": 0.00013280522782096056, + "loss": 0.712, + "step": 6824 + }, + { + "epoch": 1.58, + "learning_rate": 0.00013278804393304568, + "loss": 0.7222, + "step": 6825 + }, + { + "epoch": 1.58, + "learning_rate": 0.00013277085896025434, + "loss": 0.6763, + "step": 6826 + }, + { + "epoch": 1.58, + "learning_rate": 0.00013275367290315515, + "loss": 0.7, + "step": 6827 + }, + { + "epoch": 1.58, + "learning_rate": 0.0001327364857623168, + "loss": 0.6663, + "step": 6828 + }, + { + "epoch": 1.58, + "learning_rate": 0.00013271929753830788, + "loss": 0.7383, + "step": 6829 + }, + { + "epoch": 1.58, + "learning_rate": 0.00013270210823169718, + "loss": 0.7344, + "step": 6830 + }, + { + "epoch": 1.58, + "learning_rate": 0.00013268491784305342, + "loss": 0.7569, + "step": 6831 + }, + { + "epoch": 1.58, + "learning_rate": 0.00013266772637294538, + "loss": 0.6886, + "step": 6832 + }, + { + "epoch": 1.58, + "learning_rate": 0.00013265053382194192, + "loss": 0.6704, + "step": 6833 + }, + { + "epoch": 1.58, + "learning_rate": 0.00013263334019061188, + "loss": 0.7012, + "step": 6834 + }, + { + "epoch": 1.58, + "learning_rate": 0.00013261614547952416, + "loss": 0.7037, + "step": 6835 + }, + { + "epoch": 1.58, + "learning_rate": 0.00013259894968924765, + "loss": 0.741, + "step": 6836 + }, + { + "epoch": 1.58, + "learning_rate": 0.00013258175282035136, + "loss": 0.7313, + "step": 6837 + }, + { + "epoch": 1.58, + "learning_rate": 0.0001325645548734043, + "loss": 0.6861, + "step": 6838 + }, + { + "epoch": 1.58, + "learning_rate": 0.0001325473558489755, + "loss": 0.7494, + "step": 6839 + }, + { + "epoch": 1.58, + "learning_rate": 0.000132530155747634, + "loss": 0.6867, + "step": 6840 + }, + { + "epoch": 1.58, + "learning_rate": 0.00013251295456994899, + "loss": 0.6676, + "step": 6841 + }, + { + "epoch": 1.58, + "learning_rate": 0.00013249575231648948, + "loss": 0.7415, + "step": 6842 + }, + { + "epoch": 1.58, + "learning_rate": 0.0001324785489878248, + "loss": 0.6957, + "step": 6843 + }, + { + "epoch": 1.58, + "learning_rate": 0.00013246134458452406, + "loss": 0.7124, + "step": 6844 + }, + { + "epoch": 1.58, + "learning_rate": 0.00013244413910715657, + "loss": 0.7, + "step": 6845 + }, + { + "epoch": 1.58, + "learning_rate": 0.0001324269325562916, + "loss": 0.6121, + "step": 6846 + }, + { + "epoch": 1.59, + "learning_rate": 0.00013240972493249847, + "loss": 0.7372, + "step": 6847 + }, + { + "epoch": 1.59, + "learning_rate": 0.0001323925162363465, + "loss": 0.6477, + "step": 6848 + }, + { + "epoch": 1.59, + "learning_rate": 0.00013237530646840516, + "loss": 0.7254, + "step": 6849 + }, + { + "epoch": 1.59, + "learning_rate": 0.0001323580956292438, + "loss": 0.6822, + "step": 6850 + }, + { + "epoch": 1.59, + "learning_rate": 0.000132340883719432, + "loss": 0.7156, + "step": 6851 + }, + { + "epoch": 1.59, + "learning_rate": 0.00013232367073953913, + "loss": 0.7198, + "step": 6852 + }, + { + "epoch": 1.59, + "learning_rate": 0.00013230645669013474, + "loss": 0.6114, + "step": 6853 + }, + { + "epoch": 1.59, + "learning_rate": 0.0001322892415717885, + "loss": 0.7346, + "step": 6854 + }, + { + "epoch": 1.59, + "learning_rate": 0.00013227202538506995, + "loss": 0.7426, + "step": 6855 + }, + { + "epoch": 1.59, + "learning_rate": 0.00013225480813054875, + "loss": 0.7751, + "step": 6856 + }, + { + "epoch": 1.59, + "learning_rate": 0.00013223758980879458, + "loss": 0.6869, + "step": 6857 + }, + { + "epoch": 1.59, + "learning_rate": 0.0001322203704203771, + "loss": 0.6742, + "step": 6858 + }, + { + "epoch": 1.59, + "learning_rate": 0.00013220314996586612, + "loss": 0.7986, + "step": 6859 + }, + { + "epoch": 1.59, + "learning_rate": 0.0001321859284458314, + "loss": 0.7208, + "step": 6860 + }, + { + "epoch": 1.59, + "learning_rate": 0.00013216870586084273, + "loss": 0.6832, + "step": 6861 + }, + { + "epoch": 1.59, + "learning_rate": 0.00013215148221147006, + "loss": 0.6368, + "step": 6862 + }, + { + "epoch": 1.59, + "learning_rate": 0.00013213425749828315, + "loss": 0.6553, + "step": 6863 + }, + { + "epoch": 1.59, + "learning_rate": 0.000132117031721852, + "loss": 0.71, + "step": 6864 + }, + { + "epoch": 1.59, + "learning_rate": 0.00013209980488274652, + "loss": 0.6994, + "step": 6865 + }, + { + "epoch": 1.59, + "learning_rate": 0.00013208257698153677, + "loss": 0.6191, + "step": 6866 + }, + { + "epoch": 1.59, + "learning_rate": 0.00013206534801879274, + "loss": 0.696, + "step": 6867 + }, + { + "epoch": 1.59, + "learning_rate": 0.00013204811799508452, + "loss": 0.738, + "step": 6868 + }, + { + "epoch": 1.59, + "learning_rate": 0.00013203088691098218, + "loss": 0.6921, + "step": 6869 + }, + { + "epoch": 1.59, + "learning_rate": 0.00013201365476705584, + "loss": 0.6871, + "step": 6870 + }, + { + "epoch": 1.59, + "learning_rate": 0.00013199642156387571, + "loss": 0.6286, + "step": 6871 + }, + { + "epoch": 1.59, + "learning_rate": 0.00013197918730201199, + "loss": 0.7449, + "step": 6872 + }, + { + "epoch": 1.59, + "learning_rate": 0.0001319619519820349, + "loss": 0.7434, + "step": 6873 + }, + { + "epoch": 1.59, + "learning_rate": 0.00013194471560451474, + "loss": 0.694, + "step": 6874 + }, + { + "epoch": 1.59, + "learning_rate": 0.00013192747817002178, + "loss": 0.6858, + "step": 6875 + }, + { + "epoch": 1.59, + "learning_rate": 0.00013191023967912637, + "loss": 0.6548, + "step": 6876 + }, + { + "epoch": 1.59, + "learning_rate": 0.00013189300013239893, + "loss": 0.6949, + "step": 6877 + }, + { + "epoch": 1.59, + "learning_rate": 0.00013187575953040986, + "loss": 0.72, + "step": 6878 + }, + { + "epoch": 1.59, + "learning_rate": 0.0001318585178737296, + "loss": 0.7472, + "step": 6879 + }, + { + "epoch": 1.59, + "learning_rate": 0.00013184127516292863, + "loss": 0.7234, + "step": 6880 + }, + { + "epoch": 1.59, + "learning_rate": 0.00013182403139857748, + "loss": 0.7491, + "step": 6881 + }, + { + "epoch": 1.59, + "learning_rate": 0.0001318067865812467, + "loss": 0.745, + "step": 6882 + }, + { + "epoch": 1.59, + "learning_rate": 0.00013178954071150687, + "loss": 0.7318, + "step": 6883 + }, + { + "epoch": 1.59, + "learning_rate": 0.0001317722937899286, + "loss": 0.7131, + "step": 6884 + }, + { + "epoch": 1.59, + "learning_rate": 0.0001317550458170826, + "loss": 0.676, + "step": 6885 + }, + { + "epoch": 1.59, + "learning_rate": 0.00013173779679353956, + "loss": 0.6657, + "step": 6886 + }, + { + "epoch": 1.59, + "learning_rate": 0.00013172054671987013, + "loss": 0.7332, + "step": 6887 + }, + { + "epoch": 1.59, + "learning_rate": 0.0001317032955966451, + "loss": 0.7166, + "step": 6888 + }, + { + "epoch": 1.59, + "learning_rate": 0.0001316860434244354, + "loss": 0.6219, + "step": 6889 + }, + { + "epoch": 1.6, + "learning_rate": 0.0001316687902038117, + "loss": 0.742, + "step": 6890 + }, + { + "epoch": 1.6, + "learning_rate": 0.0001316515359353449, + "loss": 0.7268, + "step": 6891 + }, + { + "epoch": 1.6, + "learning_rate": 0.00013163428061960594, + "loss": 0.6693, + "step": 6892 + }, + { + "epoch": 1.6, + "learning_rate": 0.00013161702425716572, + "loss": 0.7151, + "step": 6893 + }, + { + "epoch": 1.6, + "learning_rate": 0.00013159976684859527, + "loss": 0.6886, + "step": 6894 + }, + { + "epoch": 1.6, + "learning_rate": 0.00013158250839446554, + "loss": 0.6987, + "step": 6895 + }, + { + "epoch": 1.6, + "learning_rate": 0.00013156524889534757, + "loss": 0.6954, + "step": 6896 + }, + { + "epoch": 1.6, + "learning_rate": 0.00013154798835181248, + "loss": 0.7579, + "step": 6897 + }, + { + "epoch": 1.6, + "learning_rate": 0.00013153072676443133, + "loss": 0.697, + "step": 6898 + }, + { + "epoch": 1.6, + "learning_rate": 0.0001315134641337753, + "loss": 0.7314, + "step": 6899 + }, + { + "epoch": 1.6, + "learning_rate": 0.00013149620046041554, + "loss": 0.6458, + "step": 6900 + }, + { + "epoch": 1.6, + "learning_rate": 0.00013147893574492328, + "loss": 0.7058, + "step": 6901 + }, + { + "epoch": 1.6, + "learning_rate": 0.00013146166998786974, + "loss": 0.6569, + "step": 6902 + }, + { + "epoch": 1.6, + "learning_rate": 0.00013144440318982626, + "loss": 0.6695, + "step": 6903 + }, + { + "epoch": 1.6, + "learning_rate": 0.00013142713535136414, + "loss": 0.6704, + "step": 6904 + }, + { + "epoch": 1.6, + "learning_rate": 0.00013140986647305466, + "loss": 0.6844, + "step": 6905 + }, + { + "epoch": 1.6, + "learning_rate": 0.00013139259655546928, + "loss": 0.7003, + "step": 6906 + }, + { + "epoch": 1.6, + "learning_rate": 0.00013137532559917938, + "loss": 0.7574, + "step": 6907 + }, + { + "epoch": 1.6, + "learning_rate": 0.00013135805360475647, + "loss": 0.6788, + "step": 6908 + }, + { + "epoch": 1.6, + "learning_rate": 0.00013134078057277195, + "loss": 0.7497, + "step": 6909 + }, + { + "epoch": 1.6, + "learning_rate": 0.00013132350650379743, + "loss": 0.6636, + "step": 6910 + }, + { + "epoch": 1.6, + "learning_rate": 0.0001313062313984044, + "loss": 0.7488, + "step": 6911 + }, + { + "epoch": 1.6, + "learning_rate": 0.00013128895525716452, + "loss": 0.7265, + "step": 6912 + }, + { + "epoch": 1.6, + "learning_rate": 0.00013127167808064933, + "loss": 0.6438, + "step": 6913 + }, + { + "epoch": 1.6, + "learning_rate": 0.00013125439986943056, + "loss": 0.6928, + "step": 6914 + }, + { + "epoch": 1.6, + "learning_rate": 0.00013123712062407985, + "loss": 0.7322, + "step": 6915 + }, + { + "epoch": 1.6, + "learning_rate": 0.00013121984034516899, + "loss": 0.6584, + "step": 6916 + }, + { + "epoch": 1.6, + "learning_rate": 0.00013120255903326968, + "loss": 0.6874, + "step": 6917 + }, + { + "epoch": 1.6, + "learning_rate": 0.00013118527668895378, + "loss": 0.6519, + "step": 6918 + }, + { + "epoch": 1.6, + "learning_rate": 0.00013116799331279306, + "loss": 0.7051, + "step": 6919 + }, + { + "epoch": 1.6, + "learning_rate": 0.0001311507089053594, + "loss": 0.7692, + "step": 6920 + }, + { + "epoch": 1.6, + "learning_rate": 0.00013113342346722474, + "loss": 0.6582, + "step": 6921 + }, + { + "epoch": 1.6, + "learning_rate": 0.00013111613699896095, + "loss": 0.7115, + "step": 6922 + }, + { + "epoch": 1.6, + "learning_rate": 0.00013109884950114007, + "loss": 0.668, + "step": 6923 + }, + { + "epoch": 1.6, + "learning_rate": 0.00013108156097433403, + "loss": 0.6955, + "step": 6924 + }, + { + "epoch": 1.6, + "learning_rate": 0.0001310642714191149, + "loss": 0.6811, + "step": 6925 + }, + { + "epoch": 1.6, + "learning_rate": 0.00013104698083605474, + "loss": 0.6248, + "step": 6926 + }, + { + "epoch": 1.6, + "learning_rate": 0.00013102968922572568, + "loss": 0.7407, + "step": 6927 + }, + { + "epoch": 1.6, + "learning_rate": 0.00013101239658869983, + "loss": 0.6874, + "step": 6928 + }, + { + "epoch": 1.6, + "learning_rate": 0.00013099510292554934, + "loss": 0.7235, + "step": 6929 + }, + { + "epoch": 1.6, + "learning_rate": 0.00013097780823684646, + "loss": 0.6811, + "step": 6930 + }, + { + "epoch": 1.6, + "learning_rate": 0.00013096051252316343, + "loss": 0.6956, + "step": 6931 + }, + { + "epoch": 1.6, + "learning_rate": 0.00013094321578507246, + "loss": 0.7342, + "step": 6932 + }, + { + "epoch": 1.61, + "learning_rate": 0.00013092591802314594, + "loss": 0.705, + "step": 6933 + }, + { + "epoch": 1.61, + "learning_rate": 0.00013090861923795616, + "loss": 0.7044, + "step": 6934 + }, + { + "epoch": 1.61, + "learning_rate": 0.00013089131943007552, + "loss": 0.644, + "step": 6935 + }, + { + "epoch": 1.61, + "learning_rate": 0.0001308740186000764, + "loss": 0.7341, + "step": 6936 + }, + { + "epoch": 1.61, + "learning_rate": 0.00013085671674853124, + "loss": 0.7489, + "step": 6937 + }, + { + "epoch": 1.61, + "learning_rate": 0.00013083941387601256, + "loss": 0.715, + "step": 6938 + }, + { + "epoch": 1.61, + "learning_rate": 0.00013082210998309284, + "loss": 0.6571, + "step": 6939 + }, + { + "epoch": 1.61, + "learning_rate": 0.0001308048050703446, + "loss": 0.7081, + "step": 6940 + }, + { + "epoch": 1.61, + "learning_rate": 0.00013078749913834046, + "loss": 0.6846, + "step": 6941 + }, + { + "epoch": 1.61, + "learning_rate": 0.00013077019218765305, + "loss": 0.7169, + "step": 6942 + }, + { + "epoch": 1.61, + "learning_rate": 0.0001307528842188549, + "loss": 0.7419, + "step": 6943 + }, + { + "epoch": 1.61, + "learning_rate": 0.00013073557523251886, + "loss": 0.6728, + "step": 6944 + }, + { + "epoch": 1.61, + "learning_rate": 0.0001307182652292175, + "loss": 0.7046, + "step": 6945 + }, + { + "epoch": 1.61, + "learning_rate": 0.00013070095420952363, + "loss": 0.7036, + "step": 6946 + }, + { + "epoch": 1.61, + "learning_rate": 0.00013068364217401002, + "loss": 0.6638, + "step": 6947 + }, + { + "epoch": 1.61, + "learning_rate": 0.00013066632912324947, + "loss": 0.6644, + "step": 6948 + }, + { + "epoch": 1.61, + "learning_rate": 0.00013064901505781485, + "loss": 0.6731, + "step": 6949 + }, + { + "epoch": 1.61, + "learning_rate": 0.00013063169997827898, + "loss": 0.718, + "step": 6950 + }, + { + "epoch": 1.61, + "learning_rate": 0.00013061438388521488, + "loss": 0.7298, + "step": 6951 + }, + { + "epoch": 1.61, + "learning_rate": 0.0001305970667791954, + "loss": 0.6753, + "step": 6952 + }, + { + "epoch": 1.61, + "learning_rate": 0.0001305797486607936, + "loss": 0.7005, + "step": 6953 + }, + { + "epoch": 1.61, + "learning_rate": 0.00013056242953058238, + "loss": 0.7015, + "step": 6954 + }, + { + "epoch": 1.61, + "learning_rate": 0.0001305451093891349, + "loss": 0.7011, + "step": 6955 + }, + { + "epoch": 1.61, + "learning_rate": 0.0001305277882370242, + "loss": 0.7171, + "step": 6956 + }, + { + "epoch": 1.61, + "learning_rate": 0.0001305104660748234, + "loss": 0.7262, + "step": 6957 + }, + { + "epoch": 1.61, + "learning_rate": 0.00013049314290310566, + "loss": 0.6371, + "step": 6958 + }, + { + "epoch": 1.61, + "learning_rate": 0.00013047581872244413, + "loss": 0.6248, + "step": 6959 + }, + { + "epoch": 1.61, + "learning_rate": 0.00013045849353341202, + "loss": 0.6869, + "step": 6960 + }, + { + "epoch": 1.61, + "learning_rate": 0.0001304411673365826, + "loss": 0.6605, + "step": 6961 + }, + { + "epoch": 1.61, + "learning_rate": 0.00013042384013252917, + "loss": 0.6977, + "step": 6962 + }, + { + "epoch": 1.61, + "learning_rate": 0.00013040651192182504, + "loss": 0.7004, + "step": 6963 + }, + { + "epoch": 1.61, + "learning_rate": 0.00013038918270504355, + "loss": 0.7677, + "step": 6964 + }, + { + "epoch": 1.61, + "learning_rate": 0.00013037185248275803, + "loss": 0.7064, + "step": 6965 + }, + { + "epoch": 1.61, + "learning_rate": 0.00013035452125554196, + "loss": 0.7243, + "step": 6966 + }, + { + "epoch": 1.61, + "learning_rate": 0.00013033718902396877, + "loss": 0.6944, + "step": 6967 + }, + { + "epoch": 1.61, + "learning_rate": 0.00013031985578861196, + "loss": 0.7261, + "step": 6968 + }, + { + "epoch": 1.61, + "learning_rate": 0.000130302521550045, + "loss": 0.6677, + "step": 6969 + }, + { + "epoch": 1.61, + "learning_rate": 0.00013028518630884146, + "loss": 0.667, + "step": 6970 + }, + { + "epoch": 1.61, + "learning_rate": 0.00013026785006557492, + "loss": 0.6694, + "step": 6971 + }, + { + "epoch": 1.61, + "learning_rate": 0.00013025051282081898, + "loss": 0.6638, + "step": 6972 + }, + { + "epoch": 1.61, + "learning_rate": 0.00013023317457514733, + "loss": 0.6582, + "step": 6973 + }, + { + "epoch": 1.61, + "learning_rate": 0.00013021583532913362, + "loss": 0.7341, + "step": 6974 + }, + { + "epoch": 1.61, + "learning_rate": 0.00013019849508335158, + "loss": 0.7203, + "step": 6975 + }, + { + "epoch": 1.62, + "learning_rate": 0.00013018115383837492, + "loss": 0.7192, + "step": 6976 + }, + { + "epoch": 1.62, + "learning_rate": 0.00013016381159477745, + "loss": 0.7109, + "step": 6977 + }, + { + "epoch": 1.62, + "learning_rate": 0.00013014646835313298, + "loss": 0.7422, + "step": 6978 + }, + { + "epoch": 1.62, + "learning_rate": 0.00013012912411401536, + "loss": 0.7383, + "step": 6979 + }, + { + "epoch": 1.62, + "learning_rate": 0.00013011177887799845, + "loss": 0.7474, + "step": 6980 + }, + { + "epoch": 1.62, + "learning_rate": 0.00013009443264565618, + "loss": 0.6814, + "step": 6981 + }, + { + "epoch": 1.62, + "learning_rate": 0.0001300770854175625, + "loss": 0.7059, + "step": 6982 + }, + { + "epoch": 1.62, + "learning_rate": 0.0001300597371942913, + "loss": 0.7183, + "step": 6983 + }, + { + "epoch": 1.62, + "learning_rate": 0.00013004238797641675, + "loss": 0.719, + "step": 6984 + }, + { + "epoch": 1.62, + "learning_rate": 0.0001300250377645128, + "loss": 0.712, + "step": 6985 + }, + { + "epoch": 1.62, + "learning_rate": 0.00013000768655915349, + "loss": 0.6781, + "step": 6986 + }, + { + "epoch": 1.62, + "learning_rate": 0.000129990334360913, + "loss": 0.6788, + "step": 6987 + }, + { + "epoch": 1.62, + "learning_rate": 0.00012997298117036542, + "loss": 0.6217, + "step": 6988 + }, + { + "epoch": 1.62, + "learning_rate": 0.00012995562698808494, + "loss": 0.7409, + "step": 6989 + }, + { + "epoch": 1.62, + "learning_rate": 0.00012993827181464584, + "loss": 0.678, + "step": 6990 + }, + { + "epoch": 1.62, + "learning_rate": 0.00012992091565062225, + "loss": 0.7306, + "step": 6991 + }, + { + "epoch": 1.62, + "learning_rate": 0.0001299035584965885, + "loss": 0.685, + "step": 6992 + }, + { + "epoch": 1.62, + "learning_rate": 0.0001298862003531189, + "loss": 0.7053, + "step": 6993 + }, + { + "epoch": 1.62, + "learning_rate": 0.00012986884122078774, + "loss": 0.716, + "step": 6994 + }, + { + "epoch": 1.62, + "learning_rate": 0.00012985148110016947, + "loss": 0.7042, + "step": 6995 + }, + { + "epoch": 1.62, + "learning_rate": 0.00012983411999183844, + "loss": 0.7455, + "step": 6996 + }, + { + "epoch": 1.62, + "learning_rate": 0.00012981675789636905, + "loss": 0.7093, + "step": 6997 + }, + { + "epoch": 1.62, + "learning_rate": 0.0001297993948143359, + "loss": 0.7263, + "step": 6998 + }, + { + "epoch": 1.62, + "learning_rate": 0.00012978203074631334, + "loss": 0.6977, + "step": 6999 + }, + { + "epoch": 1.62, + "learning_rate": 0.000129764665692876, + "loss": 0.706, + "step": 7000 + }, + { + "epoch": 1.62, + "learning_rate": 0.00012974729965459843, + "loss": 0.6838, + "step": 7001 + }, + { + "epoch": 1.62, + "learning_rate": 0.0001297299326320552, + "loss": 0.6906, + "step": 7002 + }, + { + "epoch": 1.62, + "learning_rate": 0.00012971256462582099, + "loss": 0.7652, + "step": 7003 + }, + { + "epoch": 1.62, + "learning_rate": 0.00012969519563647041, + "loss": 0.7305, + "step": 7004 + }, + { + "epoch": 1.62, + "learning_rate": 0.00012967782566457818, + "loss": 0.6605, + "step": 7005 + }, + { + "epoch": 1.62, + "learning_rate": 0.00012966045471071906, + "loss": 0.687, + "step": 7006 + }, + { + "epoch": 1.62, + "learning_rate": 0.00012964308277546777, + "loss": 0.7317, + "step": 7007 + }, + { + "epoch": 1.62, + "learning_rate": 0.00012962570985939915, + "loss": 0.7773, + "step": 7008 + }, + { + "epoch": 1.62, + "learning_rate": 0.000129608335963088, + "loss": 0.6971, + "step": 7009 + }, + { + "epoch": 1.62, + "learning_rate": 0.00012959096108710916, + "loss": 0.6669, + "step": 7010 + }, + { + "epoch": 1.62, + "learning_rate": 0.00012957358523203754, + "loss": 0.6921, + "step": 7011 + }, + { + "epoch": 1.62, + "learning_rate": 0.00012955620839844807, + "loss": 0.7246, + "step": 7012 + }, + { + "epoch": 1.62, + "learning_rate": 0.0001295388305869157, + "loss": 0.697, + "step": 7013 + }, + { + "epoch": 1.62, + "learning_rate": 0.00012952145179801546, + "loss": 0.7711, + "step": 7014 + }, + { + "epoch": 1.62, + "learning_rate": 0.0001295040720323223, + "loss": 0.7396, + "step": 7015 + }, + { + "epoch": 1.62, + "learning_rate": 0.0001294866912904113, + "loss": 0.7198, + "step": 7016 + }, + { + "epoch": 1.62, + "learning_rate": 0.00012946930957285759, + "loss": 0.729, + "step": 7017 + }, + { + "epoch": 1.62, + "learning_rate": 0.00012945192688023624, + "loss": 0.7018, + "step": 7018 + }, + { + "epoch": 1.63, + "learning_rate": 0.00012943454321312243, + "loss": 0.7546, + "step": 7019 + }, + { + "epoch": 1.63, + "learning_rate": 0.0001294171585720913, + "loss": 0.7151, + "step": 7020 + }, + { + "epoch": 1.63, + "learning_rate": 0.00012939977295771813, + "loss": 0.6884, + "step": 7021 + }, + { + "epoch": 1.63, + "learning_rate": 0.00012938238637057813, + "loss": 0.7435, + "step": 7022 + }, + { + "epoch": 1.63, + "learning_rate": 0.00012936499881124655, + "loss": 0.6544, + "step": 7023 + }, + { + "epoch": 1.63, + "learning_rate": 0.00012934761028029874, + "loss": 0.6803, + "step": 7024 + }, + { + "epoch": 1.63, + "learning_rate": 0.00012933022077831006, + "loss": 0.7155, + "step": 7025 + }, + { + "epoch": 1.63, + "learning_rate": 0.00012931283030585583, + "loss": 0.6685, + "step": 7026 + }, + { + "epoch": 1.63, + "learning_rate": 0.00012929543886351153, + "loss": 0.7097, + "step": 7027 + }, + { + "epoch": 1.63, + "learning_rate": 0.00012927804645185254, + "loss": 0.6971, + "step": 7028 + }, + { + "epoch": 1.63, + "learning_rate": 0.00012926065307145436, + "loss": 0.7284, + "step": 7029 + }, + { + "epoch": 1.63, + "learning_rate": 0.0001292432587228925, + "loss": 0.6131, + "step": 7030 + }, + { + "epoch": 1.63, + "learning_rate": 0.0001292258634067425, + "loss": 0.6172, + "step": 7031 + }, + { + "epoch": 1.63, + "learning_rate": 0.0001292084671235799, + "loss": 0.6766, + "step": 7032 + }, + { + "epoch": 1.63, + "learning_rate": 0.0001291910698739803, + "loss": 0.7629, + "step": 7033 + }, + { + "epoch": 1.63, + "learning_rate": 0.00012917367165851933, + "loss": 0.671, + "step": 7034 + }, + { + "epoch": 1.63, + "learning_rate": 0.00012915627247777272, + "loss": 0.8079, + "step": 7035 + }, + { + "epoch": 1.63, + "learning_rate": 0.0001291388723323161, + "loss": 0.7142, + "step": 7036 + }, + { + "epoch": 1.63, + "learning_rate": 0.00012912147122272523, + "loss": 0.6929, + "step": 7037 + }, + { + "epoch": 1.63, + "learning_rate": 0.00012910406914957582, + "loss": 0.7206, + "step": 7038 + }, + { + "epoch": 1.63, + "learning_rate": 0.00012908666611344374, + "loss": 0.6797, + "step": 7039 + }, + { + "epoch": 1.63, + "learning_rate": 0.00012906926211490475, + "loss": 0.6807, + "step": 7040 + }, + { + "epoch": 1.63, + "learning_rate": 0.00012905185715453475, + "loss": 0.6661, + "step": 7041 + }, + { + "epoch": 1.63, + "learning_rate": 0.0001290344512329096, + "loss": 0.7213, + "step": 7042 + }, + { + "epoch": 1.63, + "learning_rate": 0.00012901704435060526, + "loss": 0.7149, + "step": 7043 + }, + { + "epoch": 1.63, + "learning_rate": 0.0001289996365081976, + "loss": 0.6639, + "step": 7044 + }, + { + "epoch": 1.63, + "learning_rate": 0.00012898222770626268, + "loss": 0.7508, + "step": 7045 + }, + { + "epoch": 1.63, + "learning_rate": 0.00012896481794537644, + "loss": 0.7543, + "step": 7046 + }, + { + "epoch": 1.63, + "learning_rate": 0.00012894740722611502, + "loss": 0.7464, + "step": 7047 + }, + { + "epoch": 1.63, + "learning_rate": 0.00012892999554905447, + "loss": 0.757, + "step": 7048 + }, + { + "epoch": 1.63, + "learning_rate": 0.00012891258291477086, + "loss": 0.6844, + "step": 7049 + }, + { + "epoch": 1.63, + "learning_rate": 0.00012889516932384031, + "loss": 0.707, + "step": 7050 + }, + { + "epoch": 1.63, + "learning_rate": 0.00012887775477683907, + "loss": 0.7194, + "step": 7051 + }, + { + "epoch": 1.63, + "learning_rate": 0.0001288603392743433, + "loss": 0.6837, + "step": 7052 + }, + { + "epoch": 1.63, + "learning_rate": 0.00012884292281692931, + "loss": 0.6452, + "step": 7053 + }, + { + "epoch": 1.63, + "learning_rate": 0.00012882550540517325, + "loss": 0.6905, + "step": 7054 + }, + { + "epoch": 1.63, + "learning_rate": 0.00012880808703965148, + "loss": 0.6301, + "step": 7055 + }, + { + "epoch": 1.63, + "learning_rate": 0.0001287906677209403, + "loss": 0.7091, + "step": 7056 + }, + { + "epoch": 1.63, + "learning_rate": 0.00012877324744961613, + "loss": 0.7115, + "step": 7057 + }, + { + "epoch": 1.63, + "learning_rate": 0.00012875582622625534, + "loss": 0.7245, + "step": 7058 + }, + { + "epoch": 1.63, + "learning_rate": 0.0001287384040514344, + "loss": 0.6939, + "step": 7059 + }, + { + "epoch": 1.63, + "learning_rate": 0.00012872098092572964, + "loss": 0.715, + "step": 7060 + }, + { + "epoch": 1.63, + "learning_rate": 0.00012870355684971763, + "loss": 0.6856, + "step": 7061 + }, + { + "epoch": 1.63, + "learning_rate": 0.0001286861318239749, + "loss": 0.6391, + "step": 7062 + }, + { + "epoch": 1.64, + "learning_rate": 0.000128668705849078, + "loss": 0.7158, + "step": 7063 + }, + { + "epoch": 1.64, + "learning_rate": 0.00012865127892560348, + "loss": 0.6814, + "step": 7064 + }, + { + "epoch": 1.64, + "learning_rate": 0.000128633851054128, + "loss": 0.6998, + "step": 7065 + }, + { + "epoch": 1.64, + "learning_rate": 0.0001286164222352282, + "loss": 0.7219, + "step": 7066 + }, + { + "epoch": 1.64, + "learning_rate": 0.00012859899246948065, + "loss": 0.694, + "step": 7067 + }, + { + "epoch": 1.64, + "learning_rate": 0.0001285815617574622, + "loss": 0.6814, + "step": 7068 + }, + { + "epoch": 1.64, + "learning_rate": 0.00012856413009974956, + "loss": 0.6883, + "step": 7069 + }, + { + "epoch": 1.64, + "learning_rate": 0.0001285466974969195, + "loss": 0.7494, + "step": 7070 + }, + { + "epoch": 1.64, + "learning_rate": 0.00012852926394954872, + "loss": 0.6886, + "step": 7071 + }, + { + "epoch": 1.64, + "learning_rate": 0.00012851182945821418, + "loss": 0.7092, + "step": 7072 + }, + { + "epoch": 1.64, + "learning_rate": 0.00012849439402349265, + "loss": 0.7669, + "step": 7073 + }, + { + "epoch": 1.64, + "learning_rate": 0.00012847695764596113, + "loss": 0.6721, + "step": 7074 + }, + { + "epoch": 1.64, + "learning_rate": 0.0001284595203261965, + "loss": 0.7304, + "step": 7075 + }, + { + "epoch": 1.64, + "learning_rate": 0.00012844208206477567, + "loss": 0.7249, + "step": 7076 + }, + { + "epoch": 1.64, + "learning_rate": 0.0001284246428622757, + "loss": 0.7495, + "step": 7077 + }, + { + "epoch": 1.64, + "learning_rate": 0.00012840720271927353, + "loss": 0.7702, + "step": 7078 + }, + { + "epoch": 1.64, + "learning_rate": 0.0001283897616363463, + "loss": 0.6669, + "step": 7079 + }, + { + "epoch": 1.64, + "learning_rate": 0.00012837231961407105, + "loss": 0.7206, + "step": 7080 + }, + { + "epoch": 1.64, + "learning_rate": 0.00012835487665302492, + "loss": 0.6674, + "step": 7081 + }, + { + "epoch": 1.64, + "learning_rate": 0.000128337432753785, + "loss": 0.7016, + "step": 7082 + }, + { + "epoch": 1.64, + "learning_rate": 0.00012831998791692852, + "loss": 0.6798, + "step": 7083 + }, + { + "epoch": 1.64, + "learning_rate": 0.00012830254214303265, + "loss": 0.634, + "step": 7084 + }, + { + "epoch": 1.64, + "learning_rate": 0.00012828509543267468, + "loss": 0.7242, + "step": 7085 + }, + { + "epoch": 1.64, + "learning_rate": 0.0001282676477864318, + "loss": 0.7133, + "step": 7086 + }, + { + "epoch": 1.64, + "learning_rate": 0.0001282501992048814, + "loss": 0.74, + "step": 7087 + }, + { + "epoch": 1.64, + "learning_rate": 0.00012823274968860074, + "loss": 0.6938, + "step": 7088 + }, + { + "epoch": 1.64, + "learning_rate": 0.0001282152992381672, + "loss": 0.6987, + "step": 7089 + }, + { + "epoch": 1.64, + "learning_rate": 0.00012819784785415817, + "loss": 0.664, + "step": 7090 + }, + { + "epoch": 1.64, + "learning_rate": 0.00012818039553715114, + "loss": 0.7207, + "step": 7091 + }, + { + "epoch": 1.64, + "learning_rate": 0.00012816294228772346, + "loss": 0.7155, + "step": 7092 + }, + { + "epoch": 1.64, + "learning_rate": 0.0001281454881064527, + "loss": 0.6786, + "step": 7093 + }, + { + "epoch": 1.64, + "learning_rate": 0.00012812803299391628, + "loss": 0.7154, + "step": 7094 + }, + { + "epoch": 1.64, + "learning_rate": 0.00012811057695069183, + "loss": 0.7329, + "step": 7095 + }, + { + "epoch": 1.64, + "learning_rate": 0.00012809311997735696, + "loss": 0.7778, + "step": 7096 + }, + { + "epoch": 1.64, + "learning_rate": 0.00012807566207448918, + "loss": 0.6675, + "step": 7097 + }, + { + "epoch": 1.64, + "learning_rate": 0.00012805820324266617, + "loss": 0.6679, + "step": 7098 + }, + { + "epoch": 1.64, + "learning_rate": 0.00012804074348246562, + "loss": 0.6582, + "step": 7099 + }, + { + "epoch": 1.64, + "learning_rate": 0.0001280232827944652, + "loss": 0.7499, + "step": 7100 + }, + { + "epoch": 1.64, + "learning_rate": 0.00012800582117924268, + "loss": 0.649, + "step": 7101 + }, + { + "epoch": 1.64, + "learning_rate": 0.00012798835863737576, + "loss": 0.7551, + "step": 7102 + }, + { + "epoch": 1.64, + "learning_rate": 0.00012797089516944233, + "loss": 0.673, + "step": 7103 + }, + { + "epoch": 1.64, + "learning_rate": 0.0001279534307760201, + "loss": 0.6833, + "step": 7104 + }, + { + "epoch": 1.64, + "learning_rate": 0.000127935965457687, + "loss": 0.7174, + "step": 7105 + }, + { + "epoch": 1.65, + "learning_rate": 0.00012791849921502092, + "loss": 0.7113, + "step": 7106 + }, + { + "epoch": 1.65, + "learning_rate": 0.0001279010320485997, + "loss": 0.6589, + "step": 7107 + }, + { + "epoch": 1.65, + "learning_rate": 0.00012788356395900137, + "loss": 0.6813, + "step": 7108 + }, + { + "epoch": 1.65, + "learning_rate": 0.00012786609494680386, + "loss": 0.797, + "step": 7109 + }, + { + "epoch": 1.65, + "learning_rate": 0.00012784862501258518, + "loss": 0.7273, + "step": 7110 + }, + { + "epoch": 1.65, + "learning_rate": 0.00012783115415692338, + "loss": 0.7426, + "step": 7111 + }, + { + "epoch": 1.65, + "learning_rate": 0.00012781368238039655, + "loss": 0.6628, + "step": 7112 + }, + { + "epoch": 1.65, + "learning_rate": 0.00012779620968358273, + "loss": 0.6963, + "step": 7113 + }, + { + "epoch": 1.65, + "learning_rate": 0.00012777873606706014, + "loss": 0.6974, + "step": 7114 + }, + { + "epoch": 1.65, + "learning_rate": 0.00012776126153140683, + "loss": 0.7553, + "step": 7115 + }, + { + "epoch": 1.65, + "learning_rate": 0.00012774378607720104, + "loss": 0.6456, + "step": 7116 + }, + { + "epoch": 1.65, + "learning_rate": 0.000127726309705021, + "loss": 0.7243, + "step": 7117 + }, + { + "epoch": 1.65, + "learning_rate": 0.00012770883241544495, + "loss": 0.6798, + "step": 7118 + }, + { + "epoch": 1.65, + "learning_rate": 0.00012769135420905118, + "loss": 0.6826, + "step": 7119 + }, + { + "epoch": 1.65, + "learning_rate": 0.000127673875086418, + "loss": 0.6654, + "step": 7120 + }, + { + "epoch": 1.65, + "learning_rate": 0.00012765639504812374, + "loss": 0.6303, + "step": 7121 + }, + { + "epoch": 1.65, + "learning_rate": 0.00012763891409474677, + "loss": 0.7419, + "step": 7122 + }, + { + "epoch": 1.65, + "learning_rate": 0.00012762143222686554, + "loss": 0.694, + "step": 7123 + }, + { + "epoch": 1.65, + "learning_rate": 0.0001276039494450584, + "loss": 0.7259, + "step": 7124 + }, + { + "epoch": 1.65, + "learning_rate": 0.00012758646574990385, + "loss": 0.6478, + "step": 7125 + }, + { + "epoch": 1.65, + "learning_rate": 0.00012756898114198043, + "loss": 0.8025, + "step": 7126 + }, + { + "epoch": 1.65, + "learning_rate": 0.00012755149562186657, + "loss": 0.7317, + "step": 7127 + }, + { + "epoch": 1.65, + "learning_rate": 0.00012753400919014092, + "loss": 0.6758, + "step": 7128 + }, + { + "epoch": 1.65, + "learning_rate": 0.00012751652184738197, + "loss": 0.7938, + "step": 7129 + }, + { + "epoch": 1.65, + "learning_rate": 0.00012749903359416843, + "loss": 0.6596, + "step": 7130 + }, + { + "epoch": 1.65, + "learning_rate": 0.00012748154443107884, + "loss": 0.5944, + "step": 7131 + }, + { + "epoch": 1.65, + "learning_rate": 0.00012746405435869198, + "loss": 0.6997, + "step": 7132 + }, + { + "epoch": 1.65, + "learning_rate": 0.0001274465633775865, + "loss": 0.6865, + "step": 7133 + }, + { + "epoch": 1.65, + "learning_rate": 0.0001274290714883411, + "loss": 0.6855, + "step": 7134 + }, + { + "epoch": 1.65, + "learning_rate": 0.0001274115786915346, + "loss": 0.6212, + "step": 7135 + }, + { + "epoch": 1.65, + "learning_rate": 0.00012739408498774576, + "loss": 0.6991, + "step": 7136 + }, + { + "epoch": 1.65, + "learning_rate": 0.00012737659037755344, + "loss": 0.7263, + "step": 7137 + }, + { + "epoch": 1.65, + "learning_rate": 0.00012735909486153647, + "loss": 0.7108, + "step": 7138 + }, + { + "epoch": 1.65, + "learning_rate": 0.00012734159844027376, + "loss": 0.6581, + "step": 7139 + }, + { + "epoch": 1.65, + "learning_rate": 0.00012732410111434414, + "loss": 0.7155, + "step": 7140 + }, + { + "epoch": 1.65, + "learning_rate": 0.00012730660288432665, + "loss": 0.6847, + "step": 7141 + }, + { + "epoch": 1.65, + "learning_rate": 0.0001272891037508002, + "loss": 0.6751, + "step": 7142 + }, + { + "epoch": 1.65, + "learning_rate": 0.00012727160371434385, + "loss": 0.7418, + "step": 7143 + }, + { + "epoch": 1.65, + "learning_rate": 0.00012725410277553666, + "loss": 0.6485, + "step": 7144 + }, + { + "epoch": 1.65, + "learning_rate": 0.00012723660093495753, + "loss": 0.6213, + "step": 7145 + }, + { + "epoch": 1.65, + "learning_rate": 0.0001272190981931857, + "loss": 0.7119, + "step": 7146 + }, + { + "epoch": 1.65, + "learning_rate": 0.00012720159455080027, + "loss": 0.6758, + "step": 7147 + }, + { + "epoch": 1.65, + "learning_rate": 0.00012718409000838038, + "loss": 0.7881, + "step": 7148 + }, + { + "epoch": 1.66, + "learning_rate": 0.00012716658456650524, + "loss": 0.7074, + "step": 7149 + }, + { + "epoch": 1.66, + "learning_rate": 0.00012714907822575398, + "loss": 0.6939, + "step": 7150 + }, + { + "epoch": 1.66, + "learning_rate": 0.0001271315709867059, + "loss": 0.7701, + "step": 7151 + }, + { + "epoch": 1.66, + "learning_rate": 0.00012711406284994026, + "loss": 0.6953, + "step": 7152 + }, + { + "epoch": 1.66, + "learning_rate": 0.0001270965538160364, + "loss": 0.7047, + "step": 7153 + }, + { + "epoch": 1.66, + "learning_rate": 0.0001270790438855736, + "loss": 0.6588, + "step": 7154 + }, + { + "epoch": 1.66, + "learning_rate": 0.0001270615330591313, + "loss": 0.7131, + "step": 7155 + }, + { + "epoch": 1.66, + "learning_rate": 0.00012704402133728877, + "loss": 0.6618, + "step": 7156 + }, + { + "epoch": 1.66, + "learning_rate": 0.00012702650872062544, + "loss": 0.7278, + "step": 7157 + }, + { + "epoch": 1.66, + "learning_rate": 0.00012700899520972088, + "loss": 0.7211, + "step": 7158 + }, + { + "epoch": 1.66, + "learning_rate": 0.0001269914808051545, + "loss": 0.786, + "step": 7159 + }, + { + "epoch": 1.66, + "learning_rate": 0.00012697396550750582, + "loss": 0.6491, + "step": 7160 + }, + { + "epoch": 1.66, + "learning_rate": 0.00012695644931735436, + "loss": 0.7284, + "step": 7161 + }, + { + "epoch": 1.66, + "learning_rate": 0.00012693893223527966, + "loss": 0.778, + "step": 7162 + }, + { + "epoch": 1.66, + "learning_rate": 0.00012692141426186136, + "loss": 0.6569, + "step": 7163 + }, + { + "epoch": 1.66, + "learning_rate": 0.0001269038953976791, + "loss": 0.7227, + "step": 7164 + }, + { + "epoch": 1.66, + "learning_rate": 0.00012688637564331256, + "loss": 0.6853, + "step": 7165 + }, + { + "epoch": 1.66, + "learning_rate": 0.00012686885499934132, + "loss": 0.7614, + "step": 7166 + }, + { + "epoch": 1.66, + "learning_rate": 0.0001268513334663452, + "loss": 0.665, + "step": 7167 + }, + { + "epoch": 1.66, + "learning_rate": 0.00012683381104490384, + "loss": 0.6894, + "step": 7168 + }, + { + "epoch": 1.66, + "learning_rate": 0.00012681628773559712, + "loss": 0.6797, + "step": 7169 + }, + { + "epoch": 1.66, + "learning_rate": 0.00012679876353900482, + "loss": 0.6755, + "step": 7170 + }, + { + "epoch": 1.66, + "learning_rate": 0.00012678123845570672, + "loss": 0.728, + "step": 7171 + }, + { + "epoch": 1.66, + "learning_rate": 0.00012676371248628273, + "loss": 0.6711, + "step": 7172 + }, + { + "epoch": 1.66, + "learning_rate": 0.00012674618563131274, + "loss": 0.6763, + "step": 7173 + }, + { + "epoch": 1.66, + "learning_rate": 0.00012672865789137663, + "loss": 0.7254, + "step": 7174 + }, + { + "epoch": 1.66, + "learning_rate": 0.0001267111292670544, + "loss": 0.7162, + "step": 7175 + }, + { + "epoch": 1.66, + "learning_rate": 0.00012669359975892602, + "loss": 0.7406, + "step": 7176 + }, + { + "epoch": 1.66, + "learning_rate": 0.0001266760693675715, + "loss": 0.715, + "step": 7177 + }, + { + "epoch": 1.66, + "learning_rate": 0.0001266585380935708, + "loss": 0.6699, + "step": 7178 + }, + { + "epoch": 1.66, + "learning_rate": 0.00012664100593750408, + "loss": 0.609, + "step": 7179 + }, + { + "epoch": 1.66, + "learning_rate": 0.0001266234728999514, + "loss": 0.6362, + "step": 7180 + }, + { + "epoch": 1.66, + "learning_rate": 0.00012660593898149297, + "loss": 0.6775, + "step": 7181 + }, + { + "epoch": 1.66, + "learning_rate": 0.00012658840418270877, + "loss": 0.6595, + "step": 7182 + }, + { + "epoch": 1.66, + "learning_rate": 0.00012657086850417914, + "loss": 0.6689, + "step": 7183 + }, + { + "epoch": 1.66, + "learning_rate": 0.00012655333194648422, + "loss": 0.6342, + "step": 7184 + }, + { + "epoch": 1.66, + "learning_rate": 0.00012653579451020423, + "loss": 0.6792, + "step": 7185 + }, + { + "epoch": 1.66, + "learning_rate": 0.00012651825619591954, + "loss": 0.6527, + "step": 7186 + }, + { + "epoch": 1.66, + "learning_rate": 0.00012650071700421036, + "loss": 0.6589, + "step": 7187 + }, + { + "epoch": 1.66, + "learning_rate": 0.00012648317693565706, + "loss": 0.6204, + "step": 7188 + }, + { + "epoch": 1.66, + "learning_rate": 0.00012646563599083996, + "loss": 0.6946, + "step": 7189 + }, + { + "epoch": 1.66, + "learning_rate": 0.0001264480941703395, + "loss": 0.6148, + "step": 7190 + }, + { + "epoch": 1.66, + "learning_rate": 0.00012643055147473604, + "loss": 0.661, + "step": 7191 + }, + { + "epoch": 1.67, + "learning_rate": 0.00012641300790461007, + "loss": 0.7409, + "step": 7192 + }, + { + "epoch": 1.67, + "learning_rate": 0.00012639546346054206, + "loss": 0.7188, + "step": 7193 + }, + { + "epoch": 1.67, + "learning_rate": 0.00012637791814311248, + "loss": 0.7633, + "step": 7194 + }, + { + "epoch": 1.67, + "learning_rate": 0.00012636037195290189, + "loss": 0.6584, + "step": 7195 + }, + { + "epoch": 1.67, + "learning_rate": 0.00012634282489049087, + "loss": 0.7087, + "step": 7196 + }, + { + "epoch": 1.67, + "learning_rate": 0.00012632527695645993, + "loss": 0.7488, + "step": 7197 + }, + { + "epoch": 1.67, + "learning_rate": 0.00012630772815138978, + "loss": 0.7125, + "step": 7198 + }, + { + "epoch": 1.67, + "learning_rate": 0.000126290178475861, + "loss": 0.6822, + "step": 7199 + }, + { + "epoch": 1.67, + "learning_rate": 0.00012627262793045432, + "loss": 0.6466, + "step": 7200 + }, + { + "epoch": 1.67, + "learning_rate": 0.00012625507651575042, + "loss": 0.6816, + "step": 7201 + }, + { + "epoch": 1.67, + "learning_rate": 0.00012623752423233, + "loss": 0.6781, + "step": 7202 + }, + { + "epoch": 1.67, + "learning_rate": 0.00012621997108077388, + "loss": 0.6586, + "step": 7203 + }, + { + "epoch": 1.67, + "learning_rate": 0.00012620241706166283, + "loss": 0.6288, + "step": 7204 + }, + { + "epoch": 1.67, + "learning_rate": 0.00012618486217557765, + "loss": 0.72, + "step": 7205 + }, + { + "epoch": 1.67, + "learning_rate": 0.00012616730642309922, + "loss": 0.6867, + "step": 7206 + }, + { + "epoch": 1.67, + "learning_rate": 0.00012614974980480837, + "loss": 0.7076, + "step": 7207 + }, + { + "epoch": 1.67, + "learning_rate": 0.00012613219232128608, + "loss": 0.6947, + "step": 7208 + }, + { + "epoch": 1.67, + "learning_rate": 0.00012611463397311322, + "loss": 0.7535, + "step": 7209 + }, + { + "epoch": 1.67, + "learning_rate": 0.0001260970747608708, + "loss": 0.6187, + "step": 7210 + }, + { + "epoch": 1.67, + "learning_rate": 0.00012607951468513972, + "loss": 0.6372, + "step": 7211 + }, + { + "epoch": 1.67, + "learning_rate": 0.0001260619537465011, + "loss": 0.6525, + "step": 7212 + }, + { + "epoch": 1.67, + "learning_rate": 0.00012604439194553596, + "loss": 0.6992, + "step": 7213 + }, + { + "epoch": 1.67, + "learning_rate": 0.00012602682928282538, + "loss": 0.7385, + "step": 7214 + }, + { + "epoch": 1.67, + "learning_rate": 0.00012600926575895044, + "loss": 0.7404, + "step": 7215 + }, + { + "epoch": 1.67, + "learning_rate": 0.00012599170137449233, + "loss": 0.7183, + "step": 7216 + }, + { + "epoch": 1.67, + "learning_rate": 0.00012597413613003213, + "loss": 0.657, + "step": 7217 + }, + { + "epoch": 1.67, + "learning_rate": 0.00012595657002615108, + "loss": 0.7069, + "step": 7218 + }, + { + "epoch": 1.67, + "learning_rate": 0.0001259390030634304, + "loss": 0.6977, + "step": 7219 + }, + { + "epoch": 1.67, + "learning_rate": 0.00012592143524245132, + "loss": 0.7222, + "step": 7220 + }, + { + "epoch": 1.67, + "learning_rate": 0.00012590386656379516, + "loss": 0.6582, + "step": 7221 + }, + { + "epoch": 1.67, + "learning_rate": 0.0001258862970280432, + "loss": 0.7078, + "step": 7222 + }, + { + "epoch": 1.67, + "learning_rate": 0.0001258687266357767, + "loss": 0.7066, + "step": 7223 + }, + { + "epoch": 1.67, + "learning_rate": 0.00012585115538757714, + "loss": 0.6253, + "step": 7224 + }, + { + "epoch": 1.67, + "learning_rate": 0.00012583358328402587, + "loss": 0.709, + "step": 7225 + }, + { + "epoch": 1.67, + "learning_rate": 0.00012581601032570427, + "loss": 0.7193, + "step": 7226 + }, + { + "epoch": 1.67, + "learning_rate": 0.0001257984365131938, + "loss": 0.6854, + "step": 7227 + }, + { + "epoch": 1.67, + "learning_rate": 0.00012578086184707597, + "loss": 0.7671, + "step": 7228 + }, + { + "epoch": 1.67, + "learning_rate": 0.0001257632863279323, + "loss": 0.698, + "step": 7229 + }, + { + "epoch": 1.67, + "learning_rate": 0.0001257457099563442, + "loss": 0.7267, + "step": 7230 + }, + { + "epoch": 1.67, + "learning_rate": 0.00012572813273289336, + "loss": 0.7074, + "step": 7231 + }, + { + "epoch": 1.67, + "learning_rate": 0.0001257105546581613, + "loss": 0.6651, + "step": 7232 + }, + { + "epoch": 1.67, + "learning_rate": 0.00012569297573272967, + "loss": 0.6795, + "step": 7233 + }, + { + "epoch": 1.67, + "learning_rate": 0.0001256753959571801, + "loss": 0.7281, + "step": 7234 + }, + { + "epoch": 1.68, + "learning_rate": 0.00012565781533209423, + "loss": 0.7091, + "step": 7235 + }, + { + "epoch": 1.68, + "learning_rate": 0.0001256402338580538, + "loss": 0.7435, + "step": 7236 + }, + { + "epoch": 1.68, + "learning_rate": 0.00012562265153564055, + "loss": 0.7385, + "step": 7237 + }, + { + "epoch": 1.68, + "learning_rate": 0.00012560506836543618, + "loss": 0.6785, + "step": 7238 + }, + { + "epoch": 1.68, + "learning_rate": 0.00012558748434802252, + "loss": 0.6648, + "step": 7239 + }, + { + "epoch": 1.68, + "learning_rate": 0.0001255698994839814, + "loss": 0.6739, + "step": 7240 + }, + { + "epoch": 1.68, + "learning_rate": 0.00012555231377389458, + "loss": 0.7019, + "step": 7241 + }, + { + "epoch": 1.68, + "learning_rate": 0.000125534727218344, + "loss": 0.7667, + "step": 7242 + }, + { + "epoch": 1.68, + "learning_rate": 0.00012551713981791155, + "loss": 0.6973, + "step": 7243 + }, + { + "epoch": 1.68, + "learning_rate": 0.0001254995515731791, + "loss": 0.6816, + "step": 7244 + }, + { + "epoch": 1.68, + "learning_rate": 0.00012548196248472873, + "loss": 0.625, + "step": 7245 + }, + { + "epoch": 1.68, + "learning_rate": 0.00012546437255314222, + "loss": 0.7129, + "step": 7246 + }, + { + "epoch": 1.68, + "learning_rate": 0.00012544678177900176, + "loss": 0.7097, + "step": 7247 + }, + { + "epoch": 1.68, + "learning_rate": 0.00012542919016288927, + "loss": 0.7261, + "step": 7248 + }, + { + "epoch": 1.68, + "learning_rate": 0.00012541159770538692, + "loss": 0.7061, + "step": 7249 + }, + { + "epoch": 1.68, + "learning_rate": 0.00012539400440707673, + "loss": 0.6503, + "step": 7250 + }, + { + "epoch": 1.68, + "learning_rate": 0.00012537641026854078, + "loss": 0.6851, + "step": 7251 + }, + { + "epoch": 1.68, + "learning_rate": 0.00012535881529036127, + "loss": 0.7314, + "step": 7252 + }, + { + "epoch": 1.68, + "learning_rate": 0.00012534121947312037, + "loss": 0.7847, + "step": 7253 + }, + { + "epoch": 1.68, + "learning_rate": 0.00012532362281740032, + "loss": 0.6942, + "step": 7254 + }, + { + "epoch": 1.68, + "learning_rate": 0.0001253060253237833, + "loss": 0.7529, + "step": 7255 + }, + { + "epoch": 1.68, + "learning_rate": 0.00012528842699285162, + "loss": 0.6709, + "step": 7256 + }, + { + "epoch": 1.68, + "learning_rate": 0.0001252708278251875, + "loss": 0.7194, + "step": 7257 + }, + { + "epoch": 1.68, + "learning_rate": 0.00012525322782137326, + "loss": 0.6496, + "step": 7258 + }, + { + "epoch": 1.68, + "learning_rate": 0.00012523562698199127, + "loss": 0.7147, + "step": 7259 + }, + { + "epoch": 1.68, + "learning_rate": 0.0001252180253076239, + "loss": 0.7165, + "step": 7260 + }, + { + "epoch": 1.68, + "learning_rate": 0.00012520042279885355, + "loss": 0.7277, + "step": 7261 + }, + { + "epoch": 1.68, + "learning_rate": 0.00012518281945626263, + "loss": 0.7453, + "step": 7262 + }, + { + "epoch": 1.68, + "learning_rate": 0.0001251652152804336, + "loss": 0.6587, + "step": 7263 + }, + { + "epoch": 1.68, + "learning_rate": 0.00012514761027194893, + "loss": 0.6761, + "step": 7264 + }, + { + "epoch": 1.68, + "learning_rate": 0.00012513000443139112, + "loss": 0.6779, + "step": 7265 + }, + { + "epoch": 1.68, + "learning_rate": 0.00012511239775934275, + "loss": 0.6599, + "step": 7266 + }, + { + "epoch": 1.68, + "learning_rate": 0.0001250947902563863, + "loss": 0.7134, + "step": 7267 + }, + { + "epoch": 1.68, + "learning_rate": 0.00012507718192310442, + "loss": 0.7023, + "step": 7268 + }, + { + "epoch": 1.68, + "learning_rate": 0.00012505957276007972, + "loss": 0.7291, + "step": 7269 + }, + { + "epoch": 1.68, + "learning_rate": 0.0001250419627678948, + "loss": 0.7064, + "step": 7270 + }, + { + "epoch": 1.68, + "learning_rate": 0.00012502435194713244, + "loss": 0.6912, + "step": 7271 + }, + { + "epoch": 1.68, + "learning_rate": 0.00012500674029837524, + "loss": 0.7333, + "step": 7272 + }, + { + "epoch": 1.68, + "learning_rate": 0.00012498912782220596, + "loss": 0.6731, + "step": 7273 + }, + { + "epoch": 1.68, + "learning_rate": 0.00012497151451920736, + "loss": 0.7524, + "step": 7274 + }, + { + "epoch": 1.68, + "learning_rate": 0.00012495390038996215, + "loss": 0.7379, + "step": 7275 + }, + { + "epoch": 1.68, + "learning_rate": 0.00012493628543505322, + "loss": 0.7311, + "step": 7276 + }, + { + "epoch": 1.68, + "learning_rate": 0.00012491866965506346, + "loss": 0.6986, + "step": 7277 + }, + { + "epoch": 1.68, + "learning_rate": 0.00012490105305057559, + "loss": 0.741, + "step": 7278 + }, + { + "epoch": 1.69, + "learning_rate": 0.00012488343562217256, + "loss": 0.6991, + "step": 7279 + }, + { + "epoch": 1.69, + "learning_rate": 0.00012486581737043736, + "loss": 0.7311, + "step": 7280 + }, + { + "epoch": 1.69, + "learning_rate": 0.00012484819829595279, + "loss": 0.7081, + "step": 7281 + }, + { + "epoch": 1.69, + "learning_rate": 0.00012483057839930196, + "loss": 0.7399, + "step": 7282 + }, + { + "epoch": 1.69, + "learning_rate": 0.0001248129576810678, + "loss": 0.7285, + "step": 7283 + }, + { + "epoch": 1.69, + "learning_rate": 0.00012479533614183334, + "loss": 0.7002, + "step": 7284 + }, + { + "epoch": 1.69, + "learning_rate": 0.00012477771378218165, + "loss": 0.6674, + "step": 7285 + }, + { + "epoch": 1.69, + "learning_rate": 0.00012476009060269577, + "loss": 0.6719, + "step": 7286 + }, + { + "epoch": 1.69, + "learning_rate": 0.0001247424666039589, + "loss": 0.6856, + "step": 7287 + }, + { + "epoch": 1.69, + "learning_rate": 0.0001247248417865541, + "loss": 0.7019, + "step": 7288 + }, + { + "epoch": 1.69, + "learning_rate": 0.00012470721615106452, + "loss": 0.753, + "step": 7289 + }, + { + "epoch": 1.69, + "learning_rate": 0.00012468958969807342, + "loss": 0.7107, + "step": 7290 + }, + { + "epoch": 1.69, + "learning_rate": 0.00012467196242816396, + "loss": 0.7552, + "step": 7291 + }, + { + "epoch": 1.69, + "learning_rate": 0.0001246543343419194, + "loss": 0.7759, + "step": 7292 + }, + { + "epoch": 1.69, + "learning_rate": 0.00012463670543992302, + "loss": 0.7472, + "step": 7293 + }, + { + "epoch": 1.69, + "learning_rate": 0.00012461907572275808, + "loss": 0.7129, + "step": 7294 + }, + { + "epoch": 1.69, + "learning_rate": 0.00012460144519100793, + "loss": 0.7508, + "step": 7295 + }, + { + "epoch": 1.69, + "learning_rate": 0.00012458381384525594, + "loss": 0.6747, + "step": 7296 + }, + { + "epoch": 1.69, + "learning_rate": 0.00012456618168608547, + "loss": 0.7029, + "step": 7297 + }, + { + "epoch": 1.69, + "learning_rate": 0.00012454854871407994, + "loss": 0.7155, + "step": 7298 + }, + { + "epoch": 1.69, + "learning_rate": 0.00012453091492982274, + "loss": 0.6952, + "step": 7299 + }, + { + "epoch": 1.69, + "learning_rate": 0.00012451328033389738, + "loss": 0.7139, + "step": 7300 + }, + { + "epoch": 1.69, + "learning_rate": 0.0001244956449268873, + "loss": 0.7349, + "step": 7301 + }, + { + "epoch": 1.69, + "learning_rate": 0.00012447800870937604, + "loss": 0.6653, + "step": 7302 + }, + { + "epoch": 1.69, + "learning_rate": 0.00012446037168194714, + "loss": 0.715, + "step": 7303 + }, + { + "epoch": 1.69, + "learning_rate": 0.00012444273384518416, + "loss": 0.7067, + "step": 7304 + }, + { + "epoch": 1.69, + "learning_rate": 0.00012442509519967069, + "loss": 0.6633, + "step": 7305 + }, + { + "epoch": 1.69, + "learning_rate": 0.0001244074557459904, + "loss": 0.7282, + "step": 7306 + }, + { + "epoch": 1.69, + "learning_rate": 0.00012438981548472682, + "loss": 0.6728, + "step": 7307 + }, + { + "epoch": 1.69, + "learning_rate": 0.00012437217441646373, + "loss": 0.672, + "step": 7308 + }, + { + "epoch": 1.69, + "learning_rate": 0.00012435453254178478, + "loss": 0.6927, + "step": 7309 + }, + { + "epoch": 1.69, + "learning_rate": 0.0001243368898612737, + "loss": 0.6965, + "step": 7310 + }, + { + "epoch": 1.69, + "learning_rate": 0.0001243192463755143, + "loss": 0.686, + "step": 7311 + }, + { + "epoch": 1.69, + "learning_rate": 0.00012430160208509028, + "loss": 0.7077, + "step": 7312 + }, + { + "epoch": 1.69, + "learning_rate": 0.00012428395699058548, + "loss": 0.7177, + "step": 7313 + }, + { + "epoch": 1.69, + "learning_rate": 0.00012426631109258373, + "loss": 0.6731, + "step": 7314 + }, + { + "epoch": 1.69, + "learning_rate": 0.0001242486643916689, + "loss": 0.6953, + "step": 7315 + }, + { + "epoch": 1.69, + "learning_rate": 0.00012423101688842486, + "loss": 0.697, + "step": 7316 + }, + { + "epoch": 1.69, + "learning_rate": 0.00012421336858343555, + "loss": 0.6412, + "step": 7317 + }, + { + "epoch": 1.69, + "learning_rate": 0.00012419571947728486, + "loss": 0.7297, + "step": 7318 + }, + { + "epoch": 1.69, + "learning_rate": 0.00012417806957055685, + "loss": 0.658, + "step": 7319 + }, + { + "epoch": 1.69, + "learning_rate": 0.0001241604188638354, + "loss": 0.6964, + "step": 7320 + }, + { + "epoch": 1.69, + "learning_rate": 0.00012414276735770458, + "loss": 0.7257, + "step": 7321 + }, + { + "epoch": 1.7, + "learning_rate": 0.00012412511505274844, + "loss": 0.6818, + "step": 7322 + }, + { + "epoch": 1.7, + "learning_rate": 0.00012410746194955105, + "loss": 0.6773, + "step": 7323 + }, + { + "epoch": 1.7, + "learning_rate": 0.00012408980804869653, + "loss": 0.7349, + "step": 7324 + }, + { + "epoch": 1.7, + "learning_rate": 0.00012407215335076893, + "loss": 0.5974, + "step": 7325 + }, + { + "epoch": 1.7, + "learning_rate": 0.00012405449785635249, + "loss": 0.6448, + "step": 7326 + }, + { + "epoch": 1.7, + "learning_rate": 0.0001240368415660313, + "loss": 0.7657, + "step": 7327 + }, + { + "epoch": 1.7, + "learning_rate": 0.00012401918448038966, + "loss": 0.6741, + "step": 7328 + }, + { + "epoch": 1.7, + "learning_rate": 0.00012400152660001172, + "loss": 0.6326, + "step": 7329 + }, + { + "epoch": 1.7, + "learning_rate": 0.00012398386792548176, + "loss": 0.6649, + "step": 7330 + }, + { + "epoch": 1.7, + "learning_rate": 0.0001239662084573841, + "loss": 0.6842, + "step": 7331 + }, + { + "epoch": 1.7, + "learning_rate": 0.000123948548196303, + "loss": 0.6979, + "step": 7332 + }, + { + "epoch": 1.7, + "learning_rate": 0.00012393088714282279, + "loss": 0.6864, + "step": 7333 + }, + { + "epoch": 1.7, + "learning_rate": 0.00012391322529752788, + "loss": 0.6188, + "step": 7334 + }, + { + "epoch": 1.7, + "learning_rate": 0.00012389556266100268, + "loss": 0.7522, + "step": 7335 + }, + { + "epoch": 1.7, + "learning_rate": 0.00012387789923383146, + "loss": 0.7021, + "step": 7336 + }, + { + "epoch": 1.7, + "learning_rate": 0.0001238602350165988, + "loss": 0.7379, + "step": 7337 + }, + { + "epoch": 1.7, + "learning_rate": 0.0001238425700098891, + "loss": 0.7224, + "step": 7338 + }, + { + "epoch": 1.7, + "learning_rate": 0.00012382490421428689, + "loss": 0.6898, + "step": 7339 + }, + { + "epoch": 1.7, + "learning_rate": 0.00012380723763037667, + "loss": 0.6863, + "step": 7340 + }, + { + "epoch": 1.7, + "learning_rate": 0.000123789570258743, + "loss": 0.6645, + "step": 7341 + }, + { + "epoch": 1.7, + "learning_rate": 0.0001237719020999704, + "loss": 0.6167, + "step": 7342 + }, + { + "epoch": 1.7, + "learning_rate": 0.0001237719020999704, + "loss": 0.6721, + "step": 7343 + }, + { + "epoch": 1.7, + "learning_rate": 0.00012375423315464355, + "loss": 0.6825, + "step": 7344 + }, + { + "epoch": 1.7, + "learning_rate": 0.00012373656342334698, + "loss": 0.7389, + "step": 7345 + }, + { + "epoch": 1.7, + "learning_rate": 0.00012371889290666542, + "loss": 0.6574, + "step": 7346 + }, + { + "epoch": 1.7, + "learning_rate": 0.0001237012216051835, + "loss": 0.6926, + "step": 7347 + }, + { + "epoch": 1.7, + "learning_rate": 0.00012368354951948585, + "loss": 0.7113, + "step": 7348 + }, + { + "epoch": 1.7, + "learning_rate": 0.00012366587665015738, + "loss": 0.6713, + "step": 7349 + }, + { + "epoch": 1.7, + "learning_rate": 0.00012364820299778267, + "loss": 0.7057, + "step": 7350 + }, + { + "epoch": 1.7, + "learning_rate": 0.00012363052856294657, + "loss": 0.6467, + "step": 7351 + }, + { + "epoch": 1.7, + "learning_rate": 0.00012361285334623392, + "loss": 0.6619, + "step": 7352 + }, + { + "epoch": 1.7, + "learning_rate": 0.00012359517734822947, + "loss": 0.702, + "step": 7353 + }, + { + "epoch": 1.7, + "learning_rate": 0.0001235775005695181, + "loss": 0.725, + "step": 7354 + }, + { + "epoch": 1.7, + "learning_rate": 0.00012355982301068472, + "loss": 0.6388, + "step": 7355 + }, + { + "epoch": 1.7, + "learning_rate": 0.00012354214467231423, + "loss": 0.6762, + "step": 7356 + }, + { + "epoch": 1.7, + "learning_rate": 0.00012352446555499155, + "loss": 0.7598, + "step": 7357 + }, + { + "epoch": 1.7, + "learning_rate": 0.00012350678565930165, + "loss": 0.767, + "step": 7358 + }, + { + "epoch": 1.7, + "learning_rate": 0.0001234891049858295, + "loss": 0.806, + "step": 7359 + }, + { + "epoch": 1.7, + "learning_rate": 0.0001234714235351601, + "loss": 0.7017, + "step": 7360 + }, + { + "epoch": 1.7, + "learning_rate": 0.00012345374130787854, + "loss": 0.6335, + "step": 7361 + }, + { + "epoch": 1.7, + "learning_rate": 0.00012343605830456982, + "loss": 0.7639, + "step": 7362 + }, + { + "epoch": 1.7, + "learning_rate": 0.00012341837452581908, + "loss": 0.681, + "step": 7363 + }, + { + "epoch": 1.7, + "learning_rate": 0.00012340068997221138, + "loss": 0.6864, + "step": 7364 + }, + { + "epoch": 1.71, + "learning_rate": 0.00012338300464433192, + "loss": 0.6613, + "step": 7365 + }, + { + "epoch": 1.71, + "learning_rate": 0.0001233653185427658, + "loss": 0.6853, + "step": 7366 + }, + { + "epoch": 1.71, + "learning_rate": 0.00012334763166809825, + "loss": 0.6718, + "step": 7367 + }, + { + "epoch": 1.71, + "learning_rate": 0.00012332994402091448, + "loss": 0.7121, + "step": 7368 + }, + { + "epoch": 1.71, + "learning_rate": 0.00012331225560179972, + "loss": 0.702, + "step": 7369 + }, + { + "epoch": 1.71, + "learning_rate": 0.00012329456641133925, + "loss": 0.6966, + "step": 7370 + }, + { + "epoch": 1.71, + "learning_rate": 0.0001232768764501183, + "loss": 0.7295, + "step": 7371 + }, + { + "epoch": 1.71, + "learning_rate": 0.00012325918571872232, + "loss": 0.7435, + "step": 7372 + }, + { + "epoch": 1.71, + "learning_rate": 0.00012324149421773654, + "loss": 0.6306, + "step": 7373 + }, + { + "epoch": 1.71, + "learning_rate": 0.0001232238019477464, + "loss": 0.7149, + "step": 7374 + }, + { + "epoch": 1.71, + "learning_rate": 0.00012320610890933725, + "loss": 0.6869, + "step": 7375 + }, + { + "epoch": 1.71, + "learning_rate": 0.0001231884151030945, + "loss": 0.6501, + "step": 7376 + }, + { + "epoch": 1.71, + "learning_rate": 0.0001231707205296036, + "loss": 0.6967, + "step": 7377 + }, + { + "epoch": 1.71, + "learning_rate": 0.00012315302518945003, + "loss": 0.6186, + "step": 7378 + }, + { + "epoch": 1.71, + "learning_rate": 0.00012313532908321935, + "loss": 0.6985, + "step": 7379 + }, + { + "epoch": 1.71, + "learning_rate": 0.000123117632211497, + "loss": 0.6535, + "step": 7380 + }, + { + "epoch": 1.71, + "learning_rate": 0.0001230999345748685, + "loss": 0.7396, + "step": 7381 + }, + { + "epoch": 1.71, + "learning_rate": 0.00012308223617391953, + "loss": 0.6871, + "step": 7382 + }, + { + "epoch": 1.71, + "learning_rate": 0.00012306453700923554, + "loss": 0.6971, + "step": 7383 + }, + { + "epoch": 1.71, + "learning_rate": 0.00012304683708140232, + "loss": 0.7802, + "step": 7384 + }, + { + "epoch": 1.71, + "learning_rate": 0.00012302913639100543, + "loss": 0.6918, + "step": 7385 + }, + { + "epoch": 1.71, + "learning_rate": 0.0001230114349386305, + "loss": 0.7428, + "step": 7386 + }, + { + "epoch": 1.71, + "learning_rate": 0.00012299373272486332, + "loss": 0.7033, + "step": 7387 + }, + { + "epoch": 1.71, + "learning_rate": 0.00012297602975028954, + "loss": 0.7048, + "step": 7388 + }, + { + "epoch": 1.71, + "learning_rate": 0.00012295832601549498, + "loss": 0.6809, + "step": 7389 + }, + { + "epoch": 1.71, + "learning_rate": 0.00012294062152106534, + "loss": 0.7017, + "step": 7390 + }, + { + "epoch": 1.71, + "learning_rate": 0.00012292291626758646, + "loss": 0.6572, + "step": 7391 + }, + { + "epoch": 1.71, + "learning_rate": 0.00012290521025564412, + "loss": 0.6815, + "step": 7392 + }, + { + "epoch": 1.71, + "learning_rate": 0.00012288750348582423, + "loss": 0.7445, + "step": 7393 + }, + { + "epoch": 1.71, + "learning_rate": 0.00012286979595871263, + "loss": 0.7591, + "step": 7394 + }, + { + "epoch": 1.71, + "learning_rate": 0.0001228520876748952, + "loss": 0.6593, + "step": 7395 + }, + { + "epoch": 1.71, + "learning_rate": 0.00012283437863495794, + "loss": 0.6484, + "step": 7396 + }, + { + "epoch": 1.71, + "learning_rate": 0.00012281666883948674, + "loss": 0.78, + "step": 7397 + }, + { + "epoch": 1.71, + "learning_rate": 0.00012279895828906756, + "loss": 0.7549, + "step": 7398 + }, + { + "epoch": 1.71, + "learning_rate": 0.0001227812469842864, + "loss": 0.6631, + "step": 7399 + }, + { + "epoch": 1.71, + "learning_rate": 0.00012276353492572935, + "loss": 0.6942, + "step": 7400 + }, + { + "epoch": 1.71, + "learning_rate": 0.0001227458221139824, + "loss": 0.7524, + "step": 7401 + }, + { + "epoch": 1.71, + "learning_rate": 0.00012272810854963164, + "loss": 0.7678, + "step": 7402 + }, + { + "epoch": 1.71, + "learning_rate": 0.00012271039423326314, + "loss": 0.7362, + "step": 7403 + }, + { + "epoch": 1.71, + "learning_rate": 0.00012269267916546308, + "loss": 0.6523, + "step": 7404 + }, + { + "epoch": 1.71, + "learning_rate": 0.00012267496334681757, + "loss": 0.7099, + "step": 7405 + }, + { + "epoch": 1.71, + "learning_rate": 0.00012265724677791278, + "loss": 0.7015, + "step": 7406 + }, + { + "epoch": 1.71, + "learning_rate": 0.0001226395294593349, + "loss": 0.616, + "step": 7407 + }, + { + "epoch": 1.72, + "learning_rate": 0.00012262181139167022, + "loss": 0.7393, + "step": 7408 + }, + { + "epoch": 1.72, + "learning_rate": 0.00012260409257550489, + "loss": 0.7524, + "step": 7409 + }, + { + "epoch": 1.72, + "learning_rate": 0.00012258637301142526, + "loss": 0.6903, + "step": 7410 + }, + { + "epoch": 1.72, + "learning_rate": 0.0001225686527000176, + "loss": 0.6092, + "step": 7411 + }, + { + "epoch": 1.72, + "learning_rate": 0.00012255093164186825, + "loss": 0.6966, + "step": 7412 + }, + { + "epoch": 1.72, + "learning_rate": 0.0001225332098375635, + "loss": 0.645, + "step": 7413 + }, + { + "epoch": 1.72, + "learning_rate": 0.0001225154872876898, + "loss": 0.7031, + "step": 7414 + }, + { + "epoch": 1.72, + "learning_rate": 0.00012249776399283346, + "loss": 0.6595, + "step": 7415 + }, + { + "epoch": 1.72, + "learning_rate": 0.00012248003995358098, + "loss": 0.8146, + "step": 7416 + }, + { + "epoch": 1.72, + "learning_rate": 0.00012246231517051875, + "loss": 0.7125, + "step": 7417 + }, + { + "epoch": 1.72, + "learning_rate": 0.00012244458964423327, + "loss": 0.7071, + "step": 7418 + }, + { + "epoch": 1.72, + "learning_rate": 0.00012242686337531102, + "loss": 0.7403, + "step": 7419 + }, + { + "epoch": 1.72, + "learning_rate": 0.00012240913636433852, + "loss": 0.7122, + "step": 7420 + }, + { + "epoch": 1.72, + "learning_rate": 0.00012239140861190233, + "loss": 0.6478, + "step": 7421 + }, + { + "epoch": 1.72, + "learning_rate": 0.000122373680118589, + "loss": 0.6854, + "step": 7422 + }, + { + "epoch": 1.72, + "learning_rate": 0.00012235595088498513, + "loss": 0.6477, + "step": 7423 + }, + { + "epoch": 1.72, + "learning_rate": 0.00012233822091167732, + "loss": 0.7005, + "step": 7424 + }, + { + "epoch": 1.72, + "learning_rate": 0.00012232049019925226, + "loss": 0.7472, + "step": 7425 + }, + { + "epoch": 1.72, + "learning_rate": 0.00012230275874829658, + "loss": 0.6894, + "step": 7426 + }, + { + "epoch": 1.72, + "learning_rate": 0.00012228502655939696, + "loss": 0.7243, + "step": 7427 + }, + { + "epoch": 1.72, + "learning_rate": 0.00012226729363314011, + "loss": 0.7339, + "step": 7428 + }, + { + "epoch": 1.72, + "learning_rate": 0.00012224955997011281, + "loss": 0.7377, + "step": 7429 + }, + { + "epoch": 1.72, + "learning_rate": 0.0001222318255709018, + "loss": 0.6598, + "step": 7430 + }, + { + "epoch": 1.72, + "learning_rate": 0.00012221409043609388, + "loss": 0.7652, + "step": 7431 + }, + { + "epoch": 1.72, + "learning_rate": 0.00012219635456627584, + "loss": 0.7446, + "step": 7432 + }, + { + "epoch": 1.72, + "learning_rate": 0.00012217861796203454, + "loss": 0.6675, + "step": 7433 + }, + { + "epoch": 1.72, + "learning_rate": 0.0001221608806239568, + "loss": 0.706, + "step": 7434 + }, + { + "epoch": 1.72, + "learning_rate": 0.00012214314255262957, + "loss": 0.6777, + "step": 7435 + }, + { + "epoch": 1.72, + "learning_rate": 0.00012212540374863972, + "loss": 0.7691, + "step": 7436 + }, + { + "epoch": 1.72, + "learning_rate": 0.0001221076642125742, + "loss": 0.7246, + "step": 7437 + }, + { + "epoch": 1.72, + "learning_rate": 0.00012208992394501992, + "loss": 0.648, + "step": 7438 + }, + { + "epoch": 1.72, + "learning_rate": 0.0001220721829465639, + "loss": 0.7362, + "step": 7439 + }, + { + "epoch": 1.72, + "learning_rate": 0.00012205444121779317, + "loss": 0.6325, + "step": 7440 + }, + { + "epoch": 1.72, + "learning_rate": 0.00012203669875929474, + "loss": 0.7407, + "step": 7441 + }, + { + "epoch": 1.72, + "learning_rate": 0.00012201895557165564, + "loss": 0.7537, + "step": 7442 + }, + { + "epoch": 1.72, + "learning_rate": 0.00012200121165546299, + "loss": 0.6685, + "step": 7443 + }, + { + "epoch": 1.72, + "learning_rate": 0.00012198346701130383, + "loss": 0.6649, + "step": 7444 + }, + { + "epoch": 1.72, + "learning_rate": 0.00012196572163976536, + "loss": 0.6852, + "step": 7445 + }, + { + "epoch": 1.72, + "learning_rate": 0.0001219479755414347, + "loss": 0.7277, + "step": 7446 + }, + { + "epoch": 1.72, + "learning_rate": 0.00012193022871689904, + "loss": 0.717, + "step": 7447 + }, + { + "epoch": 1.72, + "learning_rate": 0.00012191248116674556, + "loss": 0.7943, + "step": 7448 + }, + { + "epoch": 1.72, + "learning_rate": 0.00012189473289156148, + "loss": 0.6594, + "step": 7449 + }, + { + "epoch": 1.72, + "learning_rate": 0.00012187698389193403, + "loss": 0.7159, + "step": 7450 + }, + { + "epoch": 1.73, + "learning_rate": 0.00012185923416845053, + "loss": 0.718, + "step": 7451 + }, + { + "epoch": 1.73, + "learning_rate": 0.00012184148372169826, + "loss": 0.6632, + "step": 7452 + }, + { + "epoch": 1.73, + "learning_rate": 0.00012182373255226454, + "loss": 0.6997, + "step": 7453 + }, + { + "epoch": 1.73, + "learning_rate": 0.00012180598066073667, + "loss": 0.66, + "step": 7454 + }, + { + "epoch": 1.73, + "learning_rate": 0.00012178822804770204, + "loss": 0.7256, + "step": 7455 + }, + { + "epoch": 1.73, + "learning_rate": 0.00012177047471374807, + "loss": 0.6475, + "step": 7456 + }, + { + "epoch": 1.73, + "learning_rate": 0.00012175272065946217, + "loss": 0.7033, + "step": 7457 + }, + { + "epoch": 1.73, + "learning_rate": 0.00012173496588543175, + "loss": 0.6268, + "step": 7458 + }, + { + "epoch": 1.73, + "learning_rate": 0.0001217172103922443, + "loss": 0.7518, + "step": 7459 + }, + { + "epoch": 1.73, + "learning_rate": 0.00012169945418048729, + "loss": 0.6603, + "step": 7460 + }, + { + "epoch": 1.73, + "learning_rate": 0.0001216816972507482, + "loss": 0.6965, + "step": 7461 + }, + { + "epoch": 1.73, + "learning_rate": 0.00012166393960361462, + "loss": 0.7069, + "step": 7462 + }, + { + "epoch": 1.73, + "learning_rate": 0.00012164618123967409, + "loss": 0.7281, + "step": 7463 + }, + { + "epoch": 1.73, + "learning_rate": 0.00012162842215951418, + "loss": 0.6702, + "step": 7464 + }, + { + "epoch": 1.73, + "learning_rate": 0.0001216106623637225, + "loss": 0.7089, + "step": 7465 + }, + { + "epoch": 1.73, + "learning_rate": 0.00012159290185288665, + "loss": 0.6605, + "step": 7466 + }, + { + "epoch": 1.73, + "learning_rate": 0.00012157514062759431, + "loss": 0.6727, + "step": 7467 + }, + { + "epoch": 1.73, + "learning_rate": 0.00012155737868843318, + "loss": 0.7291, + "step": 7468 + }, + { + "epoch": 1.73, + "learning_rate": 0.00012153961603599095, + "loss": 0.8059, + "step": 7469 + }, + { + "epoch": 1.73, + "learning_rate": 0.0001215218526708553, + "loss": 0.6849, + "step": 7470 + }, + { + "epoch": 1.73, + "learning_rate": 0.00012150408859361399, + "loss": 0.7631, + "step": 7471 + }, + { + "epoch": 1.73, + "learning_rate": 0.0001214863238048548, + "loss": 0.7248, + "step": 7472 + }, + { + "epoch": 1.73, + "learning_rate": 0.00012146855830516555, + "loss": 0.6985, + "step": 7473 + }, + { + "epoch": 1.73, + "learning_rate": 0.00012145079209513402, + "loss": 0.7104, + "step": 7474 + }, + { + "epoch": 1.73, + "learning_rate": 0.0001214330251753481, + "loss": 0.6782, + "step": 7475 + }, + { + "epoch": 1.73, + "learning_rate": 0.00012141525754639558, + "loss": 0.7157, + "step": 7476 + }, + { + "epoch": 1.73, + "learning_rate": 0.00012139748920886438, + "loss": 0.6947, + "step": 7477 + }, + { + "epoch": 1.73, + "learning_rate": 0.00012137972016334243, + "loss": 0.6185, + "step": 7478 + }, + { + "epoch": 1.73, + "learning_rate": 0.00012136195041041765, + "loss": 0.7525, + "step": 7479 + }, + { + "epoch": 1.73, + "learning_rate": 0.00012134417995067801, + "loss": 0.6294, + "step": 7480 + }, + { + "epoch": 1.73, + "learning_rate": 0.00012132640878471147, + "loss": 0.6714, + "step": 7481 + }, + { + "epoch": 1.73, + "learning_rate": 0.00012130863691310603, + "loss": 0.7291, + "step": 7482 + }, + { + "epoch": 1.73, + "learning_rate": 0.00012129086433644976, + "loss": 0.6682, + "step": 7483 + }, + { + "epoch": 1.73, + "learning_rate": 0.00012127309105533065, + "loss": 0.7352, + "step": 7484 + }, + { + "epoch": 1.73, + "learning_rate": 0.00012125531707033684, + "loss": 0.7193, + "step": 7485 + }, + { + "epoch": 1.73, + "learning_rate": 0.00012123754238205637, + "loss": 0.7879, + "step": 7486 + }, + { + "epoch": 1.73, + "learning_rate": 0.00012121976699107743, + "loss": 0.6794, + "step": 7487 + }, + { + "epoch": 1.73, + "learning_rate": 0.00012120199089798807, + "loss": 0.66, + "step": 7488 + }, + { + "epoch": 1.73, + "learning_rate": 0.00012118421410337655, + "loss": 0.7479, + "step": 7489 + }, + { + "epoch": 1.73, + "learning_rate": 0.00012116643660783098, + "loss": 0.7252, + "step": 7490 + }, + { + "epoch": 1.73, + "learning_rate": 0.00012114865841193965, + "loss": 0.6558, + "step": 7491 + }, + { + "epoch": 1.73, + "learning_rate": 0.00012113087951629074, + "loss": 0.6457, + "step": 7492 + }, + { + "epoch": 1.73, + "learning_rate": 0.00012111309992147255, + "loss": 0.6852, + "step": 7493 + }, + { + "epoch": 1.73, + "learning_rate": 0.00012109531962807332, + "loss": 0.6814, + "step": 7494 + }, + { + "epoch": 1.74, + "learning_rate": 0.00012107753863668139, + "loss": 0.6915, + "step": 7495 + }, + { + "epoch": 1.74, + "learning_rate": 0.00012105975694788509, + "loss": 0.7656, + "step": 7496 + }, + { + "epoch": 1.74, + "learning_rate": 0.00012104197456227276, + "loss": 0.6886, + "step": 7497 + }, + { + "epoch": 1.74, + "learning_rate": 0.00012102419148043279, + "loss": 0.6292, + "step": 7498 + }, + { + "epoch": 1.74, + "learning_rate": 0.00012100640770295355, + "loss": 0.6899, + "step": 7499 + }, + { + "epoch": 1.74, + "learning_rate": 0.00012098862323042349, + "loss": 0.7533, + "step": 7500 + }, + { + "epoch": 1.74, + "learning_rate": 0.00012097083806343103, + "loss": 0.7427, + "step": 7501 + }, + { + "epoch": 1.74, + "learning_rate": 0.00012095305220256468, + "loss": 0.719, + "step": 7502 + }, + { + "epoch": 1.74, + "learning_rate": 0.00012093526564841289, + "loss": 0.6344, + "step": 7503 + }, + { + "epoch": 1.74, + "learning_rate": 0.00012091747840156418, + "loss": 0.717, + "step": 7504 + }, + { + "epoch": 1.74, + "learning_rate": 0.0001208996904626071, + "loss": 0.6995, + "step": 7505 + }, + { + "epoch": 1.74, + "learning_rate": 0.00012088190183213022, + "loss": 0.7219, + "step": 7506 + }, + { + "epoch": 1.74, + "learning_rate": 0.00012086411251072207, + "loss": 0.6465, + "step": 7507 + }, + { + "epoch": 1.74, + "learning_rate": 0.00012084632249897132, + "loss": 0.668, + "step": 7508 + }, + { + "epoch": 1.74, + "learning_rate": 0.00012082853179746657, + "loss": 0.7249, + "step": 7509 + }, + { + "epoch": 1.74, + "learning_rate": 0.00012081074040679645, + "loss": 0.6825, + "step": 7510 + }, + { + "epoch": 1.74, + "learning_rate": 0.00012079294832754969, + "loss": 0.6769, + "step": 7511 + }, + { + "epoch": 1.74, + "learning_rate": 0.00012077515556031491, + "loss": 0.7003, + "step": 7512 + }, + { + "epoch": 1.74, + "learning_rate": 0.0001207573621056809, + "loss": 0.6583, + "step": 7513 + }, + { + "epoch": 1.74, + "learning_rate": 0.00012073956796423635, + "loss": 0.7541, + "step": 7514 + }, + { + "epoch": 1.74, + "learning_rate": 0.00012072177313657005, + "loss": 0.7148, + "step": 7515 + }, + { + "epoch": 1.74, + "learning_rate": 0.00012070397762327079, + "loss": 0.6906, + "step": 7516 + }, + { + "epoch": 1.74, + "learning_rate": 0.00012068618142492738, + "loss": 0.71, + "step": 7517 + }, + { + "epoch": 1.74, + "learning_rate": 0.00012066838454212865, + "loss": 0.6283, + "step": 7518 + }, + { + "epoch": 1.74, + "learning_rate": 0.00012065058697546345, + "loss": 0.7587, + "step": 7519 + }, + { + "epoch": 1.74, + "learning_rate": 0.00012063278872552067, + "loss": 0.6742, + "step": 7520 + }, + { + "epoch": 1.74, + "learning_rate": 0.00012061498979288916, + "loss": 0.6526, + "step": 7521 + }, + { + "epoch": 1.74, + "learning_rate": 0.00012059719017815791, + "loss": 0.7222, + "step": 7522 + }, + { + "epoch": 1.74, + "learning_rate": 0.00012057938988191585, + "loss": 0.739, + "step": 7523 + }, + { + "epoch": 1.74, + "learning_rate": 0.00012056158890475194, + "loss": 0.7112, + "step": 7524 + }, + { + "epoch": 1.74, + "learning_rate": 0.00012054378724725517, + "loss": 0.7168, + "step": 7525 + }, + { + "epoch": 1.74, + "learning_rate": 0.00012052598491001455, + "loss": 0.6896, + "step": 7526 + }, + { + "epoch": 1.74, + "learning_rate": 0.00012050818189361912, + "loss": 0.7197, + "step": 7527 + }, + { + "epoch": 1.74, + "learning_rate": 0.00012049037819865792, + "loss": 0.7128, + "step": 7528 + }, + { + "epoch": 1.74, + "learning_rate": 0.00012047257382572006, + "loss": 0.6697, + "step": 7529 + }, + { + "epoch": 1.74, + "learning_rate": 0.00012045476877539466, + "loss": 0.7323, + "step": 7530 + }, + { + "epoch": 1.74, + "learning_rate": 0.00012043696304827077, + "loss": 0.7338, + "step": 7531 + }, + { + "epoch": 1.74, + "learning_rate": 0.00012041915664493761, + "loss": 0.7352, + "step": 7532 + }, + { + "epoch": 1.74, + "learning_rate": 0.00012040134956598434, + "loss": 0.7213, + "step": 7533 + }, + { + "epoch": 1.74, + "learning_rate": 0.00012038354181200011, + "loss": 0.7267, + "step": 7534 + }, + { + "epoch": 1.74, + "learning_rate": 0.0001203657333835742, + "loss": 0.637, + "step": 7535 + }, + { + "epoch": 1.74, + "learning_rate": 0.00012034792428129578, + "loss": 0.7033, + "step": 7536 + }, + { + "epoch": 1.74, + "learning_rate": 0.00012033011450575416, + "loss": 0.7234, + "step": 7537 + }, + { + "epoch": 1.75, + "learning_rate": 0.00012031230405753861, + "loss": 0.7052, + "step": 7538 + }, + { + "epoch": 1.75, + "learning_rate": 0.0001202944929372384, + "loss": 0.737, + "step": 7539 + }, + { + "epoch": 1.75, + "learning_rate": 0.0001202766811454429, + "loss": 0.7257, + "step": 7540 + }, + { + "epoch": 1.75, + "learning_rate": 0.00012025886868274143, + "loss": 0.7081, + "step": 7541 + }, + { + "epoch": 1.75, + "learning_rate": 0.00012024105554972341, + "loss": 0.7494, + "step": 7542 + }, + { + "epoch": 1.75, + "learning_rate": 0.00012022324174697817, + "loss": 0.7436, + "step": 7543 + }, + { + "epoch": 1.75, + "learning_rate": 0.00012020542727509518, + "loss": 0.667, + "step": 7544 + }, + { + "epoch": 1.75, + "learning_rate": 0.0001201876121346638, + "loss": 0.6962, + "step": 7545 + }, + { + "epoch": 1.75, + "learning_rate": 0.00012016979632627357, + "loss": 0.7083, + "step": 7546 + }, + { + "epoch": 1.75, + "learning_rate": 0.00012015197985051394, + "loss": 0.6735, + "step": 7547 + }, + { + "epoch": 1.75, + "learning_rate": 0.00012013416270797441, + "loss": 0.6353, + "step": 7548 + }, + { + "epoch": 1.75, + "learning_rate": 0.00012011634489924455, + "loss": 0.6961, + "step": 7549 + }, + { + "epoch": 1.75, + "learning_rate": 0.00012009852642491383, + "loss": 0.6262, + "step": 7550 + }, + { + "epoch": 1.75, + "learning_rate": 0.00012008070728557186, + "loss": 0.6675, + "step": 7551 + }, + { + "epoch": 1.75, + "learning_rate": 0.00012006288748180823, + "loss": 0.6885, + "step": 7552 + }, + { + "epoch": 1.75, + "learning_rate": 0.00012004506701421256, + "loss": 0.6862, + "step": 7553 + }, + { + "epoch": 1.75, + "learning_rate": 0.0001200272458833745, + "loss": 0.7156, + "step": 7554 + }, + { + "epoch": 1.75, + "learning_rate": 0.00012000942408988368, + "loss": 0.7075, + "step": 7555 + }, + { + "epoch": 1.75, + "learning_rate": 0.00011999160163432974, + "loss": 0.7104, + "step": 7556 + }, + { + "epoch": 1.75, + "learning_rate": 0.00011997377851730248, + "loss": 0.8032, + "step": 7557 + }, + { + "epoch": 1.75, + "learning_rate": 0.00011995595473939156, + "loss": 0.7003, + "step": 7558 + }, + { + "epoch": 1.75, + "learning_rate": 0.00011993813030118674, + "loss": 0.6652, + "step": 7559 + }, + { + "epoch": 1.75, + "learning_rate": 0.00011992030520327779, + "loss": 0.6602, + "step": 7560 + }, + { + "epoch": 1.75, + "learning_rate": 0.0001199024794462545, + "loss": 0.6822, + "step": 7561 + }, + { + "epoch": 1.75, + "learning_rate": 0.00011988465303070664, + "loss": 0.7403, + "step": 7562 + }, + { + "epoch": 1.75, + "learning_rate": 0.00011986682595722412, + "loss": 0.7659, + "step": 7563 + }, + { + "epoch": 1.75, + "learning_rate": 0.00011984899822639672, + "loss": 0.7002, + "step": 7564 + }, + { + "epoch": 1.75, + "learning_rate": 0.0001198311698388144, + "loss": 0.7161, + "step": 7565 + }, + { + "epoch": 1.75, + "learning_rate": 0.00011981334079506696, + "loss": 0.625, + "step": 7566 + }, + { + "epoch": 1.75, + "learning_rate": 0.00011979551109574438, + "loss": 0.6413, + "step": 7567 + }, + { + "epoch": 1.75, + "learning_rate": 0.00011977768074143656, + "loss": 0.7229, + "step": 7568 + }, + { + "epoch": 1.75, + "learning_rate": 0.00011975984973273353, + "loss": 0.6349, + "step": 7569 + }, + { + "epoch": 1.75, + "learning_rate": 0.00011974201807022525, + "loss": 0.6893, + "step": 7570 + }, + { + "epoch": 1.75, + "learning_rate": 0.00011972418575450166, + "loss": 0.6892, + "step": 7571 + }, + { + "epoch": 1.75, + "learning_rate": 0.00011970635278615288, + "loss": 0.7078, + "step": 7572 + }, + { + "epoch": 1.75, + "learning_rate": 0.00011968851916576888, + "loss": 0.6723, + "step": 7573 + }, + { + "epoch": 1.75, + "learning_rate": 0.00011967068489393978, + "loss": 0.7066, + "step": 7574 + }, + { + "epoch": 1.75, + "learning_rate": 0.00011965284997125568, + "loss": 0.6943, + "step": 7575 + }, + { + "epoch": 1.75, + "learning_rate": 0.00011963501439830669, + "loss": 0.7007, + "step": 7576 + }, + { + "epoch": 1.75, + "learning_rate": 0.00011961717817568292, + "loss": 0.6943, + "step": 7577 + }, + { + "epoch": 1.75, + "learning_rate": 0.00011959934130397452, + "loss": 0.6712, + "step": 7578 + }, + { + "epoch": 1.75, + "learning_rate": 0.00011958150378377169, + "loss": 0.7057, + "step": 7579 + }, + { + "epoch": 1.75, + "learning_rate": 0.00011956366561566463, + "loss": 0.6929, + "step": 7580 + }, + { + "epoch": 1.76, + "learning_rate": 0.00011954582680024358, + "loss": 0.6912, + "step": 7581 + }, + { + "epoch": 1.76, + "learning_rate": 0.00011952798733809875, + "loss": 0.6849, + "step": 7582 + }, + { + "epoch": 1.76, + "learning_rate": 0.00011951014722982042, + "loss": 0.7083, + "step": 7583 + }, + { + "epoch": 1.76, + "learning_rate": 0.00011949230647599888, + "loss": 0.6891, + "step": 7584 + }, + { + "epoch": 1.76, + "learning_rate": 0.00011947446507722437, + "loss": 0.714, + "step": 7585 + }, + { + "epoch": 1.76, + "learning_rate": 0.00011945662303408735, + "loss": 0.6835, + "step": 7586 + }, + { + "epoch": 1.76, + "learning_rate": 0.00011943878034717807, + "loss": 0.7545, + "step": 7587 + }, + { + "epoch": 1.76, + "learning_rate": 0.00011942093701708695, + "loss": 0.7181, + "step": 7588 + }, + { + "epoch": 1.76, + "learning_rate": 0.00011940309304440433, + "loss": 0.7257, + "step": 7589 + }, + { + "epoch": 1.76, + "learning_rate": 0.00011938524842972068, + "loss": 0.6653, + "step": 7590 + }, + { + "epoch": 1.76, + "learning_rate": 0.00011936740317362636, + "loss": 0.7512, + "step": 7591 + }, + { + "epoch": 1.76, + "learning_rate": 0.00011934955727671193, + "loss": 0.6608, + "step": 7592 + }, + { + "epoch": 1.76, + "learning_rate": 0.00011933171073956781, + "loss": 0.7019, + "step": 7593 + }, + { + "epoch": 1.76, + "learning_rate": 0.00011931386356278449, + "loss": 0.7572, + "step": 7594 + }, + { + "epoch": 1.76, + "learning_rate": 0.00011929601574695248, + "loss": 0.6847, + "step": 7595 + }, + { + "epoch": 1.76, + "learning_rate": 0.00011927816729266234, + "loss": 0.6849, + "step": 7596 + }, + { + "epoch": 1.76, + "learning_rate": 0.00011926031820050468, + "loss": 0.6784, + "step": 7597 + }, + { + "epoch": 1.76, + "learning_rate": 0.00011924246847107, + "loss": 0.7188, + "step": 7598 + }, + { + "epoch": 1.76, + "learning_rate": 0.00011922461810494893, + "loss": 0.711, + "step": 7599 + }, + { + "epoch": 1.76, + "learning_rate": 0.00011920676710273214, + "loss": 0.7657, + "step": 7600 + }, + { + "epoch": 1.76, + "learning_rate": 0.0001191889154650102, + "loss": 0.6745, + "step": 7601 + }, + { + "epoch": 1.76, + "learning_rate": 0.00011917106319237386, + "loss": 0.6611, + "step": 7602 + }, + { + "epoch": 1.76, + "learning_rate": 0.00011915321028541375, + "loss": 0.6617, + "step": 7603 + }, + { + "epoch": 1.76, + "learning_rate": 0.00011913535674472059, + "loss": 0.7173, + "step": 7604 + }, + { + "epoch": 1.76, + "learning_rate": 0.00011911750257088511, + "loss": 0.7139, + "step": 7605 + }, + { + "epoch": 1.76, + "learning_rate": 0.0001190996477644981, + "loss": 0.6724, + "step": 7606 + }, + { + "epoch": 1.76, + "learning_rate": 0.00011908179232615027, + "loss": 0.6305, + "step": 7607 + }, + { + "epoch": 1.76, + "learning_rate": 0.00011906393625643244, + "loss": 0.696, + "step": 7608 + }, + { + "epoch": 1.76, + "learning_rate": 0.00011904607955593543, + "loss": 0.6795, + "step": 7609 + }, + { + "epoch": 1.76, + "learning_rate": 0.00011902822222525007, + "loss": 0.659, + "step": 7610 + }, + { + "epoch": 1.76, + "learning_rate": 0.00011901036426496723, + "loss": 0.7148, + "step": 7611 + }, + { + "epoch": 1.76, + "learning_rate": 0.00011899250567567777, + "loss": 0.6202, + "step": 7612 + }, + { + "epoch": 1.76, + "learning_rate": 0.00011897464645797258, + "loss": 0.6907, + "step": 7613 + }, + { + "epoch": 1.76, + "learning_rate": 0.00011895678661244259, + "loss": 0.686, + "step": 7614 + }, + { + "epoch": 1.76, + "learning_rate": 0.00011893892613967872, + "loss": 0.7012, + "step": 7615 + }, + { + "epoch": 1.76, + "learning_rate": 0.00011892106504027196, + "loss": 0.6816, + "step": 7616 + }, + { + "epoch": 1.76, + "learning_rate": 0.00011890320331481328, + "loss": 0.7496, + "step": 7617 + }, + { + "epoch": 1.76, + "learning_rate": 0.00011888534096389367, + "loss": 0.6333, + "step": 7618 + }, + { + "epoch": 1.76, + "learning_rate": 0.00011886747798810419, + "loss": 0.6314, + "step": 7619 + }, + { + "epoch": 1.76, + "learning_rate": 0.00011884961438803581, + "loss": 0.6837, + "step": 7620 + }, + { + "epoch": 1.76, + "learning_rate": 0.00011883175016427965, + "loss": 0.6795, + "step": 7621 + }, + { + "epoch": 1.76, + "learning_rate": 0.00011881388531742679, + "loss": 0.6788, + "step": 7622 + }, + { + "epoch": 1.76, + "learning_rate": 0.00011879601984806829, + "loss": 0.6636, + "step": 7623 + }, + { + "epoch": 1.77, + "learning_rate": 0.00011877815375679535, + "loss": 0.7152, + "step": 7624 + }, + { + "epoch": 1.77, + "learning_rate": 0.00011876028704419903, + "loss": 0.6818, + "step": 7625 + }, + { + "epoch": 1.77, + "learning_rate": 0.00011874241971087057, + "loss": 0.6278, + "step": 7626 + }, + { + "epoch": 1.77, + "learning_rate": 0.00011872455175740112, + "loss": 0.7818, + "step": 7627 + }, + { + "epoch": 1.77, + "learning_rate": 0.00011870668318438187, + "loss": 0.7117, + "step": 7628 + }, + { + "epoch": 1.77, + "learning_rate": 0.00011868881399240408, + "loss": 0.7327, + "step": 7629 + }, + { + "epoch": 1.77, + "learning_rate": 0.000118670944182059, + "loss": 0.6879, + "step": 7630 + }, + { + "epoch": 1.77, + "learning_rate": 0.00011865307375393788, + "loss": 0.6786, + "step": 7631 + }, + { + "epoch": 1.77, + "learning_rate": 0.000118635202708632, + "loss": 0.7038, + "step": 7632 + }, + { + "epoch": 1.77, + "learning_rate": 0.00011861733104673269, + "loss": 0.7041, + "step": 7633 + }, + { + "epoch": 1.77, + "learning_rate": 0.00011859945876883132, + "loss": 0.6297, + "step": 7634 + }, + { + "epoch": 1.77, + "learning_rate": 0.00011858158587551913, + "loss": 0.6628, + "step": 7635 + }, + { + "epoch": 1.77, + "learning_rate": 0.00011856371236738757, + "loss": 0.7257, + "step": 7636 + }, + { + "epoch": 1.77, + "learning_rate": 0.00011854583824502803, + "loss": 0.6992, + "step": 7637 + }, + { + "epoch": 1.77, + "learning_rate": 0.00011852796350903188, + "loss": 0.7528, + "step": 7638 + }, + { + "epoch": 1.77, + "learning_rate": 0.00011851008815999064, + "loss": 0.6716, + "step": 7639 + }, + { + "epoch": 1.77, + "learning_rate": 0.00011849221219849562, + "loss": 0.6645, + "step": 7640 + }, + { + "epoch": 1.77, + "learning_rate": 0.00011847433562513842, + "loss": 0.7471, + "step": 7641 + }, + { + "epoch": 1.77, + "learning_rate": 0.00011845645844051045, + "loss": 0.7143, + "step": 7642 + }, + { + "epoch": 1.77, + "learning_rate": 0.00011843858064520329, + "loss": 0.7137, + "step": 7643 + }, + { + "epoch": 1.77, + "learning_rate": 0.00011842070223980841, + "loss": 0.729, + "step": 7644 + }, + { + "epoch": 1.77, + "learning_rate": 0.00011840282322491744, + "loss": 0.6139, + "step": 7645 + }, + { + "epoch": 1.77, + "learning_rate": 0.00011838494360112185, + "loss": 0.6664, + "step": 7646 + }, + { + "epoch": 1.77, + "learning_rate": 0.00011836706336901331, + "loss": 0.734, + "step": 7647 + }, + { + "epoch": 1.77, + "learning_rate": 0.0001183491825291834, + "loss": 0.7029, + "step": 7648 + }, + { + "epoch": 1.77, + "learning_rate": 0.00011833130108222378, + "loss": 0.7023, + "step": 7649 + }, + { + "epoch": 1.77, + "learning_rate": 0.0001183134190287261, + "loss": 0.6773, + "step": 7650 + }, + { + "epoch": 1.77, + "learning_rate": 0.00011829553636928202, + "loss": 0.7142, + "step": 7651 + }, + { + "epoch": 1.77, + "learning_rate": 0.0001182776531044832, + "loss": 0.7583, + "step": 7652 + }, + { + "epoch": 1.77, + "learning_rate": 0.00011825976923492141, + "loss": 0.7222, + "step": 7653 + }, + { + "epoch": 1.77, + "learning_rate": 0.00011824188476118835, + "loss": 0.6929, + "step": 7654 + }, + { + "epoch": 1.77, + "learning_rate": 0.00011822399968387582, + "loss": 0.765, + "step": 7655 + }, + { + "epoch": 1.77, + "learning_rate": 0.00011820611400357555, + "loss": 0.6642, + "step": 7656 + }, + { + "epoch": 1.77, + "learning_rate": 0.00011818822772087932, + "loss": 0.6793, + "step": 7657 + }, + { + "epoch": 1.77, + "learning_rate": 0.00011817034083637897, + "loss": 0.6872, + "step": 7658 + }, + { + "epoch": 1.77, + "learning_rate": 0.00011815245335066634, + "loss": 0.656, + "step": 7659 + }, + { + "epoch": 1.77, + "learning_rate": 0.00011813456526433327, + "loss": 0.6572, + "step": 7660 + }, + { + "epoch": 1.77, + "learning_rate": 0.00011811667657797166, + "loss": 0.8045, + "step": 7661 + }, + { + "epoch": 1.77, + "learning_rate": 0.00011809878729217337, + "loss": 0.6079, + "step": 7662 + }, + { + "epoch": 1.77, + "learning_rate": 0.00011808089740753028, + "loss": 0.6959, + "step": 7663 + }, + { + "epoch": 1.77, + "learning_rate": 0.0001180630069246344, + "loss": 0.818, + "step": 7664 + }, + { + "epoch": 1.77, + "learning_rate": 0.00011804511584407763, + "loss": 0.6944, + "step": 7665 + }, + { + "epoch": 1.77, + "learning_rate": 0.000118027224166452, + "loss": 0.6983, + "step": 7666 + }, + { + "epoch": 1.78, + "learning_rate": 0.00011800933189234943, + "loss": 0.6841, + "step": 7667 + }, + { + "epoch": 1.78, + "learning_rate": 0.00011799143902236198, + "loss": 0.6799, + "step": 7668 + }, + { + "epoch": 1.78, + "learning_rate": 0.00011797354555708164, + "loss": 0.6929, + "step": 7669 + }, + { + "epoch": 1.78, + "learning_rate": 0.00011795565149710051, + "loss": 0.6787, + "step": 7670 + }, + { + "epoch": 1.78, + "learning_rate": 0.00011793775684301063, + "loss": 0.6267, + "step": 7671 + }, + { + "epoch": 1.78, + "learning_rate": 0.0001179198615954041, + "loss": 0.6971, + "step": 7672 + }, + { + "epoch": 1.78, + "learning_rate": 0.00011790196575487303, + "loss": 0.6223, + "step": 7673 + }, + { + "epoch": 1.78, + "learning_rate": 0.00011788406932200954, + "loss": 0.692, + "step": 7674 + }, + { + "epoch": 1.78, + "learning_rate": 0.00011786617229740578, + "loss": 0.7, + "step": 7675 + }, + { + "epoch": 1.78, + "learning_rate": 0.00011784827468165394, + "loss": 0.6503, + "step": 7676 + }, + { + "epoch": 1.78, + "learning_rate": 0.0001178303764753462, + "loss": 0.6988, + "step": 7677 + }, + { + "epoch": 1.78, + "learning_rate": 0.00011781247767907475, + "loss": 0.7673, + "step": 7678 + }, + { + "epoch": 1.78, + "learning_rate": 0.00011779457829343184, + "loss": 0.7266, + "step": 7679 + }, + { + "epoch": 1.78, + "learning_rate": 0.00011777667831900966, + "loss": 0.7317, + "step": 7680 + }, + { + "epoch": 1.78, + "learning_rate": 0.00011775877775640056, + "loss": 0.6745, + "step": 7681 + }, + { + "epoch": 1.78, + "learning_rate": 0.00011774087660619683, + "loss": 0.7039, + "step": 7682 + }, + { + "epoch": 1.78, + "learning_rate": 0.00011772297486899069, + "loss": 0.7143, + "step": 7683 + }, + { + "epoch": 1.78, + "learning_rate": 0.00011770507254537453, + "loss": 0.7768, + "step": 7684 + }, + { + "epoch": 1.78, + "learning_rate": 0.00011768716963594064, + "loss": 0.6864, + "step": 7685 + }, + { + "epoch": 1.78, + "learning_rate": 0.00011766926614128142, + "loss": 0.7074, + "step": 7686 + }, + { + "epoch": 1.78, + "learning_rate": 0.0001176513620619893, + "loss": 0.645, + "step": 7687 + }, + { + "epoch": 1.78, + "learning_rate": 0.0001176334573986566, + "loss": 0.6786, + "step": 7688 + }, + { + "epoch": 1.78, + "learning_rate": 0.00011761555215187578, + "loss": 0.6776, + "step": 7689 + }, + { + "epoch": 1.78, + "learning_rate": 0.00011759764632223926, + "loss": 0.6921, + "step": 7690 + }, + { + "epoch": 1.78, + "learning_rate": 0.00011757973991033955, + "loss": 0.7796, + "step": 7691 + }, + { + "epoch": 1.78, + "learning_rate": 0.00011756183291676904, + "loss": 0.5879, + "step": 7692 + }, + { + "epoch": 1.78, + "learning_rate": 0.00011754392534212033, + "loss": 0.7188, + "step": 7693 + }, + { + "epoch": 1.78, + "learning_rate": 0.00011752601718698588, + "loss": 0.7347, + "step": 7694 + }, + { + "epoch": 1.78, + "learning_rate": 0.00011750810845195826, + "loss": 0.7456, + "step": 7695 + }, + { + "epoch": 1.78, + "learning_rate": 0.00011749019913762997, + "loss": 0.6796, + "step": 7696 + }, + { + "epoch": 1.78, + "learning_rate": 0.00011747228924459361, + "loss": 0.6655, + "step": 7697 + }, + { + "epoch": 1.78, + "learning_rate": 0.00011745437877344186, + "loss": 0.639, + "step": 7698 + }, + { + "epoch": 1.78, + "learning_rate": 0.00011743646772476719, + "loss": 0.8453, + "step": 7699 + }, + { + "epoch": 1.78, + "learning_rate": 0.00011741855609916233, + "loss": 0.7034, + "step": 7700 + }, + { + "epoch": 1.78, + "learning_rate": 0.00011740064389721989, + "loss": 0.676, + "step": 7701 + }, + { + "epoch": 1.78, + "learning_rate": 0.00011738273111953257, + "loss": 0.7012, + "step": 7702 + }, + { + "epoch": 1.78, + "learning_rate": 0.00011736481776669306, + "loss": 0.6998, + "step": 7703 + }, + { + "epoch": 1.78, + "learning_rate": 0.00011734690383929403, + "loss": 0.6731, + "step": 7704 + }, + { + "epoch": 1.78, + "learning_rate": 0.00011732898933792824, + "loss": 0.6968, + "step": 7705 + }, + { + "epoch": 1.78, + "learning_rate": 0.00011731107426318844, + "loss": 0.6554, + "step": 7706 + }, + { + "epoch": 1.78, + "learning_rate": 0.00011729315861566737, + "loss": 0.7303, + "step": 7707 + }, + { + "epoch": 1.78, + "learning_rate": 0.00011727524239595784, + "loss": 0.7413, + "step": 7708 + }, + { + "epoch": 1.78, + "learning_rate": 0.00011725732560465263, + "loss": 0.698, + "step": 7709 + }, + { + "epoch": 1.78, + "learning_rate": 0.0001172394082423446, + "loss": 0.6967, + "step": 7710 + }, + { + "epoch": 1.79, + "learning_rate": 0.00011722149030962657, + "loss": 0.6847, + "step": 7711 + }, + { + "epoch": 1.79, + "learning_rate": 0.0001172035718070914, + "loss": 0.6499, + "step": 7712 + }, + { + "epoch": 1.79, + "learning_rate": 0.00011718565273533197, + "loss": 0.7373, + "step": 7713 + }, + { + "epoch": 1.79, + "learning_rate": 0.00011716773309494118, + "loss": 0.6831, + "step": 7714 + }, + { + "epoch": 1.79, + "learning_rate": 0.00011714981288651194, + "loss": 0.6783, + "step": 7715 + }, + { + "epoch": 1.79, + "learning_rate": 0.0001171318921106372, + "loss": 0.6303, + "step": 7716 + }, + { + "epoch": 1.79, + "learning_rate": 0.00011711397076790991, + "loss": 0.7064, + "step": 7717 + }, + { + "epoch": 1.79, + "learning_rate": 0.00011709604885892303, + "loss": 0.6247, + "step": 7718 + }, + { + "epoch": 1.79, + "learning_rate": 0.00011707812638426957, + "loss": 0.6922, + "step": 7719 + }, + { + "epoch": 1.79, + "learning_rate": 0.00011706020334454253, + "loss": 0.6592, + "step": 7720 + }, + { + "epoch": 1.79, + "learning_rate": 0.00011704227974033495, + "loss": 0.6865, + "step": 7721 + }, + { + "epoch": 1.79, + "learning_rate": 0.00011702435557223987, + "loss": 0.6719, + "step": 7722 + }, + { + "epoch": 1.79, + "learning_rate": 0.00011700643084085037, + "loss": 0.7296, + "step": 7723 + }, + { + "epoch": 1.79, + "learning_rate": 0.00011698850554675951, + "loss": 0.7008, + "step": 7724 + }, + { + "epoch": 1.79, + "learning_rate": 0.0001169705796905604, + "loss": 0.6491, + "step": 7725 + }, + { + "epoch": 1.79, + "learning_rate": 0.00011695265327284619, + "loss": 0.768, + "step": 7726 + }, + { + "epoch": 1.79, + "learning_rate": 0.00011693472629421001, + "loss": 0.6943, + "step": 7727 + }, + { + "epoch": 1.79, + "learning_rate": 0.00011691679875524499, + "loss": 0.7362, + "step": 7728 + }, + { + "epoch": 1.79, + "learning_rate": 0.00011689887065654434, + "loss": 0.7125, + "step": 7729 + }, + { + "epoch": 1.79, + "learning_rate": 0.00011688094199870127, + "loss": 0.6521, + "step": 7730 + }, + { + "epoch": 1.79, + "learning_rate": 0.00011686301278230895, + "loss": 0.7146, + "step": 7731 + }, + { + "epoch": 1.79, + "learning_rate": 0.00011684508300796066, + "loss": 0.7028, + "step": 7732 + }, + { + "epoch": 1.79, + "learning_rate": 0.00011682715267624962, + "loss": 0.7456, + "step": 7733 + }, + { + "epoch": 1.79, + "learning_rate": 0.00011680922178776911, + "loss": 0.631, + "step": 7734 + }, + { + "epoch": 1.79, + "learning_rate": 0.00011679129034311244, + "loss": 0.686, + "step": 7735 + }, + { + "epoch": 1.79, + "learning_rate": 0.00011677335834287286, + "loss": 0.6973, + "step": 7736 + }, + { + "epoch": 1.79, + "learning_rate": 0.00011675542578764378, + "loss": 0.7038, + "step": 7737 + }, + { + "epoch": 1.79, + "learning_rate": 0.00011673749267801848, + "loss": 0.7178, + "step": 7738 + }, + { + "epoch": 1.79, + "learning_rate": 0.00011671955901459035, + "loss": 0.7205, + "step": 7739 + }, + { + "epoch": 1.79, + "learning_rate": 0.00011670162479795277, + "loss": 0.6436, + "step": 7740 + }, + { + "epoch": 1.79, + "learning_rate": 0.00011668369002869912, + "loss": 0.6639, + "step": 7741 + }, + { + "epoch": 1.79, + "learning_rate": 0.00011666575470742283, + "loss": 0.6981, + "step": 7742 + }, + { + "epoch": 1.79, + "learning_rate": 0.00011664781883471734, + "loss": 0.7171, + "step": 7743 + }, + { + "epoch": 1.79, + "learning_rate": 0.00011662988241117612, + "loss": 0.7651, + "step": 7744 + }, + { + "epoch": 1.79, + "learning_rate": 0.0001166119454373926, + "loss": 0.6655, + "step": 7745 + }, + { + "epoch": 1.79, + "learning_rate": 0.00011659400791396033, + "loss": 0.7636, + "step": 7746 + }, + { + "epoch": 1.79, + "learning_rate": 0.00011657606984147274, + "loss": 0.6727, + "step": 7747 + }, + { + "epoch": 1.79, + "learning_rate": 0.00011655813122052341, + "loss": 0.6783, + "step": 7748 + }, + { + "epoch": 1.79, + "learning_rate": 0.0001165401920517059, + "loss": 0.6918, + "step": 7749 + }, + { + "epoch": 1.79, + "learning_rate": 0.00011652225233561374, + "loss": 0.7316, + "step": 7750 + }, + { + "epoch": 1.79, + "learning_rate": 0.00011650431207284053, + "loss": 0.6967, + "step": 7751 + }, + { + "epoch": 1.79, + "learning_rate": 0.00011648637126397988, + "loss": 0.6396, + "step": 7752 + }, + { + "epoch": 1.79, + "learning_rate": 0.00011646842990962534, + "loss": 0.6997, + "step": 7753 + }, + { + "epoch": 1.8, + "learning_rate": 0.0001164504880103706, + "loss": 0.7093, + "step": 7754 + }, + { + "epoch": 1.8, + "learning_rate": 0.00011643254556680934, + "loss": 0.6436, + "step": 7755 + }, + { + "epoch": 1.8, + "learning_rate": 0.0001164146025795352, + "loss": 0.6582, + "step": 7756 + }, + { + "epoch": 1.8, + "learning_rate": 0.00011639665904914185, + "loss": 0.6544, + "step": 7757 + }, + { + "epoch": 1.8, + "learning_rate": 0.00011637871497622303, + "loss": 0.6915, + "step": 7758 + }, + { + "epoch": 1.8, + "learning_rate": 0.00011636077036137243, + "loss": 0.6955, + "step": 7759 + }, + { + "epoch": 1.8, + "learning_rate": 0.00011634282520518383, + "loss": 0.6632, + "step": 7760 + }, + { + "epoch": 1.8, + "learning_rate": 0.00011632487950825097, + "loss": 0.7227, + "step": 7761 + }, + { + "epoch": 1.8, + "learning_rate": 0.00011630693327116768, + "loss": 0.6747, + "step": 7762 + }, + { + "epoch": 1.8, + "learning_rate": 0.00011628898649452769, + "loss": 0.7135, + "step": 7763 + }, + { + "epoch": 1.8, + "learning_rate": 0.00011627103917892479, + "loss": 0.7354, + "step": 7764 + }, + { + "epoch": 1.8, + "learning_rate": 0.00011625309132495292, + "loss": 0.7318, + "step": 7765 + }, + { + "epoch": 1.8, + "learning_rate": 0.00011623514293320585, + "loss": 0.7313, + "step": 7766 + }, + { + "epoch": 1.8, + "learning_rate": 0.0001162171940042775, + "loss": 0.6599, + "step": 7767 + }, + { + "epoch": 1.8, + "learning_rate": 0.00011619924453876172, + "loss": 0.7367, + "step": 7768 + }, + { + "epoch": 1.8, + "learning_rate": 0.00011618129453725242, + "loss": 0.7315, + "step": 7769 + }, + { + "epoch": 1.8, + "learning_rate": 0.00011616334400034348, + "loss": 0.7517, + "step": 7770 + }, + { + "epoch": 1.8, + "learning_rate": 0.00011614539292862892, + "loss": 0.7187, + "step": 7771 + }, + { + "epoch": 1.8, + "learning_rate": 0.00011612744132270267, + "loss": 0.721, + "step": 7772 + }, + { + "epoch": 1.8, + "learning_rate": 0.0001161094891831587, + "loss": 0.7033, + "step": 7773 + }, + { + "epoch": 1.8, + "learning_rate": 0.00011609153651059097, + "loss": 0.6384, + "step": 7774 + }, + { + "epoch": 1.8, + "learning_rate": 0.00011607358330559355, + "loss": 0.7309, + "step": 7775 + }, + { + "epoch": 1.8, + "learning_rate": 0.00011605562956876039, + "loss": 0.7204, + "step": 7776 + }, + { + "epoch": 1.8, + "learning_rate": 0.00011603767530068561, + "loss": 0.6359, + "step": 7777 + }, + { + "epoch": 1.8, + "learning_rate": 0.00011601972050196328, + "loss": 0.7002, + "step": 7778 + }, + { + "epoch": 1.8, + "learning_rate": 0.00011600176517318741, + "loss": 0.7541, + "step": 7779 + }, + { + "epoch": 1.8, + "learning_rate": 0.00011598380931495212, + "loss": 0.7869, + "step": 7780 + }, + { + "epoch": 1.8, + "learning_rate": 0.00011596585292785153, + "loss": 0.6959, + "step": 7781 + }, + { + "epoch": 1.8, + "learning_rate": 0.00011594789601247982, + "loss": 0.6999, + "step": 7782 + }, + { + "epoch": 1.8, + "learning_rate": 0.00011592993856943111, + "loss": 0.7281, + "step": 7783 + }, + { + "epoch": 1.8, + "learning_rate": 0.00011591198059929954, + "loss": 0.6686, + "step": 7784 + }, + { + "epoch": 1.8, + "learning_rate": 0.0001158940221026793, + "loss": 0.6332, + "step": 7785 + }, + { + "epoch": 1.8, + "learning_rate": 0.00011587606308016463, + "loss": 0.7178, + "step": 7786 + }, + { + "epoch": 1.8, + "learning_rate": 0.00011585810353234969, + "loss": 0.6904, + "step": 7787 + }, + { + "epoch": 1.8, + "learning_rate": 0.0001158401434598288, + "loss": 0.6355, + "step": 7788 + }, + { + "epoch": 1.8, + "learning_rate": 0.00011582218286319619, + "loss": 0.7485, + "step": 7789 + }, + { + "epoch": 1.8, + "learning_rate": 0.00011580422174304608, + "loss": 0.6629, + "step": 7790 + }, + { + "epoch": 1.8, + "learning_rate": 0.0001157862600999728, + "loss": 0.6321, + "step": 7791 + }, + { + "epoch": 1.8, + "learning_rate": 0.00011576829793457064, + "loss": 0.7074, + "step": 7792 + }, + { + "epoch": 1.8, + "learning_rate": 0.00011575033524743393, + "loss": 0.6711, + "step": 7793 + }, + { + "epoch": 1.8, + "learning_rate": 0.00011573237203915706, + "loss": 0.7039, + "step": 7794 + }, + { + "epoch": 1.8, + "learning_rate": 0.00011571440831033431, + "loss": 0.6886, + "step": 7795 + }, + { + "epoch": 1.8, + "learning_rate": 0.0001156964440615601, + "loss": 0.6379, + "step": 7796 + }, + { + "epoch": 1.81, + "learning_rate": 0.00011567847929342881, + "loss": 0.7131, + "step": 7797 + }, + { + "epoch": 1.81, + "learning_rate": 0.00011566051400653486, + "loss": 0.7066, + "step": 7798 + }, + { + "epoch": 1.81, + "learning_rate": 0.00011564254820147266, + "loss": 0.6734, + "step": 7799 + }, + { + "epoch": 1.81, + "learning_rate": 0.00011562458187883669, + "loss": 0.6896, + "step": 7800 + }, + { + "epoch": 1.81, + "learning_rate": 0.00011560661503922139, + "loss": 0.6604, + "step": 7801 + }, + { + "epoch": 1.81, + "learning_rate": 0.00011558864768322122, + "loss": 0.7185, + "step": 7802 + }, + { + "epoch": 1.81, + "learning_rate": 0.0001155706798114307, + "loss": 0.7344, + "step": 7803 + }, + { + "epoch": 1.81, + "learning_rate": 0.00011555271142444433, + "loss": 0.6675, + "step": 7804 + }, + { + "epoch": 1.81, + "learning_rate": 0.00011553474252285666, + "loss": 0.7019, + "step": 7805 + }, + { + "epoch": 1.81, + "learning_rate": 0.0001155167731072622, + "loss": 0.6483, + "step": 7806 + }, + { + "epoch": 1.81, + "learning_rate": 0.00011549880317825556, + "loss": 0.6972, + "step": 7807 + }, + { + "epoch": 1.81, + "learning_rate": 0.00011548083273643131, + "loss": 0.6415, + "step": 7808 + }, + { + "epoch": 1.81, + "learning_rate": 0.00011546286178238403, + "loss": 0.6598, + "step": 7809 + }, + { + "epoch": 1.81, + "learning_rate": 0.00011546286178238403, + "loss": 0.7156, + "step": 7810 + }, + { + "epoch": 1.81, + "learning_rate": 0.00011544489031670833, + "loss": 0.6952, + "step": 7811 + }, + { + "epoch": 1.81, + "learning_rate": 0.00011542691833999886, + "loss": 0.6969, + "step": 7812 + }, + { + "epoch": 1.81, + "learning_rate": 0.00011540894585285027, + "loss": 0.6834, + "step": 7813 + }, + { + "epoch": 1.81, + "learning_rate": 0.00011539097285585723, + "loss": 0.7135, + "step": 7814 + }, + { + "epoch": 1.81, + "learning_rate": 0.00011537299934961438, + "loss": 0.695, + "step": 7815 + }, + { + "epoch": 1.81, + "learning_rate": 0.00011535502533471647, + "loss": 0.674, + "step": 7816 + }, + { + "epoch": 1.81, + "learning_rate": 0.00011533705081175821, + "loss": 0.6386, + "step": 7817 + }, + { + "epoch": 1.81, + "learning_rate": 0.00011531907578133429, + "loss": 0.6756, + "step": 7818 + }, + { + "epoch": 1.81, + "learning_rate": 0.00011530110024403952, + "loss": 0.6662, + "step": 7819 + }, + { + "epoch": 1.81, + "learning_rate": 0.00011528312420046862, + "loss": 0.6895, + "step": 7820 + }, + { + "epoch": 1.81, + "learning_rate": 0.00011526514765121641, + "loss": 0.7407, + "step": 7821 + }, + { + "epoch": 1.81, + "learning_rate": 0.00011524717059687766, + "loss": 0.6744, + "step": 7822 + }, + { + "epoch": 1.81, + "learning_rate": 0.0001152291930380472, + "loss": 0.683, + "step": 7823 + }, + { + "epoch": 1.81, + "learning_rate": 0.00011521121497531989, + "loss": 0.7293, + "step": 7824 + }, + { + "epoch": 1.81, + "learning_rate": 0.00011519323640929052, + "loss": 0.694, + "step": 7825 + }, + { + "epoch": 1.81, + "learning_rate": 0.00011517525734055398, + "loss": 0.612, + "step": 7826 + }, + { + "epoch": 1.81, + "learning_rate": 0.00011515727776970517, + "loss": 0.6578, + "step": 7827 + }, + { + "epoch": 1.81, + "learning_rate": 0.000115139297697339, + "loss": 0.6328, + "step": 7828 + }, + { + "epoch": 1.81, + "learning_rate": 0.00011512131712405037, + "loss": 0.6578, + "step": 7829 + }, + { + "epoch": 1.81, + "learning_rate": 0.0001151033360504342, + "loss": 0.6599, + "step": 7830 + }, + { + "epoch": 1.81, + "learning_rate": 0.00011508535447708546, + "loss": 0.7038, + "step": 7831 + }, + { + "epoch": 1.81, + "learning_rate": 0.0001150673724045991, + "loss": 0.7387, + "step": 7832 + }, + { + "epoch": 1.81, + "learning_rate": 0.00011504938983357012, + "loss": 0.7388, + "step": 7833 + }, + { + "epoch": 1.81, + "learning_rate": 0.0001150314067645935, + "loss": 0.7252, + "step": 7834 + }, + { + "epoch": 1.81, + "learning_rate": 0.00011501342319826429, + "loss": 0.7216, + "step": 7835 + }, + { + "epoch": 1.81, + "learning_rate": 0.0001149954391351775, + "loss": 0.7271, + "step": 7836 + }, + { + "epoch": 1.81, + "learning_rate": 0.00011497745457592816, + "loss": 0.6767, + "step": 7837 + }, + { + "epoch": 1.81, + "learning_rate": 0.00011495946952111136, + "loss": 0.6949, + "step": 7838 + }, + { + "epoch": 1.81, + "learning_rate": 0.00011494148397132219, + "loss": 0.7096, + "step": 7839 + }, + { + "epoch": 1.82, + "learning_rate": 0.00011492349792715571, + "loss": 0.7542, + "step": 7840 + }, + { + "epoch": 1.82, + "learning_rate": 0.00011490551138920706, + "loss": 0.6362, + "step": 7841 + }, + { + "epoch": 1.82, + "learning_rate": 0.00011488752435807142, + "loss": 0.6168, + "step": 7842 + }, + { + "epoch": 1.82, + "learning_rate": 0.00011486953683434382, + "loss": 0.6883, + "step": 7843 + }, + { + "epoch": 1.82, + "learning_rate": 0.0001148515488186195, + "loss": 0.688, + "step": 7844 + }, + { + "epoch": 1.82, + "learning_rate": 0.00011483356031149366, + "loss": 0.6814, + "step": 7845 + }, + { + "epoch": 1.82, + "learning_rate": 0.00011481557131356145, + "loss": 0.7788, + "step": 7846 + }, + { + "epoch": 1.82, + "learning_rate": 0.00011479758182541809, + "loss": 0.7592, + "step": 7847 + }, + { + "epoch": 1.82, + "learning_rate": 0.00011477959184765883, + "loss": 0.7502, + "step": 7848 + }, + { + "epoch": 1.82, + "learning_rate": 0.00011476160138087889, + "loss": 0.6671, + "step": 7849 + }, + { + "epoch": 1.82, + "learning_rate": 0.00011474361042567352, + "loss": 0.7419, + "step": 7850 + }, + { + "epoch": 1.82, + "learning_rate": 0.00011472561898263805, + "loss": 0.7018, + "step": 7851 + }, + { + "epoch": 1.82, + "learning_rate": 0.00011470762705236772, + "loss": 0.72, + "step": 7852 + }, + { + "epoch": 1.82, + "learning_rate": 0.0001146896346354579, + "loss": 0.6757, + "step": 7853 + }, + { + "epoch": 1.82, + "learning_rate": 0.00011467164173250385, + "loss": 0.7096, + "step": 7854 + }, + { + "epoch": 1.82, + "learning_rate": 0.00011465364834410089, + "loss": 0.6537, + "step": 7855 + }, + { + "epoch": 1.82, + "learning_rate": 0.00011463565447084445, + "loss": 0.6959, + "step": 7856 + }, + { + "epoch": 1.82, + "learning_rate": 0.0001146176601133299, + "loss": 0.6752, + "step": 7857 + }, + { + "epoch": 1.82, + "learning_rate": 0.00011459966527215263, + "loss": 0.6969, + "step": 7858 + }, + { + "epoch": 1.82, + "learning_rate": 0.00011458166994790797, + "loss": 0.7338, + "step": 7859 + }, + { + "epoch": 1.82, + "learning_rate": 0.0001145636741411914, + "loss": 0.7537, + "step": 7860 + }, + { + "epoch": 1.82, + "learning_rate": 0.00011454567785259836, + "loss": 0.675, + "step": 7861 + }, + { + "epoch": 1.82, + "learning_rate": 0.00011452768108272427, + "loss": 0.651, + "step": 7862 + }, + { + "epoch": 1.82, + "learning_rate": 0.00011450968383216465, + "loss": 0.6974, + "step": 7863 + }, + { + "epoch": 1.82, + "learning_rate": 0.00011449168610151497, + "loss": 0.6963, + "step": 7864 + }, + { + "epoch": 1.82, + "learning_rate": 0.00011447368789137072, + "loss": 0.7085, + "step": 7865 + }, + { + "epoch": 1.82, + "learning_rate": 0.00011445568920232737, + "loss": 0.698, + "step": 7866 + }, + { + "epoch": 1.82, + "learning_rate": 0.00011443769003498053, + "loss": 0.6615, + "step": 7867 + }, + { + "epoch": 1.82, + "learning_rate": 0.0001144196903899257, + "loss": 0.6748, + "step": 7868 + }, + { + "epoch": 1.82, + "learning_rate": 0.00011440169026775851, + "loss": 0.6864, + "step": 7869 + }, + { + "epoch": 1.82, + "learning_rate": 0.00011438368966907445, + "loss": 0.6554, + "step": 7870 + }, + { + "epoch": 1.82, + "learning_rate": 0.00011436568859446917, + "loss": 0.6723, + "step": 7871 + }, + { + "epoch": 1.82, + "learning_rate": 0.00011434768704453824, + "loss": 0.6695, + "step": 7872 + }, + { + "epoch": 1.82, + "learning_rate": 0.00011432968501987735, + "loss": 0.7338, + "step": 7873 + }, + { + "epoch": 1.82, + "learning_rate": 0.0001143116825210821, + "loss": 0.7373, + "step": 7874 + }, + { + "epoch": 1.82, + "learning_rate": 0.00011429367954874819, + "loss": 0.6464, + "step": 7875 + }, + { + "epoch": 1.82, + "learning_rate": 0.00011427567610347124, + "loss": 0.6657, + "step": 7876 + }, + { + "epoch": 1.82, + "learning_rate": 0.00011425767218584696, + "loss": 0.7188, + "step": 7877 + }, + { + "epoch": 1.82, + "learning_rate": 0.00011423966779647102, + "loss": 0.7334, + "step": 7878 + }, + { + "epoch": 1.82, + "learning_rate": 0.0001142216629359392, + "loss": 0.7222, + "step": 7879 + }, + { + "epoch": 1.82, + "learning_rate": 0.00011420365760484728, + "loss": 0.7057, + "step": 7880 + }, + { + "epoch": 1.82, + "learning_rate": 0.0001141856518037909, + "loss": 0.6349, + "step": 7881 + }, + { + "epoch": 1.82, + "learning_rate": 0.00011416764553336588, + "loss": 0.7533, + "step": 7882 + }, + { + "epoch": 1.83, + "learning_rate": 0.000114149638794168, + "loss": 0.691, + "step": 7883 + }, + { + "epoch": 1.83, + "learning_rate": 0.00011413163158679305, + "loss": 0.6457, + "step": 7884 + }, + { + "epoch": 1.83, + "learning_rate": 0.00011411362391183689, + "loss": 0.6974, + "step": 7885 + }, + { + "epoch": 1.83, + "learning_rate": 0.00011409561576989529, + "loss": 0.6153, + "step": 7886 + }, + { + "epoch": 1.83, + "learning_rate": 0.00011407760716156412, + "loss": 0.6299, + "step": 7887 + }, + { + "epoch": 1.83, + "learning_rate": 0.00011405959808743926, + "loss": 0.6277, + "step": 7888 + }, + { + "epoch": 1.83, + "learning_rate": 0.00011404158854811653, + "loss": 0.6306, + "step": 7889 + }, + { + "epoch": 1.83, + "learning_rate": 0.00011402357854419193, + "loss": 0.7213, + "step": 7890 + }, + { + "epoch": 1.83, + "learning_rate": 0.00011400556807626123, + "loss": 0.7057, + "step": 7891 + }, + { + "epoch": 1.83, + "learning_rate": 0.00011398755714492044, + "loss": 0.7414, + "step": 7892 + }, + { + "epoch": 1.83, + "learning_rate": 0.00011396954575076551, + "loss": 0.7095, + "step": 7893 + }, + { + "epoch": 1.83, + "learning_rate": 0.00011395153389439233, + "loss": 0.6753, + "step": 7894 + }, + { + "epoch": 1.83, + "learning_rate": 0.00011393352157639687, + "loss": 0.7281, + "step": 7895 + }, + { + "epoch": 1.83, + "learning_rate": 0.0001139155087973752, + "loss": 0.7338, + "step": 7896 + }, + { + "epoch": 1.83, + "learning_rate": 0.00011389749555792324, + "loss": 0.7149, + "step": 7897 + }, + { + "epoch": 1.83, + "learning_rate": 0.00011387948185863701, + "loss": 0.6699, + "step": 7898 + }, + { + "epoch": 1.83, + "learning_rate": 0.00011386146770011257, + "loss": 0.748, + "step": 7899 + }, + { + "epoch": 1.83, + "learning_rate": 0.00011384345308294595, + "loss": 0.6411, + "step": 7900 + }, + { + "epoch": 1.83, + "learning_rate": 0.0001138254380077332, + "loss": 0.6263, + "step": 7901 + }, + { + "epoch": 1.83, + "learning_rate": 0.00011380742247507042, + "loss": 0.7095, + "step": 7902 + }, + { + "epoch": 1.83, + "learning_rate": 0.00011378940648555368, + "loss": 0.7065, + "step": 7903 + }, + { + "epoch": 1.83, + "learning_rate": 0.00011377139003977909, + "loss": 0.751, + "step": 7904 + }, + { + "epoch": 1.83, + "learning_rate": 0.00011375337313834275, + "loss": 0.6513, + "step": 7905 + }, + { + "epoch": 1.83, + "learning_rate": 0.00011373535578184082, + "loss": 0.7342, + "step": 7906 + }, + { + "epoch": 1.83, + "learning_rate": 0.00011371733797086948, + "loss": 0.6911, + "step": 7907 + }, + { + "epoch": 1.83, + "learning_rate": 0.00011369931970602483, + "loss": 0.6741, + "step": 7908 + }, + { + "epoch": 1.83, + "learning_rate": 0.00011368130098790308, + "loss": 0.6671, + "step": 7909 + }, + { + "epoch": 1.83, + "learning_rate": 0.00011366328181710044, + "loss": 0.6577, + "step": 7910 + }, + { + "epoch": 1.83, + "learning_rate": 0.00011364526219421309, + "loss": 0.6549, + "step": 7911 + }, + { + "epoch": 1.83, + "learning_rate": 0.00011362724211983731, + "loss": 0.6594, + "step": 7912 + }, + { + "epoch": 1.83, + "learning_rate": 0.00011360922159456928, + "loss": 0.6648, + "step": 7913 + }, + { + "epoch": 1.83, + "learning_rate": 0.00011359120061900527, + "loss": 0.7032, + "step": 7914 + }, + { + "epoch": 1.83, + "learning_rate": 0.00011357317919374157, + "loss": 0.6511, + "step": 7915 + }, + { + "epoch": 1.83, + "learning_rate": 0.00011355515731937445, + "loss": 0.6806, + "step": 7916 + }, + { + "epoch": 1.83, + "learning_rate": 0.00011353713499650023, + "loss": 0.7213, + "step": 7917 + }, + { + "epoch": 1.83, + "learning_rate": 0.00011351911222571518, + "loss": 0.6776, + "step": 7918 + }, + { + "epoch": 1.83, + "learning_rate": 0.00011350108900761569, + "loss": 0.6635, + "step": 7919 + }, + { + "epoch": 1.83, + "learning_rate": 0.00011348306534279805, + "loss": 0.751, + "step": 7920 + }, + { + "epoch": 1.83, + "learning_rate": 0.00011346504123185866, + "loss": 0.7389, + "step": 7921 + }, + { + "epoch": 1.83, + "learning_rate": 0.00011344701667539385, + "loss": 0.7146, + "step": 7922 + }, + { + "epoch": 1.83, + "learning_rate": 0.00011342899167400005, + "loss": 0.7699, + "step": 7923 + }, + { + "epoch": 1.83, + "learning_rate": 0.00011341096622827365, + "loss": 0.7455, + "step": 7924 + }, + { + "epoch": 1.83, + "learning_rate": 0.00011339294033881105, + "loss": 0.657, + "step": 7925 + }, + { + "epoch": 1.83, + "learning_rate": 0.00011337491400620872, + "loss": 0.7049, + "step": 7926 + }, + { + "epoch": 1.84, + "learning_rate": 0.00011335688723106306, + "loss": 0.6903, + "step": 7927 + }, + { + "epoch": 1.84, + "learning_rate": 0.00011333886001397058, + "loss": 0.7545, + "step": 7928 + }, + { + "epoch": 1.84, + "learning_rate": 0.00011332083235552773, + "loss": 0.7212, + "step": 7929 + }, + { + "epoch": 1.84, + "learning_rate": 0.000113302804256331, + "loss": 0.7268, + "step": 7930 + }, + { + "epoch": 1.84, + "learning_rate": 0.00011328477571697691, + "loss": 0.7572, + "step": 7931 + }, + { + "epoch": 1.84, + "learning_rate": 0.00011326674673806195, + "loss": 0.7369, + "step": 7932 + }, + { + "epoch": 1.84, + "learning_rate": 0.0001132487173201827, + "loss": 0.6515, + "step": 7933 + }, + { + "epoch": 1.84, + "learning_rate": 0.00011323068746393568, + "loss": 0.6623, + "step": 7934 + }, + { + "epoch": 1.84, + "learning_rate": 0.00011321265716991744, + "loss": 0.701, + "step": 7935 + }, + { + "epoch": 1.84, + "learning_rate": 0.00011319462643872461, + "loss": 0.7228, + "step": 7936 + }, + { + "epoch": 1.84, + "learning_rate": 0.00011317659527095375, + "loss": 0.6903, + "step": 7937 + }, + { + "epoch": 1.84, + "learning_rate": 0.00011315856366720147, + "loss": 0.5927, + "step": 7938 + }, + { + "epoch": 1.84, + "learning_rate": 0.00011314053162806436, + "loss": 0.6781, + "step": 7939 + }, + { + "epoch": 1.84, + "learning_rate": 0.0001131224991541391, + "loss": 0.7125, + "step": 7940 + }, + { + "epoch": 1.84, + "learning_rate": 0.00011310446624602233, + "loss": 0.6706, + "step": 7941 + }, + { + "epoch": 1.84, + "learning_rate": 0.00011308643290431074, + "loss": 0.6955, + "step": 7942 + }, + { + "epoch": 1.84, + "learning_rate": 0.00011306839912960096, + "loss": 0.6442, + "step": 7943 + }, + { + "epoch": 1.84, + "learning_rate": 0.00011305036492248972, + "loss": 0.6938, + "step": 7944 + }, + { + "epoch": 1.84, + "learning_rate": 0.00011303233028357367, + "loss": 0.7725, + "step": 7945 + }, + { + "epoch": 1.84, + "learning_rate": 0.00011301429521344962, + "loss": 0.6808, + "step": 7946 + }, + { + "epoch": 1.84, + "learning_rate": 0.00011299625971271424, + "loss": 0.7503, + "step": 7947 + }, + { + "epoch": 1.84, + "learning_rate": 0.00011297822378196431, + "loss": 0.6804, + "step": 7948 + }, + { + "epoch": 1.84, + "learning_rate": 0.00011296018742179662, + "loss": 0.729, + "step": 7949 + }, + { + "epoch": 1.84, + "learning_rate": 0.00011294215063280787, + "loss": 0.6756, + "step": 7950 + }, + { + "epoch": 1.84, + "learning_rate": 0.0001129241134155949, + "loss": 0.6743, + "step": 7951 + }, + { + "epoch": 1.84, + "learning_rate": 0.00011290607577075453, + "loss": 0.6854, + "step": 7952 + }, + { + "epoch": 1.84, + "learning_rate": 0.00011288803769888359, + "loss": 0.7063, + "step": 7953 + }, + { + "epoch": 1.84, + "learning_rate": 0.0001128699992005789, + "loss": 0.648, + "step": 7954 + }, + { + "epoch": 1.84, + "learning_rate": 0.0001128519602764373, + "loss": 0.645, + "step": 7955 + }, + { + "epoch": 1.84, + "learning_rate": 0.00011283392092705562, + "loss": 0.6509, + "step": 7956 + }, + { + "epoch": 1.84, + "learning_rate": 0.00011281588115303081, + "loss": 0.6519, + "step": 7957 + }, + { + "epoch": 1.84, + "learning_rate": 0.00011279784095495973, + "loss": 0.6709, + "step": 7958 + }, + { + "epoch": 1.84, + "learning_rate": 0.00011277980033343932, + "loss": 0.6868, + "step": 7959 + }, + { + "epoch": 1.84, + "learning_rate": 0.00011276175928906641, + "loss": 0.6717, + "step": 7960 + }, + { + "epoch": 1.84, + "learning_rate": 0.00011274371782243802, + "loss": 0.6518, + "step": 7961 + }, + { + "epoch": 1.84, + "learning_rate": 0.00011272567593415105, + "loss": 0.6477, + "step": 7962 + }, + { + "epoch": 1.84, + "learning_rate": 0.00011270763362480248, + "loss": 0.6683, + "step": 7963 + }, + { + "epoch": 1.84, + "learning_rate": 0.00011268959089498929, + "loss": 0.6861, + "step": 7964 + }, + { + "epoch": 1.84, + "learning_rate": 0.00011267154774530851, + "loss": 0.7344, + "step": 7965 + }, + { + "epoch": 1.84, + "learning_rate": 0.00011265350417635706, + "loss": 0.709, + "step": 7966 + }, + { + "epoch": 1.84, + "learning_rate": 0.000112635460188732, + "loss": 0.7061, + "step": 7967 + }, + { + "epoch": 1.84, + "learning_rate": 0.00011261741578303033, + "loss": 0.7484, + "step": 7968 + }, + { + "epoch": 1.84, + "learning_rate": 0.00011259937095984918, + "loss": 0.7388, + "step": 7969 + }, + { + "epoch": 1.85, + "learning_rate": 0.00011258132571978555, + "loss": 0.7097, + "step": 7970 + }, + { + "epoch": 1.85, + "learning_rate": 0.0001125632800634365, + "loss": 0.6863, + "step": 7971 + }, + { + "epoch": 1.85, + "learning_rate": 0.00011254523399139913, + "loss": 0.6253, + "step": 7972 + }, + { + "epoch": 1.85, + "learning_rate": 0.00011252718750427053, + "loss": 0.7508, + "step": 7973 + }, + { + "epoch": 1.85, + "learning_rate": 0.00011250914060264785, + "loss": 0.6856, + "step": 7974 + }, + { + "epoch": 1.85, + "learning_rate": 0.00011249109328712823, + "loss": 0.6813, + "step": 7975 + }, + { + "epoch": 1.85, + "learning_rate": 0.00011247304555830876, + "loss": 0.7235, + "step": 7976 + }, + { + "epoch": 1.85, + "learning_rate": 0.0001124549974167866, + "loss": 0.6372, + "step": 7977 + }, + { + "epoch": 1.85, + "learning_rate": 0.00011243694886315894, + "loss": 0.7022, + "step": 7978 + }, + { + "epoch": 1.85, + "learning_rate": 0.00011241889989802293, + "loss": 0.6886, + "step": 7979 + }, + { + "epoch": 1.85, + "learning_rate": 0.0001124008505219758, + "loss": 0.6607, + "step": 7980 + }, + { + "epoch": 1.85, + "learning_rate": 0.0001123828007356148, + "loss": 0.7181, + "step": 7981 + }, + { + "epoch": 1.85, + "learning_rate": 0.00011236475053953708, + "loss": 0.6705, + "step": 7982 + }, + { + "epoch": 1.85, + "learning_rate": 0.00011234669993433988, + "loss": 0.7424, + "step": 7983 + }, + { + "epoch": 1.85, + "learning_rate": 0.0001123286489206205, + "loss": 0.6838, + "step": 7984 + }, + { + "epoch": 1.85, + "learning_rate": 0.00011231059749897615, + "loss": 0.7195, + "step": 7985 + }, + { + "epoch": 1.85, + "learning_rate": 0.00011229254567000415, + "loss": 0.6858, + "step": 7986 + }, + { + "epoch": 1.85, + "learning_rate": 0.00011227449343430179, + "loss": 0.6793, + "step": 7987 + }, + { + "epoch": 1.85, + "learning_rate": 0.00011225644079246633, + "loss": 0.6543, + "step": 7988 + }, + { + "epoch": 1.85, + "learning_rate": 0.00011223838774509514, + "loss": 0.6617, + "step": 7989 + }, + { + "epoch": 1.85, + "learning_rate": 0.00011222033429278552, + "loss": 0.6481, + "step": 7990 + }, + { + "epoch": 1.85, + "learning_rate": 0.00011220228043613484, + "loss": 0.7147, + "step": 7991 + }, + { + "epoch": 1.85, + "learning_rate": 0.00011218422617574043, + "loss": 0.6144, + "step": 7992 + }, + { + "epoch": 1.85, + "learning_rate": 0.00011216617151219968, + "loss": 0.7352, + "step": 7993 + }, + { + "epoch": 1.85, + "learning_rate": 0.00011214811644610994, + "loss": 0.7167, + "step": 7994 + }, + { + "epoch": 1.85, + "learning_rate": 0.00011213006097806866, + "loss": 0.7488, + "step": 7995 + }, + { + "epoch": 1.85, + "learning_rate": 0.00011211200510867321, + "loss": 0.6556, + "step": 7996 + }, + { + "epoch": 1.85, + "learning_rate": 0.00011209394883852108, + "loss": 0.8032, + "step": 7997 + }, + { + "epoch": 1.85, + "learning_rate": 0.00011207589216820964, + "loss": 0.6996, + "step": 7998 + }, + { + "epoch": 1.85, + "learning_rate": 0.00011205783509833634, + "loss": 0.6421, + "step": 7999 + }, + { + "epoch": 1.85, + "learning_rate": 0.00011203977762949869, + "loss": 0.6838, + "step": 8000 + }, + { + "epoch": 1.85, + "learning_rate": 0.00011202171976229412, + "loss": 0.7191, + "step": 8001 + }, + { + "epoch": 1.85, + "learning_rate": 0.00011200366149732015, + "loss": 0.7348, + "step": 8002 + }, + { + "epoch": 1.85, + "learning_rate": 0.0001119856028351743, + "loss": 0.6394, + "step": 8003 + }, + { + "epoch": 1.85, + "learning_rate": 0.00011196754377645406, + "loss": 0.6458, + "step": 8004 + }, + { + "epoch": 1.85, + "learning_rate": 0.00011194948432175696, + "loss": 0.6823, + "step": 8005 + }, + { + "epoch": 1.85, + "learning_rate": 0.00011193142447168055, + "loss": 0.6782, + "step": 8006 + }, + { + "epoch": 1.85, + "learning_rate": 0.00011191336422682237, + "loss": 0.6861, + "step": 8007 + }, + { + "epoch": 1.85, + "learning_rate": 0.00011189530358778005, + "loss": 0.7068, + "step": 8008 + }, + { + "epoch": 1.85, + "learning_rate": 0.00011187724255515108, + "loss": 0.7278, + "step": 8009 + }, + { + "epoch": 1.85, + "learning_rate": 0.00011185918112953313, + "loss": 0.6315, + "step": 8010 + }, + { + "epoch": 1.85, + "learning_rate": 0.00011184111931152377, + "loss": 0.6444, + "step": 8011 + }, + { + "epoch": 1.85, + "learning_rate": 0.00011182305710172063, + "loss": 0.75, + "step": 8012 + }, + { + "epoch": 1.86, + "learning_rate": 0.00011180499450072137, + "loss": 0.7208, + "step": 8013 + }, + { + "epoch": 1.86, + "learning_rate": 0.00011178693150912361, + "loss": 0.6716, + "step": 8014 + }, + { + "epoch": 1.86, + "learning_rate": 0.00011176886812752499, + "loss": 0.6461, + "step": 8015 + }, + { + "epoch": 1.86, + "learning_rate": 0.00011175080435652323, + "loss": 0.7052, + "step": 8016 + }, + { + "epoch": 1.86, + "learning_rate": 0.00011173274019671601, + "loss": 0.7156, + "step": 8017 + }, + { + "epoch": 1.86, + "learning_rate": 0.00011171467564870098, + "loss": 0.655, + "step": 8018 + }, + { + "epoch": 1.86, + "learning_rate": 0.0001116966107130759, + "loss": 0.6815, + "step": 8019 + }, + { + "epoch": 1.86, + "learning_rate": 0.0001116785453904385, + "loss": 0.7312, + "step": 8020 + }, + { + "epoch": 1.86, + "learning_rate": 0.00011166047968138646, + "loss": 0.6449, + "step": 8021 + }, + { + "epoch": 1.86, + "learning_rate": 0.00011164241358651759, + "loss": 0.6354, + "step": 8022 + }, + { + "epoch": 1.86, + "learning_rate": 0.00011162434710642963, + "loss": 0.6635, + "step": 8023 + }, + { + "epoch": 1.86, + "learning_rate": 0.00011160628024172037, + "loss": 0.6748, + "step": 8024 + }, + { + "epoch": 1.86, + "learning_rate": 0.00011158821299298759, + "loss": 0.6417, + "step": 8025 + }, + { + "epoch": 1.86, + "learning_rate": 0.00011157014536082906, + "loss": 0.6854, + "step": 8026 + }, + { + "epoch": 1.86, + "learning_rate": 0.00011155207734584263, + "loss": 0.671, + "step": 8027 + }, + { + "epoch": 1.86, + "learning_rate": 0.00011153400894862613, + "loss": 0.6521, + "step": 8028 + }, + { + "epoch": 1.86, + "learning_rate": 0.00011151594016977738, + "loss": 0.6795, + "step": 8029 + }, + { + "epoch": 1.86, + "learning_rate": 0.00011149787100989424, + "loss": 0.6798, + "step": 8030 + }, + { + "epoch": 1.86, + "learning_rate": 0.00011147980146957458, + "loss": 0.7111, + "step": 8031 + }, + { + "epoch": 1.86, + "learning_rate": 0.00011146173154941625, + "loss": 0.7107, + "step": 8032 + }, + { + "epoch": 1.86, + "learning_rate": 0.00011144366125001717, + "loss": 0.6777, + "step": 8033 + }, + { + "epoch": 1.86, + "learning_rate": 0.00011142559057197523, + "loss": 0.7204, + "step": 8034 + }, + { + "epoch": 1.86, + "learning_rate": 0.00011140751951588837, + "loss": 0.654, + "step": 8035 + }, + { + "epoch": 1.86, + "learning_rate": 0.00011138944808235448, + "loss": 0.6246, + "step": 8036 + }, + { + "epoch": 1.86, + "learning_rate": 0.0001113713762719715, + "loss": 0.677, + "step": 8037 + }, + { + "epoch": 1.86, + "learning_rate": 0.0001113533040853374, + "loss": 0.7267, + "step": 8038 + }, + { + "epoch": 1.86, + "learning_rate": 0.00011133523152305017, + "loss": 0.7078, + "step": 8039 + }, + { + "epoch": 1.86, + "learning_rate": 0.00011131715858570771, + "loss": 0.7073, + "step": 8040 + }, + { + "epoch": 1.86, + "learning_rate": 0.00011129908527390809, + "loss": 0.72, + "step": 8041 + }, + { + "epoch": 1.86, + "learning_rate": 0.00011128101158824926, + "loss": 0.747, + "step": 8042 + }, + { + "epoch": 1.86, + "learning_rate": 0.00011126293752932926, + "loss": 0.7441, + "step": 8043 + }, + { + "epoch": 1.86, + "learning_rate": 0.00011124486309774611, + "loss": 0.7443, + "step": 8044 + }, + { + "epoch": 1.86, + "learning_rate": 0.00011122678829409787, + "loss": 0.7056, + "step": 8045 + }, + { + "epoch": 1.86, + "learning_rate": 0.00011120871311898254, + "loss": 0.6835, + "step": 8046 + }, + { + "epoch": 1.86, + "learning_rate": 0.00011119063757299822, + "loss": 0.7206, + "step": 8047 + }, + { + "epoch": 1.86, + "learning_rate": 0.00011117256165674299, + "loss": 0.7305, + "step": 8048 + }, + { + "epoch": 1.86, + "learning_rate": 0.0001111544853708149, + "loss": 0.7182, + "step": 8049 + }, + { + "epoch": 1.86, + "learning_rate": 0.00011113640871581212, + "loss": 0.6087, + "step": 8050 + }, + { + "epoch": 1.86, + "learning_rate": 0.0001111183316923327, + "loss": 0.7595, + "step": 8051 + }, + { + "epoch": 1.86, + "learning_rate": 0.00011110025430097474, + "loss": 0.7166, + "step": 8052 + }, + { + "epoch": 1.86, + "learning_rate": 0.00011108217654233649, + "loss": 0.7172, + "step": 8053 + }, + { + "epoch": 1.86, + "learning_rate": 0.000111064098417016, + "loss": 0.6104, + "step": 8054 + }, + { + "epoch": 1.86, + "learning_rate": 0.00011104601992561147, + "loss": 0.7042, + "step": 8055 + }, + { + "epoch": 1.87, + "learning_rate": 0.00011102794106872106, + "loss": 0.7304, + "step": 8056 + }, + { + "epoch": 1.87, + "learning_rate": 0.00011100986184694293, + "loss": 0.6527, + "step": 8057 + }, + { + "epoch": 1.87, + "learning_rate": 0.00011099178226087535, + "loss": 0.6952, + "step": 8058 + }, + { + "epoch": 1.87, + "learning_rate": 0.00011097370231111646, + "loss": 0.6712, + "step": 8059 + }, + { + "epoch": 1.87, + "learning_rate": 0.00011095562199826452, + "loss": 0.6508, + "step": 8060 + }, + { + "epoch": 1.87, + "learning_rate": 0.00011093754132291776, + "loss": 0.61, + "step": 8061 + }, + { + "epoch": 1.87, + "learning_rate": 0.00011091946028567442, + "loss": 0.6699, + "step": 8062 + }, + { + "epoch": 1.87, + "learning_rate": 0.0001109013788871327, + "loss": 0.6657, + "step": 8063 + }, + { + "epoch": 1.87, + "learning_rate": 0.00011088329712789096, + "loss": 0.6796, + "step": 8064 + }, + { + "epoch": 1.87, + "learning_rate": 0.00011086521500854745, + "loss": 0.6725, + "step": 8065 + }, + { + "epoch": 1.87, + "learning_rate": 0.00011084713252970049, + "loss": 0.6317, + "step": 8066 + }, + { + "epoch": 1.87, + "learning_rate": 0.00011082904969194832, + "loss": 0.6677, + "step": 8067 + }, + { + "epoch": 1.87, + "learning_rate": 0.0001108109664958893, + "loss": 0.6819, + "step": 8068 + }, + { + "epoch": 1.87, + "learning_rate": 0.00011079288294212171, + "loss": 0.6325, + "step": 8069 + }, + { + "epoch": 1.87, + "learning_rate": 0.00011077479903124394, + "loss": 0.7208, + "step": 8070 + }, + { + "epoch": 1.87, + "learning_rate": 0.00011075671476385438, + "loss": 0.6648, + "step": 8071 + }, + { + "epoch": 1.87, + "learning_rate": 0.00011073863014055132, + "loss": 0.7212, + "step": 8072 + }, + { + "epoch": 1.87, + "learning_rate": 0.00011072054516193317, + "loss": 0.7171, + "step": 8073 + }, + { + "epoch": 1.87, + "learning_rate": 0.00011070245982859828, + "loss": 0.6686, + "step": 8074 + }, + { + "epoch": 1.87, + "learning_rate": 0.0001106843741411451, + "loss": 0.7038, + "step": 8075 + }, + { + "epoch": 1.87, + "learning_rate": 0.00011066628810017206, + "loss": 0.7117, + "step": 8076 + }, + { + "epoch": 1.87, + "learning_rate": 0.0001106482017062775, + "loss": 0.7776, + "step": 8077 + }, + { + "epoch": 1.87, + "learning_rate": 0.00011063011496005992, + "loss": 0.7477, + "step": 8078 + }, + { + "epoch": 1.87, + "learning_rate": 0.00011061202786211773, + "loss": 0.7091, + "step": 8079 + }, + { + "epoch": 1.87, + "learning_rate": 0.0001105939404130494, + "loss": 0.7028, + "step": 8080 + }, + { + "epoch": 1.87, + "learning_rate": 0.0001105758526134534, + "loss": 0.7024, + "step": 8081 + }, + { + "epoch": 1.87, + "learning_rate": 0.00011055776446392825, + "loss": 0.7235, + "step": 8082 + }, + { + "epoch": 1.87, + "learning_rate": 0.0001105396759650724, + "loss": 0.6343, + "step": 8083 + }, + { + "epoch": 1.87, + "learning_rate": 0.00011052158711748434, + "loss": 0.7077, + "step": 8084 + }, + { + "epoch": 1.87, + "learning_rate": 0.00011050349792176263, + "loss": 0.6717, + "step": 8085 + }, + { + "epoch": 1.87, + "learning_rate": 0.00011048540837850572, + "loss": 0.7239, + "step": 8086 + }, + { + "epoch": 1.87, + "learning_rate": 0.00011046731848831226, + "loss": 0.6768, + "step": 8087 + }, + { + "epoch": 1.87, + "learning_rate": 0.00011044922825178074, + "loss": 0.7287, + "step": 8088 + }, + { + "epoch": 1.87, + "learning_rate": 0.00011043113766950972, + "loss": 0.7294, + "step": 8089 + }, + { + "epoch": 1.87, + "learning_rate": 0.00011041304674209778, + "loss": 0.6859, + "step": 8090 + }, + { + "epoch": 1.87, + "learning_rate": 0.00011039495547014349, + "loss": 0.691, + "step": 8091 + }, + { + "epoch": 1.87, + "learning_rate": 0.00011037686385424548, + "loss": 0.6549, + "step": 8092 + }, + { + "epoch": 1.87, + "learning_rate": 0.00011035877189500232, + "loss": 0.6459, + "step": 8093 + }, + { + "epoch": 1.87, + "learning_rate": 0.00011034067959301266, + "loss": 0.6599, + "step": 8094 + }, + { + "epoch": 1.87, + "learning_rate": 0.0001103225869488751, + "loss": 0.6773, + "step": 8095 + }, + { + "epoch": 1.87, + "learning_rate": 0.00011030449396318831, + "loss": 0.6685, + "step": 8096 + }, + { + "epoch": 1.87, + "learning_rate": 0.0001102864006365509, + "loss": 0.7301, + "step": 8097 + }, + { + "epoch": 1.87, + "learning_rate": 0.00011026830696956164, + "loss": 0.7016, + "step": 8098 + }, + { + "epoch": 1.88, + "learning_rate": 0.00011025021296281907, + "loss": 0.7345, + "step": 8099 + }, + { + "epoch": 1.88, + "learning_rate": 0.00011023211861692196, + "loss": 0.681, + "step": 8100 + }, + { + "epoch": 1.88, + "learning_rate": 0.00011021402393246897, + "loss": 0.624, + "step": 8101 + }, + { + "epoch": 1.88, + "learning_rate": 0.00011019592891005883, + "loss": 0.6248, + "step": 8102 + }, + { + "epoch": 1.88, + "learning_rate": 0.00011017783355029026, + "loss": 0.6593, + "step": 8103 + }, + { + "epoch": 1.88, + "learning_rate": 0.00011015973785376199, + "loss": 0.6586, + "step": 8104 + }, + { + "epoch": 1.88, + "learning_rate": 0.00011014164182107274, + "loss": 0.6521, + "step": 8105 + }, + { + "epoch": 1.88, + "learning_rate": 0.00011012354545282129, + "loss": 0.6284, + "step": 8106 + }, + { + "epoch": 1.88, + "learning_rate": 0.0001101054487496064, + "loss": 0.7214, + "step": 8107 + }, + { + "epoch": 1.88, + "learning_rate": 0.00011008735171202684, + "loss": 0.7505, + "step": 8108 + }, + { + "epoch": 1.88, + "learning_rate": 0.00011006925434068139, + "loss": 0.7861, + "step": 8109 + }, + { + "epoch": 1.88, + "learning_rate": 0.00011005115663616887, + "loss": 0.6824, + "step": 8110 + }, + { + "epoch": 1.88, + "learning_rate": 0.00011003305859908806, + "loss": 0.7773, + "step": 8111 + }, + { + "epoch": 1.88, + "learning_rate": 0.00011001496023003781, + "loss": 0.6375, + "step": 8112 + }, + { + "epoch": 1.88, + "learning_rate": 0.00010999686152961693, + "loss": 0.6496, + "step": 8113 + }, + { + "epoch": 1.88, + "learning_rate": 0.00010997876249842427, + "loss": 0.659, + "step": 8114 + }, + { + "epoch": 1.88, + "learning_rate": 0.00010996066313705869, + "loss": 0.7126, + "step": 8115 + }, + { + "epoch": 1.88, + "learning_rate": 0.00010994256344611901, + "loss": 0.6956, + "step": 8116 + }, + { + "epoch": 1.88, + "learning_rate": 0.00010992446342620419, + "loss": 0.6905, + "step": 8117 + }, + { + "epoch": 1.88, + "learning_rate": 0.00010990636307791305, + "loss": 0.6988, + "step": 8118 + }, + { + "epoch": 1.88, + "learning_rate": 0.00010988826240184446, + "loss": 0.7258, + "step": 8119 + }, + { + "epoch": 1.88, + "learning_rate": 0.00010987016139859743, + "loss": 0.6691, + "step": 8120 + }, + { + "epoch": 1.88, + "learning_rate": 0.00010985206006877078, + "loss": 0.7577, + "step": 8121 + }, + { + "epoch": 1.88, + "learning_rate": 0.00010983395841296348, + "loss": 0.7424, + "step": 8122 + }, + { + "epoch": 1.88, + "learning_rate": 0.00010981585643177448, + "loss": 0.7032, + "step": 8123 + }, + { + "epoch": 1.88, + "learning_rate": 0.0001097977541258027, + "loss": 0.6765, + "step": 8124 + }, + { + "epoch": 1.88, + "learning_rate": 0.00010977965149564712, + "loss": 0.6232, + "step": 8125 + }, + { + "epoch": 1.88, + "learning_rate": 0.00010976154854190672, + "loss": 0.6616, + "step": 8126 + }, + { + "epoch": 1.88, + "learning_rate": 0.00010974344526518045, + "loss": 0.686, + "step": 8127 + }, + { + "epoch": 1.88, + "learning_rate": 0.00010972534166606734, + "loss": 0.785, + "step": 8128 + }, + { + "epoch": 1.88, + "learning_rate": 0.00010970723774516638, + "loss": 0.7319, + "step": 8129 + }, + { + "epoch": 1.88, + "learning_rate": 0.0001096891335030766, + "loss": 0.6912, + "step": 8130 + }, + { + "epoch": 1.88, + "learning_rate": 0.00010967102894039698, + "loss": 0.6434, + "step": 8131 + }, + { + "epoch": 1.88, + "learning_rate": 0.00010965292405772658, + "loss": 0.7041, + "step": 8132 + }, + { + "epoch": 1.88, + "learning_rate": 0.00010963481885566447, + "loss": 0.6893, + "step": 8133 + }, + { + "epoch": 1.88, + "learning_rate": 0.0001096167133348097, + "loss": 0.6877, + "step": 8134 + }, + { + "epoch": 1.88, + "learning_rate": 0.00010959860749576131, + "loss": 0.6306, + "step": 8135 + }, + { + "epoch": 1.88, + "learning_rate": 0.00010958050133911836, + "loss": 0.6877, + "step": 8136 + }, + { + "epoch": 1.88, + "learning_rate": 0.00010956239486548003, + "loss": 0.6773, + "step": 8137 + }, + { + "epoch": 1.88, + "learning_rate": 0.00010954428807544533, + "loss": 0.6534, + "step": 8138 + }, + { + "epoch": 1.88, + "learning_rate": 0.00010952618096961341, + "loss": 0.6954, + "step": 8139 + }, + { + "epoch": 1.88, + "learning_rate": 0.00010950807354858338, + "loss": 0.6242, + "step": 8140 + }, + { + "epoch": 1.88, + "learning_rate": 0.00010948996581295436, + "loss": 0.702, + "step": 8141 + }, + { + "epoch": 1.88, + "learning_rate": 0.00010947185776332552, + "loss": 0.7417, + "step": 8142 + }, + { + "epoch": 1.89, + "learning_rate": 0.00010945374940029598, + "loss": 0.6417, + "step": 8143 + }, + { + "epoch": 1.89, + "learning_rate": 0.00010943564072446492, + "loss": 0.6999, + "step": 8144 + }, + { + "epoch": 1.89, + "learning_rate": 0.00010941753173643152, + "loss": 0.6149, + "step": 8145 + }, + { + "epoch": 1.89, + "learning_rate": 0.00010939942243679495, + "loss": 0.6996, + "step": 8146 + }, + { + "epoch": 1.89, + "learning_rate": 0.00010938131282615436, + "loss": 0.6366, + "step": 8147 + }, + { + "epoch": 1.89, + "learning_rate": 0.00010936320290510905, + "loss": 0.691, + "step": 8148 + }, + { + "epoch": 1.89, + "learning_rate": 0.00010934509267425814, + "loss": 0.6847, + "step": 8149 + }, + { + "epoch": 1.89, + "learning_rate": 0.0001093269821342009, + "loss": 0.7271, + "step": 8150 + }, + { + "epoch": 1.89, + "learning_rate": 0.00010930887128553658, + "loss": 0.6538, + "step": 8151 + }, + { + "epoch": 1.89, + "learning_rate": 0.00010929076012886437, + "loss": 0.6708, + "step": 8152 + }, + { + "epoch": 1.89, + "learning_rate": 0.00010927264866478353, + "loss": 0.7553, + "step": 8153 + }, + { + "epoch": 1.89, + "learning_rate": 0.00010925453689389338, + "loss": 0.675, + "step": 8154 + }, + { + "epoch": 1.89, + "learning_rate": 0.00010923642481679315, + "loss": 0.7143, + "step": 8155 + }, + { + "epoch": 1.89, + "learning_rate": 0.00010921831243408218, + "loss": 0.7266, + "step": 8156 + }, + { + "epoch": 1.89, + "learning_rate": 0.00010920019974635967, + "loss": 0.702, + "step": 8157 + }, + { + "epoch": 1.89, + "learning_rate": 0.00010918208675422496, + "loss": 0.6959, + "step": 8158 + }, + { + "epoch": 1.89, + "learning_rate": 0.00010916397345827742, + "loss": 0.6855, + "step": 8159 + }, + { + "epoch": 1.89, + "learning_rate": 0.00010914585985911632, + "loss": 0.6964, + "step": 8160 + }, + { + "epoch": 1.89, + "learning_rate": 0.00010912774595734101, + "loss": 0.6542, + "step": 8161 + }, + { + "epoch": 1.89, + "learning_rate": 0.00010910963175355087, + "loss": 0.745, + "step": 8162 + }, + { + "epoch": 1.89, + "learning_rate": 0.00010909151724834519, + "loss": 0.6694, + "step": 8163 + }, + { + "epoch": 1.89, + "learning_rate": 0.00010907340244232331, + "loss": 0.7813, + "step": 8164 + }, + { + "epoch": 1.89, + "learning_rate": 0.00010905528733608473, + "loss": 0.6923, + "step": 8165 + }, + { + "epoch": 1.89, + "learning_rate": 0.00010903717193022873, + "loss": 0.637, + "step": 8166 + }, + { + "epoch": 1.89, + "learning_rate": 0.00010901905622535477, + "loss": 0.6631, + "step": 8167 + }, + { + "epoch": 1.89, + "learning_rate": 0.0001090009402220622, + "loss": 0.6704, + "step": 8168 + }, + { + "epoch": 1.89, + "learning_rate": 0.00010898282392095048, + "loss": 0.6899, + "step": 8169 + }, + { + "epoch": 1.89, + "learning_rate": 0.00010896470732261897, + "loss": 0.7091, + "step": 8170 + }, + { + "epoch": 1.89, + "learning_rate": 0.00010894659042766715, + "loss": 0.7052, + "step": 8171 + }, + { + "epoch": 1.89, + "learning_rate": 0.00010892847323669451, + "loss": 0.6683, + "step": 8172 + }, + { + "epoch": 1.89, + "learning_rate": 0.00010891035575030039, + "loss": 0.6978, + "step": 8173 + }, + { + "epoch": 1.89, + "learning_rate": 0.00010889223796908437, + "loss": 0.7047, + "step": 8174 + }, + { + "epoch": 1.89, + "learning_rate": 0.00010887411989364585, + "loss": 0.7094, + "step": 8175 + }, + { + "epoch": 1.89, + "learning_rate": 0.00010885600152458428, + "loss": 0.7644, + "step": 8176 + }, + { + "epoch": 1.89, + "learning_rate": 0.0001088378828624993, + "loss": 0.6659, + "step": 8177 + }, + { + "epoch": 1.89, + "learning_rate": 0.00010881976390799025, + "loss": 0.7085, + "step": 8178 + }, + { + "epoch": 1.89, + "learning_rate": 0.00010880164466165674, + "loss": 0.6425, + "step": 8179 + }, + { + "epoch": 1.89, + "learning_rate": 0.00010878352512409824, + "loss": 0.6973, + "step": 8180 + }, + { + "epoch": 1.89, + "learning_rate": 0.00010876540529591431, + "loss": 0.647, + "step": 8181 + }, + { + "epoch": 1.89, + "learning_rate": 0.00010874728517770447, + "loss": 0.6848, + "step": 8182 + }, + { + "epoch": 1.89, + "learning_rate": 0.00010872916477006834, + "loss": 0.6609, + "step": 8183 + }, + { + "epoch": 1.89, + "learning_rate": 0.00010871104407360537, + "loss": 0.6871, + "step": 8184 + }, + { + "epoch": 1.89, + "learning_rate": 0.00010869292308891521, + "loss": 0.7203, + "step": 8185 + }, + { + "epoch": 1.9, + "learning_rate": 0.0001086748018165974, + "loss": 0.7136, + "step": 8186 + }, + { + "epoch": 1.9, + "learning_rate": 0.00010865668025725151, + "loss": 0.711, + "step": 8187 + }, + { + "epoch": 1.9, + "learning_rate": 0.00010863855841147723, + "loss": 0.6813, + "step": 8188 + }, + { + "epoch": 1.9, + "learning_rate": 0.00010862043627987408, + "loss": 0.7102, + "step": 8189 + }, + { + "epoch": 1.9, + "learning_rate": 0.00010860231386304172, + "loss": 0.7177, + "step": 8190 + }, + { + "epoch": 1.9, + "learning_rate": 0.00010858419116157975, + "loss": 0.6764, + "step": 8191 + }, + { + "epoch": 1.9, + "learning_rate": 0.00010856606817608781, + "loss": 0.6129, + "step": 8192 + }, + { + "epoch": 1.9, + "learning_rate": 0.00010854794490716555, + "loss": 0.6558, + "step": 8193 + }, + { + "epoch": 1.9, + "learning_rate": 0.00010852982135541266, + "loss": 0.672, + "step": 8194 + }, + { + "epoch": 1.9, + "learning_rate": 0.00010851169752142874, + "loss": 0.6675, + "step": 8195 + }, + { + "epoch": 1.9, + "learning_rate": 0.00010849357340581349, + "loss": 0.6947, + "step": 8196 + }, + { + "epoch": 1.9, + "learning_rate": 0.00010847544900916665, + "loss": 0.7192, + "step": 8197 + }, + { + "epoch": 1.9, + "learning_rate": 0.00010845732433208779, + "loss": 0.6242, + "step": 8198 + }, + { + "epoch": 1.9, + "learning_rate": 0.00010843919937517672, + "loss": 0.6871, + "step": 8199 + }, + { + "epoch": 1.9, + "learning_rate": 0.00010842107413903313, + "loss": 0.7309, + "step": 8200 + }, + { + "epoch": 1.9, + "learning_rate": 0.00010840294862425669, + "loss": 0.6585, + "step": 8201 + }, + { + "epoch": 1.9, + "learning_rate": 0.00010838482283144718, + "loss": 0.7555, + "step": 8202 + }, + { + "epoch": 1.9, + "learning_rate": 0.0001083666967612043, + "loss": 0.6997, + "step": 8203 + }, + { + "epoch": 1.9, + "learning_rate": 0.00010834857041412782, + "loss": 0.6268, + "step": 8204 + }, + { + "epoch": 1.9, + "learning_rate": 0.00010833044379081752, + "loss": 0.7279, + "step": 8205 + }, + { + "epoch": 1.9, + "learning_rate": 0.00010831231689187311, + "loss": 0.7541, + "step": 8206 + }, + { + "epoch": 1.9, + "learning_rate": 0.00010829418971789442, + "loss": 0.6547, + "step": 8207 + }, + { + "epoch": 1.9, + "learning_rate": 0.00010827606226948119, + "loss": 0.6713, + "step": 8208 + }, + { + "epoch": 1.9, + "learning_rate": 0.00010825793454723325, + "loss": 0.6687, + "step": 8209 + }, + { + "epoch": 1.9, + "learning_rate": 0.00010823980655175037, + "loss": 0.6374, + "step": 8210 + }, + { + "epoch": 1.9, + "learning_rate": 0.00010822167828363236, + "loss": 0.7136, + "step": 8211 + }, + { + "epoch": 1.9, + "learning_rate": 0.00010820354974347909, + "loss": 0.6852, + "step": 8212 + }, + { + "epoch": 1.9, + "learning_rate": 0.00010818542093189032, + "loss": 0.6554, + "step": 8213 + }, + { + "epoch": 1.9, + "learning_rate": 0.00010816729184946595, + "loss": 0.6693, + "step": 8214 + }, + { + "epoch": 1.9, + "learning_rate": 0.00010814916249680578, + "loss": 0.7064, + "step": 8215 + }, + { + "epoch": 1.9, + "learning_rate": 0.0001081310328745097, + "loss": 0.7005, + "step": 8216 + }, + { + "epoch": 1.9, + "learning_rate": 0.00010811290298317755, + "loss": 0.5933, + "step": 8217 + }, + { + "epoch": 1.9, + "learning_rate": 0.00010809477282340923, + "loss": 0.686, + "step": 8218 + }, + { + "epoch": 1.9, + "learning_rate": 0.0001080766423958046, + "loss": 0.6579, + "step": 8219 + }, + { + "epoch": 1.9, + "learning_rate": 0.00010805851170096354, + "loss": 0.7669, + "step": 8220 + }, + { + "epoch": 1.9, + "learning_rate": 0.00010804038073948599, + "loss": 0.6395, + "step": 8221 + }, + { + "epoch": 1.9, + "learning_rate": 0.00010802224951197184, + "loss": 0.6478, + "step": 8222 + }, + { + "epoch": 1.9, + "learning_rate": 0.00010800411801902102, + "loss": 0.6647, + "step": 8223 + }, + { + "epoch": 1.9, + "learning_rate": 0.00010798598626123341, + "loss": 0.6384, + "step": 8224 + }, + { + "epoch": 1.9, + "learning_rate": 0.00010796785423920901, + "loss": 0.7805, + "step": 8225 + }, + { + "epoch": 1.9, + "learning_rate": 0.0001079497219535477, + "loss": 0.6789, + "step": 8226 + }, + { + "epoch": 1.9, + "learning_rate": 0.0001079315894048495, + "loss": 0.6212, + "step": 8227 + }, + { + "epoch": 1.9, + "learning_rate": 0.00010791345659371433, + "loss": 0.7083, + "step": 8228 + }, + { + "epoch": 1.91, + "learning_rate": 0.00010789532352074216, + "loss": 0.6734, + "step": 8229 + }, + { + "epoch": 1.91, + "learning_rate": 0.000107877190186533, + "loss": 0.6394, + "step": 8230 + }, + { + "epoch": 1.91, + "learning_rate": 0.00010785905659168678, + "loss": 0.6886, + "step": 8231 + }, + { + "epoch": 1.91, + "learning_rate": 0.00010784092273680358, + "loss": 0.6542, + "step": 8232 + }, + { + "epoch": 1.91, + "learning_rate": 0.00010782278862248331, + "loss": 0.701, + "step": 8233 + }, + { + "epoch": 1.91, + "learning_rate": 0.00010780465424932606, + "loss": 0.6623, + "step": 8234 + }, + { + "epoch": 1.91, + "learning_rate": 0.00010778651961793183, + "loss": 0.7139, + "step": 8235 + }, + { + "epoch": 1.91, + "learning_rate": 0.00010776838472890065, + "loss": 0.7043, + "step": 8236 + }, + { + "epoch": 1.91, + "learning_rate": 0.00010775024958283251, + "loss": 0.6197, + "step": 8237 + }, + { + "epoch": 1.91, + "learning_rate": 0.00010773211418032754, + "loss": 0.6527, + "step": 8238 + }, + { + "epoch": 1.91, + "learning_rate": 0.00010771397852198575, + "loss": 0.7472, + "step": 8239 + }, + { + "epoch": 1.91, + "learning_rate": 0.00010769584260840722, + "loss": 0.722, + "step": 8240 + }, + { + "epoch": 1.91, + "learning_rate": 0.00010767770644019202, + "loss": 0.6963, + "step": 8241 + }, + { + "epoch": 1.91, + "learning_rate": 0.00010765957001794018, + "loss": 0.5639, + "step": 8242 + }, + { + "epoch": 1.91, + "learning_rate": 0.00010764143334225187, + "loss": 0.7328, + "step": 8243 + }, + { + "epoch": 1.91, + "learning_rate": 0.00010762329641372716, + "loss": 0.6689, + "step": 8244 + }, + { + "epoch": 1.91, + "learning_rate": 0.00010760515923296617, + "loss": 0.6415, + "step": 8245 + }, + { + "epoch": 1.91, + "learning_rate": 0.00010758702180056897, + "loss": 0.7079, + "step": 8246 + }, + { + "epoch": 1.91, + "learning_rate": 0.00010756888411713575, + "loss": 0.6447, + "step": 8247 + }, + { + "epoch": 1.91, + "learning_rate": 0.00010755074618326655, + "loss": 0.706, + "step": 8248 + }, + { + "epoch": 1.91, + "learning_rate": 0.00010753260799956157, + "loss": 0.6274, + "step": 8249 + }, + { + "epoch": 1.91, + "learning_rate": 0.00010751446956662097, + "loss": 0.6744, + "step": 8250 + }, + { + "epoch": 1.91, + "learning_rate": 0.0001074963308850449, + "loss": 0.6736, + "step": 8251 + }, + { + "epoch": 1.91, + "learning_rate": 0.00010747819195543351, + "loss": 0.7157, + "step": 8252 + }, + { + "epoch": 1.91, + "learning_rate": 0.00010746005277838697, + "loss": 0.6794, + "step": 8253 + }, + { + "epoch": 1.91, + "learning_rate": 0.00010744191335450545, + "loss": 0.6904, + "step": 8254 + }, + { + "epoch": 1.91, + "learning_rate": 0.00010742377368438914, + "loss": 0.7314, + "step": 8255 + }, + { + "epoch": 1.91, + "learning_rate": 0.00010740563376863828, + "loss": 0.6681, + "step": 8256 + }, + { + "epoch": 1.91, + "learning_rate": 0.00010738749360785307, + "loss": 0.7281, + "step": 8257 + }, + { + "epoch": 1.91, + "learning_rate": 0.00010736935320263371, + "loss": 0.7127, + "step": 8258 + }, + { + "epoch": 1.91, + "learning_rate": 0.00010735121255358036, + "loss": 0.7043, + "step": 8259 + }, + { + "epoch": 1.91, + "learning_rate": 0.00010733307166129331, + "loss": 0.6628, + "step": 8260 + }, + { + "epoch": 1.91, + "learning_rate": 0.00010731493052637283, + "loss": 0.6817, + "step": 8261 + }, + { + "epoch": 1.91, + "learning_rate": 0.0001072967891494191, + "loss": 0.6616, + "step": 8262 + }, + { + "epoch": 1.91, + "learning_rate": 0.00010727864753103245, + "loss": 0.7465, + "step": 8263 + }, + { + "epoch": 1.91, + "learning_rate": 0.00010726050567181304, + "loss": 0.7186, + "step": 8264 + }, + { + "epoch": 1.91, + "learning_rate": 0.00010724236357236121, + "loss": 0.6351, + "step": 8265 + }, + { + "epoch": 1.91, + "learning_rate": 0.00010722422123327722, + "loss": 0.6789, + "step": 8266 + }, + { + "epoch": 1.91, + "learning_rate": 0.00010720607865516137, + "loss": 0.8318, + "step": 8267 + }, + { + "epoch": 1.91, + "learning_rate": 0.00010718793583861395, + "loss": 0.6163, + "step": 8268 + }, + { + "epoch": 1.91, + "learning_rate": 0.00010716979278423522, + "loss": 0.688, + "step": 8269 + }, + { + "epoch": 1.91, + "learning_rate": 0.00010715164949262555, + "loss": 0.7363, + "step": 8270 + }, + { + "epoch": 1.91, + "learning_rate": 0.00010713350596438521, + "loss": 0.7195, + "step": 8271 + }, + { + "epoch": 1.92, + "learning_rate": 0.00010711536220011455, + "loss": 0.7268, + "step": 8272 + }, + { + "epoch": 1.92, + "learning_rate": 0.00010709721820041392, + "loss": 0.7369, + "step": 8273 + }, + { + "epoch": 1.92, + "learning_rate": 0.00010707907396588361, + "loss": 0.7047, + "step": 8274 + }, + { + "epoch": 1.92, + "learning_rate": 0.00010706092949712403, + "loss": 0.6907, + "step": 8275 + }, + { + "epoch": 1.92, + "learning_rate": 0.00010704278479473546, + "loss": 0.684, + "step": 8276 + }, + { + "epoch": 1.92, + "learning_rate": 0.00010702463985931832, + "loss": 0.657, + "step": 8277 + }, + { + "epoch": 1.92, + "learning_rate": 0.00010700649469147299, + "loss": 0.6521, + "step": 8278 + }, + { + "epoch": 1.92, + "learning_rate": 0.00010698834929179982, + "loss": 0.6919, + "step": 8279 + }, + { + "epoch": 1.92, + "learning_rate": 0.00010697020366089921, + "loss": 0.6413, + "step": 8280 + }, + { + "epoch": 1.92, + "learning_rate": 0.00010695205779937155, + "loss": 0.7237, + "step": 8281 + }, + { + "epoch": 1.92, + "learning_rate": 0.0001069339117078172, + "loss": 0.7021, + "step": 8282 + }, + { + "epoch": 1.92, + "learning_rate": 0.00010691576538683665, + "loss": 0.7535, + "step": 8283 + }, + { + "epoch": 1.92, + "learning_rate": 0.00010689761883703029, + "loss": 0.8037, + "step": 8284 + }, + { + "epoch": 1.92, + "learning_rate": 0.00010687947205899853, + "loss": 0.6779, + "step": 8285 + }, + { + "epoch": 1.92, + "learning_rate": 0.00010686132505334179, + "loss": 0.6649, + "step": 8286 + }, + { + "epoch": 1.92, + "learning_rate": 0.00010684317782066053, + "loss": 0.7504, + "step": 8287 + }, + { + "epoch": 1.92, + "learning_rate": 0.00010682503036155518, + "loss": 0.7088, + "step": 8288 + }, + { + "epoch": 1.92, + "learning_rate": 0.00010680688267662627, + "loss": 0.6331, + "step": 8289 + }, + { + "epoch": 1.92, + "learning_rate": 0.00010678873476647415, + "loss": 0.7262, + "step": 8290 + }, + { + "epoch": 1.92, + "learning_rate": 0.00010677058663169936, + "loss": 0.6043, + "step": 8291 + }, + { + "epoch": 1.92, + "learning_rate": 0.00010675243827290238, + "loss": 0.6974, + "step": 8292 + }, + { + "epoch": 1.92, + "learning_rate": 0.00010673428969068364, + "loss": 0.7201, + "step": 8293 + }, + { + "epoch": 1.92, + "learning_rate": 0.0001067161408856437, + "loss": 0.7872, + "step": 8294 + }, + { + "epoch": 1.92, + "learning_rate": 0.00010669799185838301, + "loss": 0.6619, + "step": 8295 + }, + { + "epoch": 1.92, + "learning_rate": 0.0001066798426095021, + "loss": 0.6456, + "step": 8296 + }, + { + "epoch": 1.92, + "learning_rate": 0.00010666169313960148, + "loss": 0.6769, + "step": 8297 + }, + { + "epoch": 1.92, + "learning_rate": 0.00010664354344928167, + "loss": 0.6285, + "step": 8298 + }, + { + "epoch": 1.92, + "learning_rate": 0.00010662539353914316, + "loss": 0.6802, + "step": 8299 + }, + { + "epoch": 1.92, + "learning_rate": 0.00010660724340978661, + "loss": 0.6346, + "step": 8300 + }, + { + "epoch": 1.92, + "learning_rate": 0.00010658909306181241, + "loss": 0.6704, + "step": 8301 + }, + { + "epoch": 1.92, + "learning_rate": 0.00010657094249582122, + "loss": 0.6677, + "step": 8302 + }, + { + "epoch": 1.92, + "learning_rate": 0.00010655279171241354, + "loss": 0.7321, + "step": 8303 + }, + { + "epoch": 1.92, + "learning_rate": 0.00010653464071218998, + "loss": 0.7376, + "step": 8304 + }, + { + "epoch": 1.92, + "learning_rate": 0.00010651648949575106, + "loss": 0.6986, + "step": 8305 + }, + { + "epoch": 1.92, + "learning_rate": 0.00010649833806369738, + "loss": 0.7201, + "step": 8306 + }, + { + "epoch": 1.92, + "learning_rate": 0.00010648018641662955, + "loss": 0.6663, + "step": 8307 + }, + { + "epoch": 1.92, + "learning_rate": 0.00010646203455514814, + "loss": 0.7277, + "step": 8308 + }, + { + "epoch": 1.92, + "learning_rate": 0.00010644388247985377, + "loss": 0.697, + "step": 8309 + }, + { + "epoch": 1.92, + "learning_rate": 0.00010642573019134703, + "loss": 0.7155, + "step": 8310 + }, + { + "epoch": 1.92, + "learning_rate": 0.00010640757769022854, + "loss": 0.7266, + "step": 8311 + }, + { + "epoch": 1.92, + "learning_rate": 0.0001063894249770989, + "loss": 0.7297, + "step": 8312 + }, + { + "epoch": 1.92, + "learning_rate": 0.00010637127205255877, + "loss": 0.7126, + "step": 8313 + }, + { + "epoch": 1.92, + "learning_rate": 0.00010635311891720878, + "loss": 0.6257, + "step": 8314 + }, + { + "epoch": 1.93, + "learning_rate": 0.00010633496557164959, + "loss": 0.6759, + "step": 8315 + }, + { + "epoch": 1.93, + "learning_rate": 0.00010631681201648181, + "loss": 0.6845, + "step": 8316 + }, + { + "epoch": 1.93, + "learning_rate": 0.00010629865825230611, + "loss": 0.7502, + "step": 8317 + }, + { + "epoch": 1.93, + "learning_rate": 0.00010628050427972318, + "loss": 0.6321, + "step": 8318 + }, + { + "epoch": 1.93, + "learning_rate": 0.00010626235009933367, + "loss": 0.6941, + "step": 8319 + }, + { + "epoch": 1.93, + "learning_rate": 0.00010624419571173825, + "loss": 0.6794, + "step": 8320 + }, + { + "epoch": 1.93, + "learning_rate": 0.00010622604111753762, + "loss": 0.6754, + "step": 8321 + }, + { + "epoch": 1.93, + "learning_rate": 0.00010620788631733246, + "loss": 0.6853, + "step": 8322 + }, + { + "epoch": 1.93, + "learning_rate": 0.00010618973131172347, + "loss": 0.6918, + "step": 8323 + }, + { + "epoch": 1.93, + "learning_rate": 0.00010617157610131136, + "loss": 0.6938, + "step": 8324 + }, + { + "epoch": 1.93, + "learning_rate": 0.00010615342068669685, + "loss": 0.6832, + "step": 8325 + }, + { + "epoch": 1.93, + "learning_rate": 0.00010613526506848063, + "loss": 0.6364, + "step": 8326 + }, + { + "epoch": 1.93, + "learning_rate": 0.00010611710924726346, + "loss": 0.7301, + "step": 8327 + }, + { + "epoch": 1.93, + "learning_rate": 0.00010609895322364605, + "loss": 0.7222, + "step": 8328 + }, + { + "epoch": 1.93, + "learning_rate": 0.00010608079699822915, + "loss": 0.682, + "step": 8329 + }, + { + "epoch": 1.93, + "learning_rate": 0.00010606264057161348, + "loss": 0.6116, + "step": 8330 + }, + { + "epoch": 1.93, + "learning_rate": 0.00010604448394439983, + "loss": 0.6881, + "step": 8331 + }, + { + "epoch": 1.93, + "learning_rate": 0.00010602632711718892, + "loss": 0.7192, + "step": 8332 + }, + { + "epoch": 1.93, + "learning_rate": 0.00010600817009058154, + "loss": 0.6394, + "step": 8333 + }, + { + "epoch": 1.93, + "learning_rate": 0.00010599001286517847, + "loss": 0.691, + "step": 8334 + }, + { + "epoch": 1.93, + "learning_rate": 0.00010597185544158047, + "loss": 0.7509, + "step": 8335 + }, + { + "epoch": 1.93, + "learning_rate": 0.00010595369782038831, + "loss": 0.6598, + "step": 8336 + }, + { + "epoch": 1.93, + "learning_rate": 0.00010593554000220285, + "loss": 0.6843, + "step": 8337 + }, + { + "epoch": 1.93, + "learning_rate": 0.00010591738198762478, + "loss": 0.6958, + "step": 8338 + }, + { + "epoch": 1.93, + "learning_rate": 0.00010589922377725498, + "loss": 0.6656, + "step": 8339 + }, + { + "epoch": 1.93, + "learning_rate": 0.00010588106537169425, + "loss": 0.7149, + "step": 8340 + }, + { + "epoch": 1.93, + "learning_rate": 0.0001058629067715434, + "loss": 0.6553, + "step": 8341 + }, + { + "epoch": 1.93, + "learning_rate": 0.0001058447479774033, + "loss": 0.7245, + "step": 8342 + }, + { + "epoch": 1.93, + "learning_rate": 0.00010582658898987468, + "loss": 0.7101, + "step": 8343 + }, + { + "epoch": 1.93, + "learning_rate": 0.0001058084298095584, + "loss": 0.6668, + "step": 8344 + }, + { + "epoch": 1.93, + "learning_rate": 0.0001057902704370554, + "loss": 0.7407, + "step": 8345 + }, + { + "epoch": 1.93, + "learning_rate": 0.00010577211087296644, + "loss": 0.6894, + "step": 8346 + }, + { + "epoch": 1.93, + "learning_rate": 0.00010575395111789241, + "loss": 0.6628, + "step": 8347 + }, + { + "epoch": 1.93, + "learning_rate": 0.00010573579117243417, + "loss": 0.6668, + "step": 8348 + }, + { + "epoch": 1.93, + "learning_rate": 0.00010571763103719254, + "loss": 0.6541, + "step": 8349 + }, + { + "epoch": 1.93, + "learning_rate": 0.00010569947071276847, + "loss": 0.7075, + "step": 8350 + }, + { + "epoch": 1.93, + "learning_rate": 0.0001056813101997628, + "loss": 0.6501, + "step": 8351 + }, + { + "epoch": 1.93, + "learning_rate": 0.00010566314949877643, + "loss": 0.7176, + "step": 8352 + }, + { + "epoch": 1.93, + "learning_rate": 0.00010564498861041026, + "loss": 0.7601, + "step": 8353 + }, + { + "epoch": 1.93, + "learning_rate": 0.00010562682753526513, + "loss": 0.6389, + "step": 8354 + }, + { + "epoch": 1.93, + "learning_rate": 0.00010560866627394202, + "loss": 0.6275, + "step": 8355 + }, + { + "epoch": 1.93, + "learning_rate": 0.0001055905048270418, + "loss": 0.7173, + "step": 8356 + }, + { + "epoch": 1.93, + "learning_rate": 0.0001055723431951654, + "loss": 0.6776, + "step": 8357 + }, + { + "epoch": 1.93, + "learning_rate": 0.0001055541813789138, + "loss": 0.6913, + "step": 8358 + }, + { + "epoch": 1.94, + "learning_rate": 0.00010553601937888782, + "loss": 0.7603, + "step": 8359 + }, + { + "epoch": 1.94, + "learning_rate": 0.00010551785719568848, + "loss": 0.6656, + "step": 8360 + }, + { + "epoch": 1.94, + "learning_rate": 0.00010549969482991664, + "loss": 0.6786, + "step": 8361 + }, + { + "epoch": 1.94, + "learning_rate": 0.00010548153228217335, + "loss": 0.6842, + "step": 8362 + }, + { + "epoch": 1.94, + "learning_rate": 0.00010546336955305952, + "loss": 0.7066, + "step": 8363 + }, + { + "epoch": 1.94, + "learning_rate": 0.00010544520664317613, + "loss": 0.6807, + "step": 8364 + }, + { + "epoch": 1.94, + "learning_rate": 0.00010542704355312407, + "loss": 0.651, + "step": 8365 + }, + { + "epoch": 1.94, + "learning_rate": 0.00010540888028350438, + "loss": 0.6901, + "step": 8366 + }, + { + "epoch": 1.94, + "learning_rate": 0.00010539071683491804, + "loss": 0.6534, + "step": 8367 + }, + { + "epoch": 1.94, + "learning_rate": 0.000105372553207966, + "loss": 0.6788, + "step": 8368 + }, + { + "epoch": 1.94, + "learning_rate": 0.0001053543894032493, + "loss": 0.7333, + "step": 8369 + }, + { + "epoch": 1.94, + "learning_rate": 0.00010533622542136889, + "loss": 0.7008, + "step": 8370 + }, + { + "epoch": 1.94, + "learning_rate": 0.0001053180612629258, + "loss": 0.6887, + "step": 8371 + }, + { + "epoch": 1.94, + "learning_rate": 0.00010529989692852098, + "loss": 0.686, + "step": 8372 + }, + { + "epoch": 1.94, + "learning_rate": 0.00010528173241875552, + "loss": 0.7134, + "step": 8373 + }, + { + "epoch": 1.94, + "learning_rate": 0.00010526356773423043, + "loss": 0.7819, + "step": 8374 + }, + { + "epoch": 1.94, + "learning_rate": 0.0001052454028755467, + "loss": 0.7253, + "step": 8375 + }, + { + "epoch": 1.94, + "learning_rate": 0.00010522723784330537, + "loss": 0.6562, + "step": 8376 + }, + { + "epoch": 1.94, + "learning_rate": 0.00010520907263810746, + "loss": 0.6793, + "step": 8377 + }, + { + "epoch": 1.94, + "learning_rate": 0.00010519090726055402, + "loss": 0.6196, + "step": 8378 + }, + { + "epoch": 1.94, + "learning_rate": 0.00010517274171124616, + "loss": 0.6652, + "step": 8379 + }, + { + "epoch": 1.94, + "learning_rate": 0.00010515457599078488, + "loss": 0.5998, + "step": 8380 + }, + { + "epoch": 1.94, + "learning_rate": 0.00010513641009977124, + "loss": 0.7147, + "step": 8381 + }, + { + "epoch": 1.94, + "learning_rate": 0.00010511824403880629, + "loss": 0.6939, + "step": 8382 + }, + { + "epoch": 1.94, + "learning_rate": 0.00010510007780849112, + "loss": 0.6818, + "step": 8383 + }, + { + "epoch": 1.94, + "learning_rate": 0.0001050819114094268, + "loss": 0.6993, + "step": 8384 + }, + { + "epoch": 1.94, + "learning_rate": 0.00010506374484221446, + "loss": 0.5839, + "step": 8385 + }, + { + "epoch": 1.94, + "learning_rate": 0.00010504557810745512, + "loss": 0.7149, + "step": 8386 + }, + { + "epoch": 1.94, + "learning_rate": 0.00010502741120574991, + "loss": 0.6712, + "step": 8387 + }, + { + "epoch": 1.94, + "learning_rate": 0.00010500924413769988, + "loss": 0.6758, + "step": 8388 + }, + { + "epoch": 1.94, + "learning_rate": 0.00010499107690390617, + "loss": 0.7378, + "step": 8389 + }, + { + "epoch": 1.94, + "learning_rate": 0.00010497290950496994, + "loss": 0.6985, + "step": 8390 + }, + { + "epoch": 1.94, + "learning_rate": 0.00010495474194149224, + "loss": 0.6228, + "step": 8391 + }, + { + "epoch": 1.94, + "learning_rate": 0.00010493657421407418, + "loss": 0.7124, + "step": 8392 + }, + { + "epoch": 1.94, + "learning_rate": 0.00010491840632331695, + "loss": 0.6743, + "step": 8393 + }, + { + "epoch": 1.94, + "learning_rate": 0.00010490023826982163, + "loss": 0.6738, + "step": 8394 + }, + { + "epoch": 1.94, + "learning_rate": 0.00010488207005418935, + "loss": 0.6872, + "step": 8395 + }, + { + "epoch": 1.94, + "learning_rate": 0.0001048639016770213, + "loss": 0.7065, + "step": 8396 + }, + { + "epoch": 1.94, + "learning_rate": 0.00010484573313891857, + "loss": 0.7599, + "step": 8397 + }, + { + "epoch": 1.94, + "learning_rate": 0.00010482756444048237, + "loss": 0.6753, + "step": 8398 + }, + { + "epoch": 1.94, + "learning_rate": 0.00010480939558231379, + "loss": 0.6723, + "step": 8399 + }, + { + "epoch": 1.94, + "learning_rate": 0.00010479122656501406, + "loss": 0.6823, + "step": 8400 + }, + { + "epoch": 1.94, + "learning_rate": 0.00010477305738918433, + "loss": 0.6904, + "step": 8401 + }, + { + "epoch": 1.95, + "learning_rate": 0.00010475488805542575, + "loss": 0.6758, + "step": 8402 + }, + { + "epoch": 1.95, + "learning_rate": 0.00010473671856433951, + "loss": 0.6922, + "step": 8403 + }, + { + "epoch": 1.95, + "learning_rate": 0.0001047185489165268, + "loss": 0.6571, + "step": 8404 + }, + { + "epoch": 1.95, + "learning_rate": 0.00010470037911258881, + "loss": 0.6249, + "step": 8405 + }, + { + "epoch": 1.95, + "learning_rate": 0.00010468220915312671, + "loss": 0.6921, + "step": 8406 + }, + { + "epoch": 1.95, + "learning_rate": 0.00010466403903874176, + "loss": 0.6832, + "step": 8407 + }, + { + "epoch": 1.95, + "learning_rate": 0.00010464586877003513, + "loss": 0.656, + "step": 8408 + }, + { + "epoch": 1.95, + "learning_rate": 0.000104627698347608, + "loss": 0.6789, + "step": 8409 + }, + { + "epoch": 1.95, + "learning_rate": 0.00010460952777206161, + "loss": 0.7121, + "step": 8410 + }, + { + "epoch": 1.95, + "learning_rate": 0.00010459135704399718, + "loss": 0.7803, + "step": 8411 + }, + { + "epoch": 1.95, + "learning_rate": 0.00010457318616401597, + "loss": 0.6777, + "step": 8412 + }, + { + "epoch": 1.95, + "learning_rate": 0.00010455501513271915, + "loss": 0.6496, + "step": 8413 + }, + { + "epoch": 1.95, + "learning_rate": 0.00010453684395070797, + "loss": 0.7006, + "step": 8414 + }, + { + "epoch": 1.95, + "learning_rate": 0.0001045186726185837, + "loss": 0.7121, + "step": 8415 + }, + { + "epoch": 1.95, + "learning_rate": 0.00010450050113694756, + "loss": 0.6528, + "step": 8416 + }, + { + "epoch": 1.95, + "learning_rate": 0.00010448232950640082, + "loss": 0.6567, + "step": 8417 + }, + { + "epoch": 1.95, + "learning_rate": 0.0001044641577275447, + "loss": 0.7506, + "step": 8418 + }, + { + "epoch": 1.95, + "learning_rate": 0.0001044459858009805, + "loss": 0.681, + "step": 8419 + }, + { + "epoch": 1.95, + "learning_rate": 0.00010442781372730946, + "loss": 0.715, + "step": 8420 + }, + { + "epoch": 1.95, + "learning_rate": 0.00010440964150713287, + "loss": 0.6705, + "step": 8421 + }, + { + "epoch": 1.95, + "learning_rate": 0.00010439146914105196, + "loss": 0.6896, + "step": 8422 + }, + { + "epoch": 1.95, + "learning_rate": 0.00010437329662966807, + "loss": 0.7321, + "step": 8423 + }, + { + "epoch": 1.95, + "learning_rate": 0.00010435512397358243, + "loss": 0.7287, + "step": 8424 + }, + { + "epoch": 1.95, + "learning_rate": 0.00010433695117339637, + "loss": 0.7273, + "step": 8425 + }, + { + "epoch": 1.95, + "learning_rate": 0.00010431877822971117, + "loss": 0.6971, + "step": 8426 + }, + { + "epoch": 1.95, + "learning_rate": 0.00010430060514312811, + "loss": 0.6806, + "step": 8427 + }, + { + "epoch": 1.95, + "learning_rate": 0.00010428243191424851, + "loss": 0.7221, + "step": 8428 + }, + { + "epoch": 1.95, + "learning_rate": 0.00010426425854367367, + "loss": 0.7259, + "step": 8429 + }, + { + "epoch": 1.95, + "learning_rate": 0.00010424608503200492, + "loss": 0.6506, + "step": 8430 + }, + { + "epoch": 1.95, + "learning_rate": 0.00010422791137984359, + "loss": 0.7101, + "step": 8431 + }, + { + "epoch": 1.95, + "learning_rate": 0.00010420973758779094, + "loss": 0.7299, + "step": 8432 + }, + { + "epoch": 1.95, + "learning_rate": 0.00010419156365644833, + "loss": 0.7285, + "step": 8433 + }, + { + "epoch": 1.95, + "learning_rate": 0.00010417338958641712, + "loss": 0.6947, + "step": 8434 + }, + { + "epoch": 1.95, + "learning_rate": 0.00010415521537829859, + "loss": 0.6408, + "step": 8435 + }, + { + "epoch": 1.95, + "learning_rate": 0.00010413704103269415, + "loss": 0.6731, + "step": 8436 + }, + { + "epoch": 1.95, + "learning_rate": 0.00010411886655020508, + "loss": 0.6463, + "step": 8437 + }, + { + "epoch": 1.95, + "learning_rate": 0.00010410069193143279, + "loss": 0.7064, + "step": 8438 + }, + { + "epoch": 1.95, + "learning_rate": 0.00010408251717697857, + "loss": 0.7149, + "step": 8439 + }, + { + "epoch": 1.95, + "learning_rate": 0.00010406434228744379, + "loss": 0.6816, + "step": 8440 + }, + { + "epoch": 1.95, + "learning_rate": 0.00010404616726342986, + "loss": 0.6858, + "step": 8441 + }, + { + "epoch": 1.95, + "learning_rate": 0.0001040279921055381, + "loss": 0.7534, + "step": 8442 + }, + { + "epoch": 1.95, + "learning_rate": 0.00010400981681436994, + "loss": 0.6977, + "step": 8443 + }, + { + "epoch": 1.95, + "learning_rate": 0.00010399164139052668, + "loss": 0.5958, + "step": 8444 + }, + { + "epoch": 1.96, + "learning_rate": 0.00010397346583460971, + "loss": 0.643, + "step": 8445 + }, + { + "epoch": 1.96, + "learning_rate": 0.00010395529014722048, + "loss": 0.6801, + "step": 8446 + }, + { + "epoch": 1.96, + "learning_rate": 0.00010393711432896031, + "loss": 0.7613, + "step": 8447 + }, + { + "epoch": 1.96, + "learning_rate": 0.00010391893838043065, + "loss": 0.6704, + "step": 8448 + }, + { + "epoch": 1.96, + "learning_rate": 0.00010390076230223288, + "loss": 0.7447, + "step": 8449 + }, + { + "epoch": 1.96, + "learning_rate": 0.00010388258609496835, + "loss": 0.6921, + "step": 8450 + }, + { + "epoch": 1.96, + "learning_rate": 0.00010386440975923855, + "loss": 0.6715, + "step": 8451 + }, + { + "epoch": 1.96, + "learning_rate": 0.00010384623329564483, + "loss": 0.7416, + "step": 8452 + }, + { + "epoch": 1.96, + "learning_rate": 0.00010382805670478863, + "loss": 0.6965, + "step": 8453 + }, + { + "epoch": 1.96, + "learning_rate": 0.00010380987998727139, + "loss": 0.7133, + "step": 8454 + }, + { + "epoch": 1.96, + "learning_rate": 0.00010379170314369448, + "loss": 0.6827, + "step": 8455 + }, + { + "epoch": 1.96, + "learning_rate": 0.00010377352617465934, + "loss": 0.6551, + "step": 8456 + }, + { + "epoch": 1.96, + "learning_rate": 0.00010375534908076742, + "loss": 0.7657, + "step": 8457 + }, + { + "epoch": 1.96, + "learning_rate": 0.00010373717186262018, + "loss": 0.6591, + "step": 8458 + }, + { + "epoch": 1.96, + "learning_rate": 0.00010371899452081904, + "loss": 0.6393, + "step": 8459 + }, + { + "epoch": 1.96, + "learning_rate": 0.00010370081705596542, + "loss": 0.6655, + "step": 8460 + }, + { + "epoch": 1.96, + "learning_rate": 0.00010368263946866076, + "loss": 0.65, + "step": 8461 + }, + { + "epoch": 1.96, + "learning_rate": 0.00010366446175950653, + "loss": 0.6843, + "step": 8462 + }, + { + "epoch": 1.96, + "learning_rate": 0.00010364628392910423, + "loss": 0.7438, + "step": 8463 + }, + { + "epoch": 1.96, + "learning_rate": 0.00010362810597805526, + "loss": 0.6773, + "step": 8464 + }, + { + "epoch": 1.96, + "learning_rate": 0.00010360992790696114, + "loss": 0.6992, + "step": 8465 + }, + { + "epoch": 1.96, + "learning_rate": 0.00010359174971642327, + "loss": 0.6883, + "step": 8466 + }, + { + "epoch": 1.96, + "learning_rate": 0.00010357357140704316, + "loss": 0.7006, + "step": 8467 + }, + { + "epoch": 1.96, + "learning_rate": 0.00010355539297942227, + "loss": 0.6937, + "step": 8468 + }, + { + "epoch": 1.96, + "learning_rate": 0.00010353721443416211, + "loss": 0.6548, + "step": 8469 + }, + { + "epoch": 1.96, + "learning_rate": 0.00010351903577186417, + "loss": 0.6632, + "step": 8470 + }, + { + "epoch": 1.96, + "learning_rate": 0.0001035008569931299, + "loss": 0.7242, + "step": 8471 + }, + { + "epoch": 1.96, + "learning_rate": 0.0001034826780985608, + "loss": 0.6884, + "step": 8472 + }, + { + "epoch": 1.96, + "learning_rate": 0.00010346449908875833, + "loss": 0.7277, + "step": 8473 + }, + { + "epoch": 1.96, + "learning_rate": 0.00010344631996432408, + "loss": 0.6712, + "step": 8474 + }, + { + "epoch": 1.96, + "learning_rate": 0.00010342814072585949, + "loss": 0.6608, + "step": 8475 + }, + { + "epoch": 1.96, + "learning_rate": 0.00010340996137396608, + "loss": 0.7273, + "step": 8476 + }, + { + "epoch": 1.96, + "learning_rate": 0.00010339178190924537, + "loss": 0.7362, + "step": 8477 + } + ], + "logging_steps": 1, + "max_steps": 17276, + "num_input_tokens_seen": 0, + "num_train_epochs": 4, + "save_steps": 173, + "total_flos": 1.913647968842416e+19, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +}