|
{ |
|
"best_global_step": null, |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.0, |
|
"eval_steps": 500, |
|
"global_step": 780, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.002564102564102564, |
|
"grad_norm": 0.13979803025722504, |
|
"learning_rate": 2.564102564102564e-07, |
|
"loss": 0.9999, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.005128205128205128, |
|
"grad_norm": 0.15988187491893768, |
|
"learning_rate": 5.128205128205128e-07, |
|
"loss": 1.0694, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.007692307692307693, |
|
"grad_norm": 0.1669122576713562, |
|
"learning_rate": 7.692307692307694e-07, |
|
"loss": 1.1319, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.010256410256410256, |
|
"grad_norm": 0.15094897150993347, |
|
"learning_rate": 1.0256410256410257e-06, |
|
"loss": 0.956, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.01282051282051282, |
|
"grad_norm": 0.15886467695236206, |
|
"learning_rate": 1.2820512820512822e-06, |
|
"loss": 1.2478, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.015384615384615385, |
|
"grad_norm": 0.13728941977024078, |
|
"learning_rate": 1.5384615384615387e-06, |
|
"loss": 1.0633, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.017948717948717947, |
|
"grad_norm": 0.14830218255519867, |
|
"learning_rate": 1.794871794871795e-06, |
|
"loss": 0.9022, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.020512820512820513, |
|
"grad_norm": 0.17115703225135803, |
|
"learning_rate": 2.0512820512820513e-06, |
|
"loss": 1.1275, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.023076923076923078, |
|
"grad_norm": 0.15854498744010925, |
|
"learning_rate": 2.3076923076923077e-06, |
|
"loss": 1.0972, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.02564102564102564, |
|
"grad_norm": 0.15408478677272797, |
|
"learning_rate": 2.5641025641025644e-06, |
|
"loss": 1.0453, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.028205128205128206, |
|
"grad_norm": 0.15272922813892365, |
|
"learning_rate": 2.8205128205128207e-06, |
|
"loss": 1.0399, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.03076923076923077, |
|
"grad_norm": 0.14716756343841553, |
|
"learning_rate": 3.0769230769230774e-06, |
|
"loss": 1.0765, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.03333333333333333, |
|
"grad_norm": 0.14600983262062073, |
|
"learning_rate": 3.3333333333333337e-06, |
|
"loss": 1.0616, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.035897435897435895, |
|
"grad_norm": 0.14107345044612885, |
|
"learning_rate": 3.58974358974359e-06, |
|
"loss": 1.1168, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.038461538461538464, |
|
"grad_norm": 0.14267683029174805, |
|
"learning_rate": 3.846153846153847e-06, |
|
"loss": 1.0462, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.041025641025641026, |
|
"grad_norm": 0.1449165940284729, |
|
"learning_rate": 4.102564102564103e-06, |
|
"loss": 1.1341, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.04358974358974359, |
|
"grad_norm": 0.14054569602012634, |
|
"learning_rate": 4.358974358974359e-06, |
|
"loss": 1.0704, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.046153846153846156, |
|
"grad_norm": 0.14372889697551727, |
|
"learning_rate": 4.615384615384615e-06, |
|
"loss": 1.1511, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.04871794871794872, |
|
"grad_norm": 0.13227999210357666, |
|
"learning_rate": 4.871794871794872e-06, |
|
"loss": 1.009, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.05128205128205128, |
|
"grad_norm": 0.15212635695934296, |
|
"learning_rate": 5.128205128205129e-06, |
|
"loss": 1.1836, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.05384615384615385, |
|
"grad_norm": 0.1289321482181549, |
|
"learning_rate": 5.384615384615385e-06, |
|
"loss": 1.1769, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.05641025641025641, |
|
"grad_norm": 0.12752899527549744, |
|
"learning_rate": 5.641025641025641e-06, |
|
"loss": 1.0759, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.05897435897435897, |
|
"grad_norm": 0.12242157757282257, |
|
"learning_rate": 5.897435897435897e-06, |
|
"loss": 1.0826, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.06153846153846154, |
|
"grad_norm": 0.1286514699459076, |
|
"learning_rate": 6.153846153846155e-06, |
|
"loss": 1.05, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.0641025641025641, |
|
"grad_norm": 0.13825258612632751, |
|
"learning_rate": 6.410256410256411e-06, |
|
"loss": 1.0944, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.06666666666666667, |
|
"grad_norm": 0.13136117160320282, |
|
"learning_rate": 6.6666666666666675e-06, |
|
"loss": 1.0472, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.06923076923076923, |
|
"grad_norm": 0.11351772397756577, |
|
"learning_rate": 6.923076923076923e-06, |
|
"loss": 1.071, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.07179487179487179, |
|
"grad_norm": 0.12361160665750504, |
|
"learning_rate": 7.17948717948718e-06, |
|
"loss": 1.0955, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.07435897435897436, |
|
"grad_norm": 0.13740238547325134, |
|
"learning_rate": 7.435897435897436e-06, |
|
"loss": 1.1061, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.07692307692307693, |
|
"grad_norm": 0.11295292526483536, |
|
"learning_rate": 7.692307692307694e-06, |
|
"loss": 1.0159, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.07948717948717948, |
|
"grad_norm": 0.12402593344449997, |
|
"learning_rate": 7.948717948717949e-06, |
|
"loss": 1.0859, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.08205128205128205, |
|
"grad_norm": 0.1193617656826973, |
|
"learning_rate": 8.205128205128205e-06, |
|
"loss": 1.1554, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.08461538461538462, |
|
"grad_norm": 0.12157738953828812, |
|
"learning_rate": 8.461538461538462e-06, |
|
"loss": 1.0643, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.08717948717948718, |
|
"grad_norm": 0.11561132967472076, |
|
"learning_rate": 8.717948717948719e-06, |
|
"loss": 0.9329, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.08974358974358974, |
|
"grad_norm": 0.1199595183134079, |
|
"learning_rate": 8.974358974358976e-06, |
|
"loss": 0.9432, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.09230769230769231, |
|
"grad_norm": 0.27351143956184387, |
|
"learning_rate": 9.23076923076923e-06, |
|
"loss": 0.9671, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.09487179487179487, |
|
"grad_norm": 0.11380849033594131, |
|
"learning_rate": 9.487179487179489e-06, |
|
"loss": 1.0776, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.09743589743589744, |
|
"grad_norm": 0.12026315927505493, |
|
"learning_rate": 9.743589743589744e-06, |
|
"loss": 0.9837, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 0.11509953439235687, |
|
"learning_rate": 1e-05, |
|
"loss": 1.0578, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.10256410256410256, |
|
"grad_norm": 0.12789179384708405, |
|
"learning_rate": 9.99878394811512e-06, |
|
"loss": 1.0436, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.10512820512820513, |
|
"grad_norm": 0.10106956213712692, |
|
"learning_rate": 9.997564935064936e-06, |
|
"loss": 0.934, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.1076923076923077, |
|
"grad_norm": 0.11464275419712067, |
|
"learning_rate": 9.996342950020318e-06, |
|
"loss": 1.0297, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.11025641025641025, |
|
"grad_norm": 0.11068426072597504, |
|
"learning_rate": 9.995117982099268e-06, |
|
"loss": 1.1004, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.11282051282051282, |
|
"grad_norm": 0.10913486778736115, |
|
"learning_rate": 9.993890020366601e-06, |
|
"loss": 0.92, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.11538461538461539, |
|
"grad_norm": 0.11235719919204712, |
|
"learning_rate": 9.992659053833607e-06, |
|
"loss": 1.0534, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.11794871794871795, |
|
"grad_norm": 0.10900150239467621, |
|
"learning_rate": 9.991425071457738e-06, |
|
"loss": 1.0011, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.12051282051282051, |
|
"grad_norm": 0.11291161179542542, |
|
"learning_rate": 9.990188062142274e-06, |
|
"loss": 0.9889, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.12307692307692308, |
|
"grad_norm": 0.12270451337099075, |
|
"learning_rate": 9.988948014735981e-06, |
|
"loss": 1.1178, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.12564102564102564, |
|
"grad_norm": 0.109133280813694, |
|
"learning_rate": 9.987704918032787e-06, |
|
"loss": 1.0422, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.1282051282051282, |
|
"grad_norm": 0.11073730140924454, |
|
"learning_rate": 9.98645876077144e-06, |
|
"loss": 1.1226, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.13076923076923078, |
|
"grad_norm": 0.10467839986085892, |
|
"learning_rate": 9.98520953163517e-06, |
|
"loss": 1.089, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.13333333333333333, |
|
"grad_norm": 0.10366383194923401, |
|
"learning_rate": 9.983957219251336e-06, |
|
"loss": 1.1206, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.1358974358974359, |
|
"grad_norm": 0.10720381140708923, |
|
"learning_rate": 9.982701812191105e-06, |
|
"loss": 1.091, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.13846153846153847, |
|
"grad_norm": 0.2592061161994934, |
|
"learning_rate": 9.981443298969074e-06, |
|
"loss": 0.964, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.14102564102564102, |
|
"grad_norm": 0.10395167022943497, |
|
"learning_rate": 9.98018166804294e-06, |
|
"loss": 1.1305, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.14358974358974358, |
|
"grad_norm": 0.10875218361616135, |
|
"learning_rate": 9.978916907813147e-06, |
|
"loss": 1.1347, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.14615384615384616, |
|
"grad_norm": 0.10331016033887863, |
|
"learning_rate": 9.977649006622518e-06, |
|
"loss": 1.1921, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.14871794871794872, |
|
"grad_norm": 0.10478100180625916, |
|
"learning_rate": 9.976377952755907e-06, |
|
"loss": 1.0031, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.15128205128205127, |
|
"grad_norm": 0.09711793065071106, |
|
"learning_rate": 9.975103734439834e-06, |
|
"loss": 0.9949, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.15384615384615385, |
|
"grad_norm": 0.10558706521987915, |
|
"learning_rate": 9.973826339842128e-06, |
|
"loss": 1.0029, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.1564102564102564, |
|
"grad_norm": 0.09912573546171188, |
|
"learning_rate": 9.972545757071548e-06, |
|
"loss": 0.933, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.15897435897435896, |
|
"grad_norm": 0.11607331037521362, |
|
"learning_rate": 9.971261974177426e-06, |
|
"loss": 0.9942, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.16153846153846155, |
|
"grad_norm": 0.10281538218259811, |
|
"learning_rate": 9.969974979149292e-06, |
|
"loss": 1.0307, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.1641025641025641, |
|
"grad_norm": 0.10646649450063705, |
|
"learning_rate": 9.968684759916494e-06, |
|
"loss": 1.0052, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.16666666666666666, |
|
"grad_norm": 0.1755123883485794, |
|
"learning_rate": 9.967391304347826e-06, |
|
"loss": 0.9506, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.16923076923076924, |
|
"grad_norm": 0.11006496846675873, |
|
"learning_rate": 9.966094600251151e-06, |
|
"loss": 0.9679, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.1717948717948718, |
|
"grad_norm": 0.11007404327392578, |
|
"learning_rate": 9.96479463537301e-06, |
|
"loss": 1.0251, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.17435897435897435, |
|
"grad_norm": 0.1030791848897934, |
|
"learning_rate": 9.963491397398239e-06, |
|
"loss": 1.0104, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.17692307692307693, |
|
"grad_norm": 0.10340573638677597, |
|
"learning_rate": 9.962184873949581e-06, |
|
"loss": 1.1273, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.1794871794871795, |
|
"grad_norm": 0.10667295008897781, |
|
"learning_rate": 9.960875052587295e-06, |
|
"loss": 1.1031, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.18205128205128204, |
|
"grad_norm": 0.10353393852710724, |
|
"learning_rate": 9.959561920808762e-06, |
|
"loss": 0.9595, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.18461538461538463, |
|
"grad_norm": 0.10161738842725754, |
|
"learning_rate": 9.95824546604808e-06, |
|
"loss": 0.9629, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.18717948717948718, |
|
"grad_norm": 0.11324603855609894, |
|
"learning_rate": 9.956925675675678e-06, |
|
"loss": 1.2039, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.18974358974358974, |
|
"grad_norm": 0.11005936563014984, |
|
"learning_rate": 9.955602536997886e-06, |
|
"loss": 1.0425, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.19230769230769232, |
|
"grad_norm": 0.10767950117588043, |
|
"learning_rate": 9.954276037256563e-06, |
|
"loss": 1.0281, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.19487179487179487, |
|
"grad_norm": 0.10422754287719727, |
|
"learning_rate": 9.952946163628658e-06, |
|
"loss": 1.0155, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.19743589743589743, |
|
"grad_norm": 0.10673552006483078, |
|
"learning_rate": 9.951612903225807e-06, |
|
"loss": 1.184, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 0.11334969103336334, |
|
"learning_rate": 9.950276243093924e-06, |
|
"loss": 0.9366, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.20256410256410257, |
|
"grad_norm": 0.10220715403556824, |
|
"learning_rate": 9.948936170212767e-06, |
|
"loss": 1.0855, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.20512820512820512, |
|
"grad_norm": 0.10753922909498215, |
|
"learning_rate": 9.947592671495527e-06, |
|
"loss": 0.946, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.2076923076923077, |
|
"grad_norm": 0.1163082867860794, |
|
"learning_rate": 9.946245733788397e-06, |
|
"loss": 1.0971, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.21025641025641026, |
|
"grad_norm": 0.11111017316579819, |
|
"learning_rate": 9.944895343870142e-06, |
|
"loss": 1.0869, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.2128205128205128, |
|
"grad_norm": 0.10907071083784103, |
|
"learning_rate": 9.943541488451669e-06, |
|
"loss": 0.9786, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.2153846153846154, |
|
"grad_norm": 0.10257716476917267, |
|
"learning_rate": 9.94218415417559e-06, |
|
"loss": 0.9889, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.21794871794871795, |
|
"grad_norm": 0.10978135466575623, |
|
"learning_rate": 9.94082332761578e-06, |
|
"loss": 1.1028, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.2205128205128205, |
|
"grad_norm": 0.110615074634552, |
|
"learning_rate": 9.939458995276944e-06, |
|
"loss": 1.019, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.2230769230769231, |
|
"grad_norm": 0.11058582365512848, |
|
"learning_rate": 9.938091143594154e-06, |
|
"loss": 0.9996, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.22564102564102564, |
|
"grad_norm": 0.11037719249725342, |
|
"learning_rate": 9.936719758932415e-06, |
|
"loss": 1.0338, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.2282051282051282, |
|
"grad_norm": 0.10798349976539612, |
|
"learning_rate": 9.935344827586207e-06, |
|
"loss": 0.9677, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.23076923076923078, |
|
"grad_norm": 0.10694784671068192, |
|
"learning_rate": 9.933966335779024e-06, |
|
"loss": 1.0419, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.23333333333333334, |
|
"grad_norm": 0.13677257299423218, |
|
"learning_rate": 9.932584269662922e-06, |
|
"loss": 1.0015, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.2358974358974359, |
|
"grad_norm": 0.11084003746509552, |
|
"learning_rate": 9.931198615318045e-06, |
|
"loss": 1.0031, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.23846153846153847, |
|
"grad_norm": 0.10615186393260956, |
|
"learning_rate": 9.929809358752167e-06, |
|
"loss": 0.9214, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.24102564102564103, |
|
"grad_norm": 0.10620255023241043, |
|
"learning_rate": 9.928416485900218e-06, |
|
"loss": 0.9185, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.24358974358974358, |
|
"grad_norm": 0.11533376574516296, |
|
"learning_rate": 9.927019982623805e-06, |
|
"loss": 1.0308, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.24615384615384617, |
|
"grad_norm": 0.1096138209104538, |
|
"learning_rate": 9.925619834710745e-06, |
|
"loss": 1.0478, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.24871794871794872, |
|
"grad_norm": 0.11876872926950455, |
|
"learning_rate": 9.924216027874566e-06, |
|
"loss": 1.0602, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.2512820512820513, |
|
"grad_norm": 0.10606134682893753, |
|
"learning_rate": 9.922808547754035e-06, |
|
"loss": 0.901, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.25384615384615383, |
|
"grad_norm": 0.11515390872955322, |
|
"learning_rate": 9.921397379912666e-06, |
|
"loss": 1.0333, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.2564102564102564, |
|
"grad_norm": 0.11910593509674072, |
|
"learning_rate": 9.919982509838217e-06, |
|
"loss": 0.9956, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.258974358974359, |
|
"grad_norm": 0.12178193032741547, |
|
"learning_rate": 9.918563922942208e-06, |
|
"loss": 1.071, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.26153846153846155, |
|
"grad_norm": 0.1089189425110817, |
|
"learning_rate": 9.917141604559404e-06, |
|
"loss": 0.9349, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.2641025641025641, |
|
"grad_norm": 0.1138150617480278, |
|
"learning_rate": 9.915715539947322e-06, |
|
"loss": 1.0169, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.26666666666666666, |
|
"grad_norm": 0.1112711951136589, |
|
"learning_rate": 9.914285714285713e-06, |
|
"loss": 0.9022, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.2692307692307692, |
|
"grad_norm": 0.11421187967061996, |
|
"learning_rate": 9.912852112676058e-06, |
|
"loss": 0.9711, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.2717948717948718, |
|
"grad_norm": 0.10921610891819, |
|
"learning_rate": 9.911414720141032e-06, |
|
"loss": 0.9388, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.2743589743589744, |
|
"grad_norm": 0.11643636971712112, |
|
"learning_rate": 9.909973521624008e-06, |
|
"loss": 1.003, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.27692307692307694, |
|
"grad_norm": 0.13560256361961365, |
|
"learning_rate": 9.908528501988513e-06, |
|
"loss": 0.9955, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.2794871794871795, |
|
"grad_norm": 0.11191970109939575, |
|
"learning_rate": 9.9070796460177e-06, |
|
"loss": 1.0105, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.28205128205128205, |
|
"grad_norm": 0.1319538950920105, |
|
"learning_rate": 9.905626938413824e-06, |
|
"loss": 1.0841, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.2846153846153846, |
|
"grad_norm": 0.11922305077314377, |
|
"learning_rate": 9.904170363797693e-06, |
|
"loss": 0.9614, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.28717948717948716, |
|
"grad_norm": 0.11520028859376907, |
|
"learning_rate": 9.902709906708132e-06, |
|
"loss": 0.9648, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.28974358974358977, |
|
"grad_norm": 0.10564184933900833, |
|
"learning_rate": 9.901245551601424e-06, |
|
"loss": 0.9224, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 0.2923076923076923, |
|
"grad_norm": 0.11402294784784317, |
|
"learning_rate": 9.89977728285078e-06, |
|
"loss": 1.0839, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.2948717948717949, |
|
"grad_norm": 0.1240580752491951, |
|
"learning_rate": 9.898305084745763e-06, |
|
"loss": 0.9802, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.29743589743589743, |
|
"grad_norm": 0.12268956750631332, |
|
"learning_rate": 9.896828941491739e-06, |
|
"loss": 1.0552, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 0.11103710532188416, |
|
"learning_rate": 9.895348837209303e-06, |
|
"loss": 0.9337, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.30256410256410254, |
|
"grad_norm": 0.1281978338956833, |
|
"learning_rate": 9.893864755933724e-06, |
|
"loss": 1.0919, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.30512820512820515, |
|
"grad_norm": 0.11921875178813934, |
|
"learning_rate": 9.892376681614351e-06, |
|
"loss": 1.0019, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 0.3076923076923077, |
|
"grad_norm": 0.11357328295707703, |
|
"learning_rate": 9.890884598114054e-06, |
|
"loss": 0.9391, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.31025641025641026, |
|
"grad_norm": 0.12437216937541962, |
|
"learning_rate": 9.889388489208635e-06, |
|
"loss": 1.0949, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 0.3128205128205128, |
|
"grad_norm": 0.11032367497682571, |
|
"learning_rate": 9.887888338586223e-06, |
|
"loss": 1.0712, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.3153846153846154, |
|
"grad_norm": 0.10982154309749603, |
|
"learning_rate": 9.886384129846709e-06, |
|
"loss": 1.0995, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 0.31794871794871793, |
|
"grad_norm": 0.11465884000062943, |
|
"learning_rate": 9.88487584650113e-06, |
|
"loss": 1.1997, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.32051282051282054, |
|
"grad_norm": 0.11689360439777374, |
|
"learning_rate": 9.883363471971068e-06, |
|
"loss": 0.9555, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.3230769230769231, |
|
"grad_norm": 0.12228330969810486, |
|
"learning_rate": 9.88184698958805e-06, |
|
"loss": 1.0185, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.32564102564102565, |
|
"grad_norm": 0.11858666688203812, |
|
"learning_rate": 9.88032638259293e-06, |
|
"loss": 0.9995, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 0.3282051282051282, |
|
"grad_norm": 0.107363760471344, |
|
"learning_rate": 9.87880163413527e-06, |
|
"loss": 0.9283, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.33076923076923076, |
|
"grad_norm": 0.1286807507276535, |
|
"learning_rate": 9.877272727272727e-06, |
|
"loss": 0.9635, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.3333333333333333, |
|
"grad_norm": 0.13207103312015533, |
|
"learning_rate": 9.875739644970415e-06, |
|
"loss": 1.0259, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.33589743589743587, |
|
"grad_norm": 0.12238481640815735, |
|
"learning_rate": 9.874202370100275e-06, |
|
"loss": 0.9458, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 0.3384615384615385, |
|
"grad_norm": 0.12218200415372849, |
|
"learning_rate": 9.87266088544044e-06, |
|
"loss": 0.9856, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.34102564102564104, |
|
"grad_norm": 0.192653626203537, |
|
"learning_rate": 9.871115173674589e-06, |
|
"loss": 1.1864, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 0.3435897435897436, |
|
"grad_norm": 0.12083520740270615, |
|
"learning_rate": 9.869565217391306e-06, |
|
"loss": 1.0003, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.34615384615384615, |
|
"grad_norm": 0.11669037491083145, |
|
"learning_rate": 9.86801099908341e-06, |
|
"loss": 1.0615, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.3487179487179487, |
|
"grad_norm": 0.11430592834949493, |
|
"learning_rate": 9.866452501147316e-06, |
|
"loss": 0.927, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.35128205128205126, |
|
"grad_norm": 0.10570185631513596, |
|
"learning_rate": 9.864889705882355e-06, |
|
"loss": 0.9244, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 0.35384615384615387, |
|
"grad_norm": 0.12382305413484573, |
|
"learning_rate": 9.863322595490108e-06, |
|
"loss": 1.0086, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.3564102564102564, |
|
"grad_norm": 0.11679980158805847, |
|
"learning_rate": 9.861751152073734e-06, |
|
"loss": 0.9317, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 0.358974358974359, |
|
"grad_norm": 0.12016775459051132, |
|
"learning_rate": 9.860175357637288e-06, |
|
"loss": 0.9946, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.36153846153846153, |
|
"grad_norm": 0.11385658383369446, |
|
"learning_rate": 9.858595194085029e-06, |
|
"loss": 0.9507, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 0.3641025641025641, |
|
"grad_norm": 0.11400415003299713, |
|
"learning_rate": 9.857010643220733e-06, |
|
"loss": 0.977, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.36666666666666664, |
|
"grad_norm": 0.12075012922286987, |
|
"learning_rate": 9.855421686746988e-06, |
|
"loss": 1.1277, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 0.36923076923076925, |
|
"grad_norm": 0.12377439439296722, |
|
"learning_rate": 9.853828306264502e-06, |
|
"loss": 1.0703, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.3717948717948718, |
|
"grad_norm": 0.11683501303195953, |
|
"learning_rate": 9.852230483271376e-06, |
|
"loss": 0.9773, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.37435897435897436, |
|
"grad_norm": 0.11950255930423737, |
|
"learning_rate": 9.850628199162401e-06, |
|
"loss": 0.9428, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.3769230769230769, |
|
"grad_norm": 0.12005724757909775, |
|
"learning_rate": 9.849021435228333e-06, |
|
"loss": 1.1703, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 0.37948717948717947, |
|
"grad_norm": 0.11797571182250977, |
|
"learning_rate": 9.847410172655158e-06, |
|
"loss": 0.8719, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.382051282051282, |
|
"grad_norm": 0.1225227490067482, |
|
"learning_rate": 9.845794392523365e-06, |
|
"loss": 1.1311, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 0.38461538461538464, |
|
"grad_norm": 0.12024562805891037, |
|
"learning_rate": 9.844174075807208e-06, |
|
"loss": 1.1669, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.3871794871794872, |
|
"grad_norm": 0.12286081910133362, |
|
"learning_rate": 9.842549203373947e-06, |
|
"loss": 1.1541, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 0.38974358974358975, |
|
"grad_norm": 0.1257518231868744, |
|
"learning_rate": 9.840919755983107e-06, |
|
"loss": 0.9313, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.3923076923076923, |
|
"grad_norm": 0.12524078786373138, |
|
"learning_rate": 9.839285714285715e-06, |
|
"loss": 0.904, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 0.39487179487179486, |
|
"grad_norm": 0.1183227151632309, |
|
"learning_rate": 9.83764705882353e-06, |
|
"loss": 1.0132, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 0.3974358974358974, |
|
"grad_norm": 0.12392973154783249, |
|
"learning_rate": 9.836003770028276e-06, |
|
"loss": 1.069, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 0.13140057027339935, |
|
"learning_rate": 9.83435582822086e-06, |
|
"loss": 1.1211, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.4025641025641026, |
|
"grad_norm": 0.1755838245153427, |
|
"learning_rate": 9.832703213610588e-06, |
|
"loss": 1.053, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 0.40512820512820513, |
|
"grad_norm": 0.12414582073688507, |
|
"learning_rate": 9.831045906294368e-06, |
|
"loss": 1.0483, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 0.4076923076923077, |
|
"grad_norm": 0.13171876966953278, |
|
"learning_rate": 9.829383886255924e-06, |
|
"loss": 1.0296, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 0.41025641025641024, |
|
"grad_norm": 0.12738922238349915, |
|
"learning_rate": 9.827717133364974e-06, |
|
"loss": 1.0102, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.4128205128205128, |
|
"grad_norm": 0.1904231309890747, |
|
"learning_rate": 9.826045627376427e-06, |
|
"loss": 1.0314, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 0.4153846153846154, |
|
"grad_norm": 0.12011483311653137, |
|
"learning_rate": 9.824369347929558e-06, |
|
"loss": 0.9475, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.41794871794871796, |
|
"grad_norm": 0.1304839700460434, |
|
"learning_rate": 9.822688274547189e-06, |
|
"loss": 1.0456, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 0.4205128205128205, |
|
"grad_norm": 0.131229430437088, |
|
"learning_rate": 9.821002386634847e-06, |
|
"loss": 1.1589, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.4230769230769231, |
|
"grad_norm": 0.12201635539531708, |
|
"learning_rate": 9.819311663479923e-06, |
|
"loss": 0.966, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.4256410256410256, |
|
"grad_norm": 0.12963519990444183, |
|
"learning_rate": 9.81761608425084e-06, |
|
"loss": 1.0354, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 0.4282051282051282, |
|
"grad_norm": 0.12793965637683868, |
|
"learning_rate": 9.815915627996166e-06, |
|
"loss": 1.0306, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 0.4307692307692308, |
|
"grad_norm": 0.17451830208301544, |
|
"learning_rate": 9.814210273643783e-06, |
|
"loss": 1.0694, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.43333333333333335, |
|
"grad_norm": 0.14709219336509705, |
|
"learning_rate": 9.812500000000001e-06, |
|
"loss": 1.1155, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 0.4358974358974359, |
|
"grad_norm": 0.205572709441185, |
|
"learning_rate": 9.810784785748676e-06, |
|
"loss": 1.0661, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.43846153846153846, |
|
"grad_norm": 0.13123241066932678, |
|
"learning_rate": 9.809064609450338e-06, |
|
"loss": 0.9847, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 0.441025641025641, |
|
"grad_norm": 0.14202538132667542, |
|
"learning_rate": 9.807339449541285e-06, |
|
"loss": 1.0204, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.44358974358974357, |
|
"grad_norm": 0.15248540043830872, |
|
"learning_rate": 9.80560928433269e-06, |
|
"loss": 0.9468, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 0.4461538461538462, |
|
"grad_norm": 0.296898752450943, |
|
"learning_rate": 9.803874092009686e-06, |
|
"loss": 1.0747, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 0.44871794871794873, |
|
"grad_norm": 0.12776528298854828, |
|
"learning_rate": 9.802133850630456e-06, |
|
"loss": 1.0828, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.4512820512820513, |
|
"grad_norm": 0.1273936629295349, |
|
"learning_rate": 9.800388538125306e-06, |
|
"loss": 0.93, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.45384615384615384, |
|
"grad_norm": 0.1328604370355606, |
|
"learning_rate": 9.79863813229572e-06, |
|
"loss": 0.905, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 0.4564102564102564, |
|
"grad_norm": 0.14861007034778595, |
|
"learning_rate": 9.796882610813444e-06, |
|
"loss": 0.9129, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 0.45897435897435895, |
|
"grad_norm": 0.12911070883274078, |
|
"learning_rate": 9.795121951219514e-06, |
|
"loss": 0.9161, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 0.46153846153846156, |
|
"grad_norm": 0.12798583507537842, |
|
"learning_rate": 9.793356130923302e-06, |
|
"loss": 1.015, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.4641025641025641, |
|
"grad_norm": 0.14299742877483368, |
|
"learning_rate": 9.791585127201565e-06, |
|
"loss": 0.9986, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 0.4666666666666667, |
|
"grad_norm": 0.16951002180576324, |
|
"learning_rate": 9.789808917197453e-06, |
|
"loss": 1.0231, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 0.46923076923076923, |
|
"grad_norm": 0.1452597677707672, |
|
"learning_rate": 9.78802747791953e-06, |
|
"loss": 0.9805, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 0.4717948717948718, |
|
"grad_norm": 0.12342038750648499, |
|
"learning_rate": 9.786240786240787e-06, |
|
"loss": 0.9625, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 0.47435897435897434, |
|
"grad_norm": 0.13525085151195526, |
|
"learning_rate": 9.784448818897639e-06, |
|
"loss": 1.0564, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.47692307692307695, |
|
"grad_norm": 0.14499512314796448, |
|
"learning_rate": 9.782651552488912e-06, |
|
"loss": 1.0676, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 0.4794871794871795, |
|
"grad_norm": 0.13853202760219574, |
|
"learning_rate": 9.780848963474828e-06, |
|
"loss": 0.9944, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 0.48205128205128206, |
|
"grad_norm": 0.1524648517370224, |
|
"learning_rate": 9.779041028175976e-06, |
|
"loss": 1.0957, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 0.4846153846153846, |
|
"grad_norm": 0.13356293737888336, |
|
"learning_rate": 9.77722772277228e-06, |
|
"loss": 1.0397, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 0.48717948717948717, |
|
"grad_norm": 0.1403387039899826, |
|
"learning_rate": 9.775409023301933e-06, |
|
"loss": 1.1755, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.4897435897435897, |
|
"grad_norm": 0.13895130157470703, |
|
"learning_rate": 9.773584905660379e-06, |
|
"loss": 1.0341, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 0.49230769230769234, |
|
"grad_norm": 0.1415233463048935, |
|
"learning_rate": 9.771755345599206e-06, |
|
"loss": 1.092, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.4948717948717949, |
|
"grad_norm": 0.1509629786014557, |
|
"learning_rate": 9.7699203187251e-06, |
|
"loss": 0.9921, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 0.49743589743589745, |
|
"grad_norm": 0.13306330144405365, |
|
"learning_rate": 9.768079800498753e-06, |
|
"loss": 1.0274, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 0.1483563780784607, |
|
"learning_rate": 9.766233766233768e-06, |
|
"loss": 0.977, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.5025641025641026, |
|
"grad_norm": 0.13634060323238373, |
|
"learning_rate": 9.764382191095549e-06, |
|
"loss": 0.8818, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 0.5051282051282051, |
|
"grad_norm": 0.13927966356277466, |
|
"learning_rate": 9.762525050100202e-06, |
|
"loss": 0.9694, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 0.5076923076923077, |
|
"grad_norm": 0.13205285370349884, |
|
"learning_rate": 9.760662318113397e-06, |
|
"loss": 1.0713, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 0.5102564102564102, |
|
"grad_norm": 0.1272955685853958, |
|
"learning_rate": 9.758793969849248e-06, |
|
"loss": 0.9304, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 0.5128205128205128, |
|
"grad_norm": 0.14206095039844513, |
|
"learning_rate": 9.75691997986915e-06, |
|
"loss": 1.0151, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.5153846153846153, |
|
"grad_norm": 0.13040238618850708, |
|
"learning_rate": 9.755040322580646e-06, |
|
"loss": 1.056, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 0.517948717948718, |
|
"grad_norm": 0.13569800555706024, |
|
"learning_rate": 9.753154972236246e-06, |
|
"loss": 1.0058, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 0.5205128205128206, |
|
"grad_norm": 0.12954074144363403, |
|
"learning_rate": 9.751263902932256e-06, |
|
"loss": 1.0232, |
|
"step": 203 |
|
}, |
|
{ |
|
"epoch": 0.5230769230769231, |
|
"grad_norm": 0.1352427899837494, |
|
"learning_rate": 9.749367088607595e-06, |
|
"loss": 1.1409, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 0.5256410256410257, |
|
"grad_norm": 0.13935823738574982, |
|
"learning_rate": 9.747464503042597e-06, |
|
"loss": 0.9364, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.5282051282051282, |
|
"grad_norm": 0.14334161579608917, |
|
"learning_rate": 9.745556119857798e-06, |
|
"loss": 1.0412, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 0.5307692307692308, |
|
"grad_norm": 0.13454332947731018, |
|
"learning_rate": 9.743641912512716e-06, |
|
"loss": 1.0093, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 0.5333333333333333, |
|
"grad_norm": 0.1333240121603012, |
|
"learning_rate": 9.741721854304638e-06, |
|
"loss": 1.0294, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 0.5358974358974359, |
|
"grad_norm": 0.1349434107542038, |
|
"learning_rate": 9.739795918367347e-06, |
|
"loss": 1.0395, |
|
"step": 209 |
|
}, |
|
{ |
|
"epoch": 0.5384615384615384, |
|
"grad_norm": 0.12995462119579315, |
|
"learning_rate": 9.737864077669904e-06, |
|
"loss": 1.0675, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.541025641025641, |
|
"grad_norm": 0.13981810212135315, |
|
"learning_rate": 9.735926305015354e-06, |
|
"loss": 1.0434, |
|
"step": 211 |
|
}, |
|
{ |
|
"epoch": 0.5435897435897435, |
|
"grad_norm": 0.13797558844089508, |
|
"learning_rate": 9.733982573039467e-06, |
|
"loss": 1.1582, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 0.5461538461538461, |
|
"grad_norm": 0.136617973446846, |
|
"learning_rate": 9.732032854209446e-06, |
|
"loss": 0.9319, |
|
"step": 213 |
|
}, |
|
{ |
|
"epoch": 0.5487179487179488, |
|
"grad_norm": 0.15038198232650757, |
|
"learning_rate": 9.730077120822623e-06, |
|
"loss": 1.0026, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 0.5512820512820513, |
|
"grad_norm": 0.1529029756784439, |
|
"learning_rate": 9.728115345005151e-06, |
|
"loss": 1.0784, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.5538461538461539, |
|
"grad_norm": 0.13984259963035583, |
|
"learning_rate": 9.726147498710677e-06, |
|
"loss": 1.1533, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 0.5564102564102564, |
|
"grad_norm": 0.14129801094532013, |
|
"learning_rate": 9.724173553719009e-06, |
|
"loss": 0.9703, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 0.558974358974359, |
|
"grad_norm": 0.14009319245815277, |
|
"learning_rate": 9.722193481634766e-06, |
|
"loss": 1.1317, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 0.5615384615384615, |
|
"grad_norm": 0.13649149239063263, |
|
"learning_rate": 9.720207253886011e-06, |
|
"loss": 1.0083, |
|
"step": 219 |
|
}, |
|
{ |
|
"epoch": 0.5641025641025641, |
|
"grad_norm": 0.13949915766716003, |
|
"learning_rate": 9.718214841722885e-06, |
|
"loss": 0.9941, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.5666666666666667, |
|
"grad_norm": 0.17557266354560852, |
|
"learning_rate": 9.716216216216216e-06, |
|
"loss": 1.1887, |
|
"step": 221 |
|
}, |
|
{ |
|
"epoch": 0.5692307692307692, |
|
"grad_norm": 0.132981538772583, |
|
"learning_rate": 9.714211348256117e-06, |
|
"loss": 0.9682, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 0.5717948717948718, |
|
"grad_norm": 0.15944674611091614, |
|
"learning_rate": 9.712200208550574e-06, |
|
"loss": 0.9442, |
|
"step": 223 |
|
}, |
|
{ |
|
"epoch": 0.5743589743589743, |
|
"grad_norm": 0.15149790048599243, |
|
"learning_rate": 9.710182767624022e-06, |
|
"loss": 1.1134, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 0.5769230769230769, |
|
"grad_norm": 0.13614985346794128, |
|
"learning_rate": 9.7081589958159e-06, |
|
"loss": 0.8473, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.5794871794871795, |
|
"grad_norm": 0.1307866871356964, |
|
"learning_rate": 9.706128863279205e-06, |
|
"loss": 0.9158, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 0.5820512820512821, |
|
"grad_norm": 0.14745928347110748, |
|
"learning_rate": 9.704092339979015e-06, |
|
"loss": 0.9988, |
|
"step": 227 |
|
}, |
|
{ |
|
"epoch": 0.5846153846153846, |
|
"grad_norm": 0.14534030854701996, |
|
"learning_rate": 9.702049395691015e-06, |
|
"loss": 1.0931, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 0.5871794871794872, |
|
"grad_norm": 0.146283358335495, |
|
"learning_rate": 9.7e-06, |
|
"loss": 1.2088, |
|
"step": 229 |
|
}, |
|
{ |
|
"epoch": 0.5897435897435898, |
|
"grad_norm": 0.16283774375915527, |
|
"learning_rate": 9.697944122298367e-06, |
|
"loss": 0.9512, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.5923076923076923, |
|
"grad_norm": 0.1303090751171112, |
|
"learning_rate": 9.695881731784583e-06, |
|
"loss": 0.9181, |
|
"step": 231 |
|
}, |
|
{ |
|
"epoch": 0.5948717948717949, |
|
"grad_norm": 0.14575974643230438, |
|
"learning_rate": 9.693812797461662e-06, |
|
"loss": 1.0348, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 0.5974358974358974, |
|
"grad_norm": 0.14711220562458038, |
|
"learning_rate": 9.691737288135593e-06, |
|
"loss": 1.0401, |
|
"step": 233 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 0.14356166124343872, |
|
"learning_rate": 9.689655172413794e-06, |
|
"loss": 0.9484, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 0.6025641025641025, |
|
"grad_norm": 0.14533978700637817, |
|
"learning_rate": 9.687566418703508e-06, |
|
"loss": 0.9985, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.6051282051282051, |
|
"grad_norm": 0.14926594495773315, |
|
"learning_rate": 9.685470995210218e-06, |
|
"loss": 1.1273, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 0.6076923076923076, |
|
"grad_norm": 0.15611067414283752, |
|
"learning_rate": 9.683368869936036e-06, |
|
"loss": 1.0847, |
|
"step": 237 |
|
}, |
|
{ |
|
"epoch": 0.6102564102564103, |
|
"grad_norm": 0.15448501706123352, |
|
"learning_rate": 9.681260010678057e-06, |
|
"loss": 1.0333, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 0.6128205128205129, |
|
"grad_norm": 0.16903169453144073, |
|
"learning_rate": 9.679144385026738e-06, |
|
"loss": 0.9972, |
|
"step": 239 |
|
}, |
|
{ |
|
"epoch": 0.6153846153846154, |
|
"grad_norm": 0.14398221671581268, |
|
"learning_rate": 9.677021960364222e-06, |
|
"loss": 0.9373, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.617948717948718, |
|
"grad_norm": 0.16799390316009521, |
|
"learning_rate": 9.674892703862662e-06, |
|
"loss": 0.9425, |
|
"step": 241 |
|
}, |
|
{ |
|
"epoch": 0.6205128205128205, |
|
"grad_norm": 0.16503410041332245, |
|
"learning_rate": 9.672756582482538e-06, |
|
"loss": 0.991, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 0.6230769230769231, |
|
"grad_norm": 0.13837389647960663, |
|
"learning_rate": 9.670613562970937e-06, |
|
"loss": 1.0057, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 0.6256410256410256, |
|
"grad_norm": 0.15482862293720245, |
|
"learning_rate": 9.66846361185984e-06, |
|
"loss": 1.2884, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 0.6282051282051282, |
|
"grad_norm": 0.17946982383728027, |
|
"learning_rate": 9.666306695464364e-06, |
|
"loss": 1.0498, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.6307692307692307, |
|
"grad_norm": 0.18409568071365356, |
|
"learning_rate": 9.664142779881018e-06, |
|
"loss": 1.0383, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 0.6333333333333333, |
|
"grad_norm": 0.142312690615654, |
|
"learning_rate": 9.661971830985917e-06, |
|
"loss": 1.1336, |
|
"step": 247 |
|
}, |
|
{ |
|
"epoch": 0.6358974358974359, |
|
"grad_norm": 0.15140476822853088, |
|
"learning_rate": 9.659793814432991e-06, |
|
"loss": 1.0985, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 0.6384615384615384, |
|
"grad_norm": 0.1846708357334137, |
|
"learning_rate": 9.657608695652173e-06, |
|
"loss": 0.9555, |
|
"step": 249 |
|
}, |
|
{ |
|
"epoch": 0.6410256410256411, |
|
"grad_norm": 0.16700689494609833, |
|
"learning_rate": 9.655416439847578e-06, |
|
"loss": 0.9279, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.6435897435897436, |
|
"grad_norm": 0.1423652619123459, |
|
"learning_rate": 9.653217011995637e-06, |
|
"loss": 1.073, |
|
"step": 251 |
|
}, |
|
{ |
|
"epoch": 0.6461538461538462, |
|
"grad_norm": 0.16811993718147278, |
|
"learning_rate": 9.651010376843254e-06, |
|
"loss": 1.0924, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 0.6487179487179487, |
|
"grad_norm": 0.15465322136878967, |
|
"learning_rate": 9.64879649890591e-06, |
|
"loss": 1.078, |
|
"step": 253 |
|
}, |
|
{ |
|
"epoch": 0.6512820512820513, |
|
"grad_norm": 0.1621096432209015, |
|
"learning_rate": 9.646575342465754e-06, |
|
"loss": 1.0095, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 0.6538461538461539, |
|
"grad_norm": 0.16749772429466248, |
|
"learning_rate": 9.644346871569704e-06, |
|
"loss": 0.948, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.6564102564102564, |
|
"grad_norm": 0.1397644579410553, |
|
"learning_rate": 9.64211105002749e-06, |
|
"loss": 1.0232, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 0.658974358974359, |
|
"grad_norm": 0.179872065782547, |
|
"learning_rate": 9.639867841409692e-06, |
|
"loss": 1.0781, |
|
"step": 257 |
|
}, |
|
{ |
|
"epoch": 0.6615384615384615, |
|
"grad_norm": 0.18746939301490784, |
|
"learning_rate": 9.63761720904578e-06, |
|
"loss": 0.9158, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 0.6641025641025641, |
|
"grad_norm": 0.17777228355407715, |
|
"learning_rate": 9.635359116022101e-06, |
|
"loss": 1.0611, |
|
"step": 259 |
|
}, |
|
{ |
|
"epoch": 0.6666666666666666, |
|
"grad_norm": 0.15664607286453247, |
|
"learning_rate": 9.633093525179857e-06, |
|
"loss": 1.0061, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.6692307692307692, |
|
"grad_norm": 0.2109006941318512, |
|
"learning_rate": 9.630820399113084e-06, |
|
"loss": 1.0238, |
|
"step": 261 |
|
}, |
|
{ |
|
"epoch": 0.6717948717948717, |
|
"grad_norm": 0.16149462759494781, |
|
"learning_rate": 9.628539700166575e-06, |
|
"loss": 1.1312, |
|
"step": 262 |
|
}, |
|
{ |
|
"epoch": 0.6743589743589744, |
|
"grad_norm": 0.13910357654094696, |
|
"learning_rate": 9.626251390433817e-06, |
|
"loss": 0.9808, |
|
"step": 263 |
|
}, |
|
{ |
|
"epoch": 0.676923076923077, |
|
"grad_norm": 0.15184703469276428, |
|
"learning_rate": 9.623955431754875e-06, |
|
"loss": 0.9458, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 0.6794871794871795, |
|
"grad_norm": 0.17896021902561188, |
|
"learning_rate": 9.621651785714285e-06, |
|
"loss": 1.0504, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.6820512820512821, |
|
"grad_norm": 0.15594998002052307, |
|
"learning_rate": 9.619340413638905e-06, |
|
"loss": 1.1288, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 0.6846153846153846, |
|
"grad_norm": 0.14313378930091858, |
|
"learning_rate": 9.617021276595746e-06, |
|
"loss": 0.9985, |
|
"step": 267 |
|
}, |
|
{ |
|
"epoch": 0.6871794871794872, |
|
"grad_norm": 0.14239932596683502, |
|
"learning_rate": 9.614694335389792e-06, |
|
"loss": 0.9776, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 0.6897435897435897, |
|
"grad_norm": 0.1516118049621582, |
|
"learning_rate": 9.612359550561798e-06, |
|
"loss": 1.0086, |
|
"step": 269 |
|
}, |
|
{ |
|
"epoch": 0.6923076923076923, |
|
"grad_norm": 0.1527036875486374, |
|
"learning_rate": 9.610016882386046e-06, |
|
"loss": 1.1663, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.6948717948717948, |
|
"grad_norm": 0.1713275909423828, |
|
"learning_rate": 9.607666290868095e-06, |
|
"loss": 0.977, |
|
"step": 271 |
|
}, |
|
{ |
|
"epoch": 0.6974358974358974, |
|
"grad_norm": 0.13157938420772552, |
|
"learning_rate": 9.60530773574252e-06, |
|
"loss": 0.9617, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 0.17953188717365265, |
|
"learning_rate": 9.60294117647059e-06, |
|
"loss": 1.0687, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 0.7025641025641025, |
|
"grad_norm": 0.17509308457374573, |
|
"learning_rate": 9.60056657223796e-06, |
|
"loss": 0.9238, |
|
"step": 274 |
|
}, |
|
{ |
|
"epoch": 0.7051282051282052, |
|
"grad_norm": 0.16777881979942322, |
|
"learning_rate": 9.598183881952327e-06, |
|
"loss": 1.0203, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.7076923076923077, |
|
"grad_norm": 0.1494888812303543, |
|
"learning_rate": 9.595793064241049e-06, |
|
"loss": 1.0675, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 0.7102564102564103, |
|
"grad_norm": 0.1649765521287918, |
|
"learning_rate": 9.593394077448748e-06, |
|
"loss": 0.8814, |
|
"step": 277 |
|
}, |
|
{ |
|
"epoch": 0.7128205128205128, |
|
"grad_norm": 0.17049697041511536, |
|
"learning_rate": 9.590986879634912e-06, |
|
"loss": 0.891, |
|
"step": 278 |
|
}, |
|
{ |
|
"epoch": 0.7153846153846154, |
|
"grad_norm": 0.14463086426258087, |
|
"learning_rate": 9.58857142857143e-06, |
|
"loss": 0.957, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 0.717948717948718, |
|
"grad_norm": 0.15929310023784637, |
|
"learning_rate": 9.586147681740127e-06, |
|
"loss": 1.017, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.7205128205128205, |
|
"grad_norm": 0.17474216222763062, |
|
"learning_rate": 9.583715596330276e-06, |
|
"loss": 0.9516, |
|
"step": 281 |
|
}, |
|
{ |
|
"epoch": 0.7230769230769231, |
|
"grad_norm": 0.1831640750169754, |
|
"learning_rate": 9.581275129236071e-06, |
|
"loss": 1.0512, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 0.7256410256410256, |
|
"grad_norm": 0.1618429571390152, |
|
"learning_rate": 9.578826237054085e-06, |
|
"loss": 0.9551, |
|
"step": 283 |
|
}, |
|
{ |
|
"epoch": 0.7282051282051282, |
|
"grad_norm": 0.24447672069072723, |
|
"learning_rate": 9.576368876080691e-06, |
|
"loss": 1.0373, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 0.7307692307692307, |
|
"grad_norm": 0.16472192108631134, |
|
"learning_rate": 9.57390300230947e-06, |
|
"loss": 0.9911, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.7333333333333333, |
|
"grad_norm": 0.1695912629365921, |
|
"learning_rate": 9.571428571428573e-06, |
|
"loss": 0.9008, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 0.735897435897436, |
|
"grad_norm": 0.1703156977891922, |
|
"learning_rate": 9.568945538818077e-06, |
|
"loss": 1.0863, |
|
"step": 287 |
|
}, |
|
{ |
|
"epoch": 0.7384615384615385, |
|
"grad_norm": 0.14874251186847687, |
|
"learning_rate": 9.566453859547304e-06, |
|
"loss": 0.9512, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 0.7410256410256411, |
|
"grad_norm": 0.1689365655183792, |
|
"learning_rate": 9.563953488372094e-06, |
|
"loss": 1.0063, |
|
"step": 289 |
|
}, |
|
{ |
|
"epoch": 0.7435897435897436, |
|
"grad_norm": 0.17003223299980164, |
|
"learning_rate": 9.56144437973209e-06, |
|
"loss": 0.9397, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.7461538461538462, |
|
"grad_norm": 0.20850569009780884, |
|
"learning_rate": 9.55892648774796e-06, |
|
"loss": 1.2076, |
|
"step": 291 |
|
}, |
|
{ |
|
"epoch": 0.7487179487179487, |
|
"grad_norm": 0.15689845383167267, |
|
"learning_rate": 9.556399766218587e-06, |
|
"loss": 1.1031, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 0.7512820512820513, |
|
"grad_norm": 0.160260871052742, |
|
"learning_rate": 9.553864168618268e-06, |
|
"loss": 1.0275, |
|
"step": 293 |
|
}, |
|
{ |
|
"epoch": 0.7538461538461538, |
|
"grad_norm": 0.1513524055480957, |
|
"learning_rate": 9.551319648093842e-06, |
|
"loss": 1.008, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 0.7564102564102564, |
|
"grad_norm": 0.17547191679477692, |
|
"learning_rate": 9.54876615746181e-06, |
|
"loss": 1.0009, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.7589743589743589, |
|
"grad_norm": 0.15460693836212158, |
|
"learning_rate": 9.546203649205416e-06, |
|
"loss": 1.1096, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 0.7615384615384615, |
|
"grad_norm": 0.17146429419517517, |
|
"learning_rate": 9.543632075471698e-06, |
|
"loss": 0.9587, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 0.764102564102564, |
|
"grad_norm": 0.15998685359954834, |
|
"learning_rate": 9.54105138806852e-06, |
|
"loss": 0.9264, |
|
"step": 298 |
|
}, |
|
{ |
|
"epoch": 0.7666666666666667, |
|
"grad_norm": 0.17196176946163177, |
|
"learning_rate": 9.538461538461538e-06, |
|
"loss": 1.0, |
|
"step": 299 |
|
}, |
|
{ |
|
"epoch": 0.7692307692307693, |
|
"grad_norm": 0.18652167916297913, |
|
"learning_rate": 9.535862477771191e-06, |
|
"loss": 1.1239, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.7717948717948718, |
|
"grad_norm": 0.1686553657054901, |
|
"learning_rate": 9.533254156769596e-06, |
|
"loss": 1.1414, |
|
"step": 301 |
|
}, |
|
{ |
|
"epoch": 0.7743589743589744, |
|
"grad_norm": 0.15988533198833466, |
|
"learning_rate": 9.530636525877454e-06, |
|
"loss": 0.9027, |
|
"step": 302 |
|
}, |
|
{ |
|
"epoch": 0.7769230769230769, |
|
"grad_norm": 0.15526551008224487, |
|
"learning_rate": 9.528009535160905e-06, |
|
"loss": 1.0569, |
|
"step": 303 |
|
}, |
|
{ |
|
"epoch": 0.7794871794871795, |
|
"grad_norm": 0.1854647994041443, |
|
"learning_rate": 9.52537313432836e-06, |
|
"loss": 1.1064, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 0.782051282051282, |
|
"grad_norm": 0.20110487937927246, |
|
"learning_rate": 9.522727272727274e-06, |
|
"loss": 1.0231, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.7846153846153846, |
|
"grad_norm": 0.15321309864521027, |
|
"learning_rate": 9.520071899340924e-06, |
|
"loss": 1.0294, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 0.7871794871794872, |
|
"grad_norm": 0.1512340009212494, |
|
"learning_rate": 9.517406962785115e-06, |
|
"loss": 0.9112, |
|
"step": 307 |
|
}, |
|
{ |
|
"epoch": 0.7897435897435897, |
|
"grad_norm": 0.19026243686676025, |
|
"learning_rate": 9.514732411304872e-06, |
|
"loss": 1.0988, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 0.7923076923076923, |
|
"grad_norm": 0.15860332548618317, |
|
"learning_rate": 9.512048192771085e-06, |
|
"loss": 0.8795, |
|
"step": 309 |
|
}, |
|
{ |
|
"epoch": 0.7948717948717948, |
|
"grad_norm": 0.2282475382089615, |
|
"learning_rate": 9.509354254677129e-06, |
|
"loss": 1.0206, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.7974358974358975, |
|
"grad_norm": 0.16409388184547424, |
|
"learning_rate": 9.50665054413543e-06, |
|
"loss": 1.01, |
|
"step": 311 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 0.15974940359592438, |
|
"learning_rate": 9.503937007874017e-06, |
|
"loss": 0.989, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 0.8025641025641026, |
|
"grad_norm": 0.16357499361038208, |
|
"learning_rate": 9.50121359223301e-06, |
|
"loss": 0.9589, |
|
"step": 313 |
|
}, |
|
{ |
|
"epoch": 0.8051282051282052, |
|
"grad_norm": 0.1798093467950821, |
|
"learning_rate": 9.498480243161095e-06, |
|
"loss": 0.9949, |
|
"step": 314 |
|
}, |
|
{ |
|
"epoch": 0.8076923076923077, |
|
"grad_norm": 0.18792827427387238, |
|
"learning_rate": 9.495736906211937e-06, |
|
"loss": 1.0233, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.8102564102564103, |
|
"grad_norm": 0.19793489575386047, |
|
"learning_rate": 9.492983526540575e-06, |
|
"loss": 1.0853, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 0.8128205128205128, |
|
"grad_norm": 0.148494690656662, |
|
"learning_rate": 9.490220048899757e-06, |
|
"loss": 1.0836, |
|
"step": 317 |
|
}, |
|
{ |
|
"epoch": 0.8153846153846154, |
|
"grad_norm": 0.20617227256298065, |
|
"learning_rate": 9.487446417636253e-06, |
|
"loss": 0.9434, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 0.8179487179487179, |
|
"grad_norm": 0.2122315913438797, |
|
"learning_rate": 9.484662576687117e-06, |
|
"loss": 0.9936, |
|
"step": 319 |
|
}, |
|
{ |
|
"epoch": 0.8205128205128205, |
|
"grad_norm": 0.19928601384162903, |
|
"learning_rate": 9.481868469575908e-06, |
|
"loss": 0.9267, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.823076923076923, |
|
"grad_norm": 0.1821938306093216, |
|
"learning_rate": 9.479064039408867e-06, |
|
"loss": 0.9761, |
|
"step": 321 |
|
}, |
|
{ |
|
"epoch": 0.8256410256410256, |
|
"grad_norm": 0.1640615314245224, |
|
"learning_rate": 9.476249228871069e-06, |
|
"loss": 0.9753, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 0.8282051282051283, |
|
"grad_norm": 0.2164408564567566, |
|
"learning_rate": 9.4734239802225e-06, |
|
"loss": 1.009, |
|
"step": 323 |
|
}, |
|
{ |
|
"epoch": 0.8307692307692308, |
|
"grad_norm": 0.17672689259052277, |
|
"learning_rate": 9.470588235294119e-06, |
|
"loss": 0.9376, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 0.8333333333333334, |
|
"grad_norm": 0.15482589602470398, |
|
"learning_rate": 9.467741935483871e-06, |
|
"loss": 1.0459, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.8358974358974359, |
|
"grad_norm": 0.1468273401260376, |
|
"learning_rate": 9.464885021752642e-06, |
|
"loss": 0.9625, |
|
"step": 326 |
|
}, |
|
{ |
|
"epoch": 0.8384615384615385, |
|
"grad_norm": 0.1662525236606598, |
|
"learning_rate": 9.462017434620176e-06, |
|
"loss": 0.9609, |
|
"step": 327 |
|
}, |
|
{ |
|
"epoch": 0.841025641025641, |
|
"grad_norm": 0.20608198642730713, |
|
"learning_rate": 9.45913911416095e-06, |
|
"loss": 0.9673, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 0.8435897435897436, |
|
"grad_norm": 0.18384887278079987, |
|
"learning_rate": 9.45625e-06, |
|
"loss": 1.02, |
|
"step": 329 |
|
}, |
|
{ |
|
"epoch": 0.8461538461538461, |
|
"grad_norm": 0.1636429876089096, |
|
"learning_rate": 9.453350031308706e-06, |
|
"loss": 1.0665, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.8487179487179487, |
|
"grad_norm": 0.16819709539413452, |
|
"learning_rate": 9.450439146800503e-06, |
|
"loss": 0.9494, |
|
"step": 331 |
|
}, |
|
{ |
|
"epoch": 0.8512820512820513, |
|
"grad_norm": 0.16869591176509857, |
|
"learning_rate": 9.447517284726587e-06, |
|
"loss": 0.9362, |
|
"step": 332 |
|
}, |
|
{ |
|
"epoch": 0.8538461538461538, |
|
"grad_norm": 0.17681722342967987, |
|
"learning_rate": 9.444584382871537e-06, |
|
"loss": 0.9596, |
|
"step": 333 |
|
}, |
|
{ |
|
"epoch": 0.8564102564102564, |
|
"grad_norm": 0.1720973253250122, |
|
"learning_rate": 9.441640378548898e-06, |
|
"loss": 0.9371, |
|
"step": 334 |
|
}, |
|
{ |
|
"epoch": 0.8589743589743589, |
|
"grad_norm": 0.1684177815914154, |
|
"learning_rate": 9.438685208596712e-06, |
|
"loss": 0.8739, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.8615384615384616, |
|
"grad_norm": 0.15152432024478912, |
|
"learning_rate": 9.435718809373022e-06, |
|
"loss": 1.0489, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 0.8641025641025641, |
|
"grad_norm": 0.16250121593475342, |
|
"learning_rate": 9.43274111675127e-06, |
|
"loss": 0.9873, |
|
"step": 337 |
|
}, |
|
{ |
|
"epoch": 0.8666666666666667, |
|
"grad_norm": 0.20848555862903595, |
|
"learning_rate": 9.429752066115703e-06, |
|
"loss": 1.1004, |
|
"step": 338 |
|
}, |
|
{ |
|
"epoch": 0.8692307692307693, |
|
"grad_norm": 0.1785045713186264, |
|
"learning_rate": 9.426751592356688e-06, |
|
"loss": 1.0217, |
|
"step": 339 |
|
}, |
|
{ |
|
"epoch": 0.8717948717948718, |
|
"grad_norm": 0.14325062930583954, |
|
"learning_rate": 9.423739629865986e-06, |
|
"loss": 1.0522, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.8743589743589744, |
|
"grad_norm": 0.17043401300907135, |
|
"learning_rate": 9.420716112531971e-06, |
|
"loss": 0.9412, |
|
"step": 341 |
|
}, |
|
{ |
|
"epoch": 0.8769230769230769, |
|
"grad_norm": 0.17380043864250183, |
|
"learning_rate": 9.417680973734785e-06, |
|
"loss": 1.0975, |
|
"step": 342 |
|
}, |
|
{ |
|
"epoch": 0.8794871794871795, |
|
"grad_norm": 0.15861521661281586, |
|
"learning_rate": 9.414634146341465e-06, |
|
"loss": 0.9772, |
|
"step": 343 |
|
}, |
|
{ |
|
"epoch": 0.882051282051282, |
|
"grad_norm": 0.17549242079257965, |
|
"learning_rate": 9.411575562700965e-06, |
|
"loss": 1.0992, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 0.8846153846153846, |
|
"grad_norm": 0.20035730302333832, |
|
"learning_rate": 9.408505154639175e-06, |
|
"loss": 0.9817, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.8871794871794871, |
|
"grad_norm": 0.16777153313159943, |
|
"learning_rate": 9.405422853453843e-06, |
|
"loss": 0.9846, |
|
"step": 346 |
|
}, |
|
{ |
|
"epoch": 0.8897435897435897, |
|
"grad_norm": 0.17003268003463745, |
|
"learning_rate": 9.402328589909445e-06, |
|
"loss": 1.022, |
|
"step": 347 |
|
}, |
|
{ |
|
"epoch": 0.8923076923076924, |
|
"grad_norm": 0.17670491337776184, |
|
"learning_rate": 9.399222294232017e-06, |
|
"loss": 1.1117, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 0.8948717948717949, |
|
"grad_norm": 0.18102656304836273, |
|
"learning_rate": 9.396103896103898e-06, |
|
"loss": 1.0409, |
|
"step": 349 |
|
}, |
|
{ |
|
"epoch": 0.8974358974358975, |
|
"grad_norm": 0.15800277888774872, |
|
"learning_rate": 9.392973324658426e-06, |
|
"loss": 0.8649, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 0.17744143307209015, |
|
"learning_rate": 9.389830508474576e-06, |
|
"loss": 0.9096, |
|
"step": 351 |
|
}, |
|
{ |
|
"epoch": 0.9025641025641026, |
|
"grad_norm": 0.16629469394683838, |
|
"learning_rate": 9.386675375571523e-06, |
|
"loss": 1.0208, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 0.9051282051282051, |
|
"grad_norm": 0.15229813754558563, |
|
"learning_rate": 9.383507853403142e-06, |
|
"loss": 0.9423, |
|
"step": 353 |
|
}, |
|
{ |
|
"epoch": 0.9076923076923077, |
|
"grad_norm": 0.40435972809791565, |
|
"learning_rate": 9.38032786885246e-06, |
|
"loss": 1.0585, |
|
"step": 354 |
|
}, |
|
{ |
|
"epoch": 0.9102564102564102, |
|
"grad_norm": 0.20711062848567963, |
|
"learning_rate": 9.377135348226017e-06, |
|
"loss": 1.0715, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.9128205128205128, |
|
"grad_norm": 0.20200040936470032, |
|
"learning_rate": 9.37393021724819e-06, |
|
"loss": 1.0494, |
|
"step": 356 |
|
}, |
|
{ |
|
"epoch": 0.9153846153846154, |
|
"grad_norm": 0.1699497103691101, |
|
"learning_rate": 9.370712401055409e-06, |
|
"loss": 1.0363, |
|
"step": 357 |
|
}, |
|
{ |
|
"epoch": 0.9179487179487179, |
|
"grad_norm": 0.17281508445739746, |
|
"learning_rate": 9.36748182419035e-06, |
|
"loss": 1.0238, |
|
"step": 358 |
|
}, |
|
{ |
|
"epoch": 0.9205128205128205, |
|
"grad_norm": 0.18644343316555023, |
|
"learning_rate": 9.364238410596028e-06, |
|
"loss": 1.0655, |
|
"step": 359 |
|
}, |
|
{ |
|
"epoch": 0.9230769230769231, |
|
"grad_norm": 0.16731494665145874, |
|
"learning_rate": 9.360982083609822e-06, |
|
"loss": 0.9855, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.9256410256410257, |
|
"grad_norm": 0.17364031076431274, |
|
"learning_rate": 9.357712765957447e-06, |
|
"loss": 0.9505, |
|
"step": 361 |
|
}, |
|
{ |
|
"epoch": 0.9282051282051282, |
|
"grad_norm": 0.1894925981760025, |
|
"learning_rate": 9.354430379746837e-06, |
|
"loss": 0.9665, |
|
"step": 362 |
|
}, |
|
{ |
|
"epoch": 0.9307692307692308, |
|
"grad_norm": 0.1816585510969162, |
|
"learning_rate": 9.35113484646195e-06, |
|
"loss": 0.8783, |
|
"step": 363 |
|
}, |
|
{ |
|
"epoch": 0.9333333333333333, |
|
"grad_norm": 0.177452951669693, |
|
"learning_rate": 9.347826086956523e-06, |
|
"loss": 1.0144, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 0.9358974358974359, |
|
"grad_norm": 0.1650353968143463, |
|
"learning_rate": 9.344504021447722e-06, |
|
"loss": 0.9883, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.9384615384615385, |
|
"grad_norm": 0.155875101685524, |
|
"learning_rate": 9.341168569509738e-06, |
|
"loss": 1.0195, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 0.941025641025641, |
|
"grad_norm": 0.19764171540737152, |
|
"learning_rate": 9.337819650067296e-06, |
|
"loss": 1.0666, |
|
"step": 367 |
|
}, |
|
{ |
|
"epoch": 0.9435897435897436, |
|
"grad_norm": 0.15158161520957947, |
|
"learning_rate": 9.334457181389078e-06, |
|
"loss": 1.1297, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 0.9461538461538461, |
|
"grad_norm": 0.16357675194740295, |
|
"learning_rate": 9.331081081081083e-06, |
|
"loss": 0.9474, |
|
"step": 369 |
|
}, |
|
{ |
|
"epoch": 0.9487179487179487, |
|
"grad_norm": 0.17813360691070557, |
|
"learning_rate": 9.327691266079892e-06, |
|
"loss": 1.0161, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.9512820512820512, |
|
"grad_norm": 0.15733803808689117, |
|
"learning_rate": 9.324287652645861e-06, |
|
"loss": 0.8616, |
|
"step": 371 |
|
}, |
|
{ |
|
"epoch": 0.9538461538461539, |
|
"grad_norm": 0.16512970626354218, |
|
"learning_rate": 9.320870156356221e-06, |
|
"loss": 0.906, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 0.9564102564102565, |
|
"grad_norm": 0.16653649508953094, |
|
"learning_rate": 9.317438692098092e-06, |
|
"loss": 0.9861, |
|
"step": 373 |
|
}, |
|
{ |
|
"epoch": 0.958974358974359, |
|
"grad_norm": 0.15374256670475006, |
|
"learning_rate": 9.313993174061434e-06, |
|
"loss": 0.9238, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 0.9615384615384616, |
|
"grad_norm": 0.1622532606124878, |
|
"learning_rate": 9.310533515731875e-06, |
|
"loss": 0.9759, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.9641025641025641, |
|
"grad_norm": 0.19126266241073608, |
|
"learning_rate": 9.307059629883482e-06, |
|
"loss": 0.9745, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 0.9666666666666667, |
|
"grad_norm": 0.1595565676689148, |
|
"learning_rate": 9.303571428571428e-06, |
|
"loss": 0.9289, |
|
"step": 377 |
|
}, |
|
{ |
|
"epoch": 0.9692307692307692, |
|
"grad_norm": 0.17021501064300537, |
|
"learning_rate": 9.30006882312457e-06, |
|
"loss": 1.0675, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 0.9717948717948718, |
|
"grad_norm": 0.17533089220523834, |
|
"learning_rate": 9.296551724137932e-06, |
|
"loss": 1.0388, |
|
"step": 379 |
|
}, |
|
{ |
|
"epoch": 0.9743589743589743, |
|
"grad_norm": 0.16750235855579376, |
|
"learning_rate": 9.2930200414651e-06, |
|
"loss": 1.0436, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.9769230769230769, |
|
"grad_norm": 0.15664179623126984, |
|
"learning_rate": 9.289473684210525e-06, |
|
"loss": 0.977, |
|
"step": 381 |
|
}, |
|
{ |
|
"epoch": 0.9794871794871794, |
|
"grad_norm": 0.16151364147663116, |
|
"learning_rate": 9.285912560721721e-06, |
|
"loss": 1.0423, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 0.982051282051282, |
|
"grad_norm": 0.16488024592399597, |
|
"learning_rate": 9.282336578581363e-06, |
|
"loss": 0.9958, |
|
"step": 383 |
|
}, |
|
{ |
|
"epoch": 0.9846153846153847, |
|
"grad_norm": 0.2102440744638443, |
|
"learning_rate": 9.278745644599303e-06, |
|
"loss": 0.9997, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 0.9871794871794872, |
|
"grad_norm": 0.18937990069389343, |
|
"learning_rate": 9.275139664804471e-06, |
|
"loss": 1.0116, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.9897435897435898, |
|
"grad_norm": 0.17054639756679535, |
|
"learning_rate": 9.27151854443667e-06, |
|
"loss": 0.9257, |
|
"step": 386 |
|
}, |
|
{ |
|
"epoch": 0.9923076923076923, |
|
"grad_norm": 0.16185376048088074, |
|
"learning_rate": 9.26788218793829e-06, |
|
"loss": 0.8911, |
|
"step": 387 |
|
}, |
|
{ |
|
"epoch": 0.9948717948717949, |
|
"grad_norm": 0.18112531304359436, |
|
"learning_rate": 9.26423049894589e-06, |
|
"loss": 1.0187, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 0.9974358974358974, |
|
"grad_norm": 0.17097817361354828, |
|
"learning_rate": 9.26056338028169e-06, |
|
"loss": 0.9781, |
|
"step": 389 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 0.33144575357437134, |
|
"learning_rate": 9.256880733944955e-06, |
|
"loss": 0.8241, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 1.0025641025641026, |
|
"grad_norm": 0.1937527358531952, |
|
"learning_rate": 9.253182461103253e-06, |
|
"loss": 1.0755, |
|
"step": 391 |
|
}, |
|
{ |
|
"epoch": 1.005128205128205, |
|
"grad_norm": 0.15882588922977448, |
|
"learning_rate": 9.24946846208363e-06, |
|
"loss": 0.9799, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 1.0076923076923077, |
|
"grad_norm": 0.16072547435760498, |
|
"learning_rate": 9.245738636363637e-06, |
|
"loss": 0.9872, |
|
"step": 393 |
|
}, |
|
{ |
|
"epoch": 1.0102564102564102, |
|
"grad_norm": 0.15416628122329712, |
|
"learning_rate": 9.241992882562277e-06, |
|
"loss": 1.0529, |
|
"step": 394 |
|
}, |
|
{ |
|
"epoch": 1.0128205128205128, |
|
"grad_norm": 0.16685126721858978, |
|
"learning_rate": 9.238231098430814e-06, |
|
"loss": 0.8965, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 1.0153846153846153, |
|
"grad_norm": 0.17164798080921173, |
|
"learning_rate": 9.23445318084346e-06, |
|
"loss": 1.0228, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 1.0179487179487179, |
|
"grad_norm": 0.1579882949590683, |
|
"learning_rate": 9.230659025787966e-06, |
|
"loss": 0.9462, |
|
"step": 397 |
|
}, |
|
{ |
|
"epoch": 1.0205128205128204, |
|
"grad_norm": 0.16084755957126617, |
|
"learning_rate": 9.226848528356067e-06, |
|
"loss": 1.0358, |
|
"step": 398 |
|
}, |
|
{ |
|
"epoch": 1.023076923076923, |
|
"grad_norm": 0.1533387154340744, |
|
"learning_rate": 9.223021582733813e-06, |
|
"loss": 0.9388, |
|
"step": 399 |
|
}, |
|
{ |
|
"epoch": 1.0256410256410255, |
|
"grad_norm": 0.16010062396526337, |
|
"learning_rate": 9.21917808219178e-06, |
|
"loss": 1.053, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.028205128205128, |
|
"grad_norm": 0.16239330172538757, |
|
"learning_rate": 9.215317919075145e-06, |
|
"loss": 0.9812, |
|
"step": 401 |
|
}, |
|
{ |
|
"epoch": 1.0307692307692307, |
|
"grad_norm": 0.1662273406982422, |
|
"learning_rate": 9.211440984793628e-06, |
|
"loss": 1.0265, |
|
"step": 402 |
|
}, |
|
{ |
|
"epoch": 1.0333333333333334, |
|
"grad_norm": 0.18184800446033478, |
|
"learning_rate": 9.20754716981132e-06, |
|
"loss": 0.8951, |
|
"step": 403 |
|
}, |
|
{ |
|
"epoch": 1.035897435897436, |
|
"grad_norm": 0.17101332545280457, |
|
"learning_rate": 9.203636363636365e-06, |
|
"loss": 1.0448, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 1.0384615384615385, |
|
"grad_norm": 0.16575555503368378, |
|
"learning_rate": 9.199708454810497e-06, |
|
"loss": 0.974, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 1.041025641025641, |
|
"grad_norm": 0.18276239931583405, |
|
"learning_rate": 9.195763330898466e-06, |
|
"loss": 1.0457, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 1.0435897435897437, |
|
"grad_norm": 0.1637968271970749, |
|
"learning_rate": 9.191800878477306e-06, |
|
"loss": 0.9338, |
|
"step": 407 |
|
}, |
|
{ |
|
"epoch": 1.0461538461538462, |
|
"grad_norm": 0.20241133868694305, |
|
"learning_rate": 9.18782098312546e-06, |
|
"loss": 1.0354, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 1.0487179487179488, |
|
"grad_norm": 0.161885067820549, |
|
"learning_rate": 9.183823529411765e-06, |
|
"loss": 0.9205, |
|
"step": 409 |
|
}, |
|
{ |
|
"epoch": 1.0512820512820513, |
|
"grad_norm": 0.14989648759365082, |
|
"learning_rate": 9.179808400884306e-06, |
|
"loss": 0.9291, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 1.0538461538461539, |
|
"grad_norm": 0.1757401078939438, |
|
"learning_rate": 9.175775480059083e-06, |
|
"loss": 0.9478, |
|
"step": 411 |
|
}, |
|
{ |
|
"epoch": 1.0564102564102564, |
|
"grad_norm": 0.1715121567249298, |
|
"learning_rate": 9.171724648408586e-06, |
|
"loss": 0.9509, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 1.058974358974359, |
|
"grad_norm": 0.1794794499874115, |
|
"learning_rate": 9.167655786350149e-06, |
|
"loss": 0.9052, |
|
"step": 413 |
|
}, |
|
{ |
|
"epoch": 1.0615384615384615, |
|
"grad_norm": 0.1767176240682602, |
|
"learning_rate": 9.163568773234201e-06, |
|
"loss": 0.8936, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 1.064102564102564, |
|
"grad_norm": 0.16628186404705048, |
|
"learning_rate": 9.15946348733234e-06, |
|
"loss": 0.9326, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 1.0666666666666667, |
|
"grad_norm": 0.21831132471561432, |
|
"learning_rate": 9.155339805825244e-06, |
|
"loss": 1.0548, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 1.0692307692307692, |
|
"grad_norm": 0.19534535706043243, |
|
"learning_rate": 9.15119760479042e-06, |
|
"loss": 0.9584, |
|
"step": 417 |
|
}, |
|
{ |
|
"epoch": 1.0717948717948718, |
|
"grad_norm": 0.16996845602989197, |
|
"learning_rate": 9.147036759189797e-06, |
|
"loss": 0.9573, |
|
"step": 418 |
|
}, |
|
{ |
|
"epoch": 1.0743589743589743, |
|
"grad_norm": 0.16845408082008362, |
|
"learning_rate": 9.142857142857144e-06, |
|
"loss": 0.8904, |
|
"step": 419 |
|
}, |
|
{ |
|
"epoch": 1.0769230769230769, |
|
"grad_norm": 0.1751023381948471, |
|
"learning_rate": 9.138658628485306e-06, |
|
"loss": 1.0049, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 1.0794871794871794, |
|
"grad_norm": 0.17877094447612762, |
|
"learning_rate": 9.134441087613294e-06, |
|
"loss": 1.0011, |
|
"step": 421 |
|
}, |
|
{ |
|
"epoch": 1.082051282051282, |
|
"grad_norm": 0.17528527975082397, |
|
"learning_rate": 9.130204390613173e-06, |
|
"loss": 1.0129, |
|
"step": 422 |
|
}, |
|
{ |
|
"epoch": 1.0846153846153845, |
|
"grad_norm": 0.18258242309093475, |
|
"learning_rate": 9.125948406676782e-06, |
|
"loss": 1.098, |
|
"step": 423 |
|
}, |
|
{ |
|
"epoch": 1.087179487179487, |
|
"grad_norm": 0.19751989841461182, |
|
"learning_rate": 9.121673003802282e-06, |
|
"loss": 0.9406, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 1.0897435897435896, |
|
"grad_norm": 0.1751803457736969, |
|
"learning_rate": 9.117378048780488e-06, |
|
"loss": 0.9812, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 1.0923076923076924, |
|
"grad_norm": 0.16526196897029877, |
|
"learning_rate": 9.113063407181055e-06, |
|
"loss": 0.9926, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 1.094871794871795, |
|
"grad_norm": 0.17794327437877655, |
|
"learning_rate": 9.108728943338438e-06, |
|
"loss": 0.8838, |
|
"step": 427 |
|
}, |
|
{ |
|
"epoch": 1.0974358974358975, |
|
"grad_norm": 0.16672289371490479, |
|
"learning_rate": 9.104374520337684e-06, |
|
"loss": 0.9296, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"grad_norm": 0.1750965267419815, |
|
"learning_rate": 9.100000000000001e-06, |
|
"loss": 0.9608, |
|
"step": 429 |
|
}, |
|
{ |
|
"epoch": 1.1025641025641026, |
|
"grad_norm": 0.31471797823905945, |
|
"learning_rate": 9.095605242868158e-06, |
|
"loss": 1.113, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 1.1051282051282052, |
|
"grad_norm": 0.2636878192424774, |
|
"learning_rate": 9.091190108191653e-06, |
|
"loss": 1.0369, |
|
"step": 431 |
|
}, |
|
{ |
|
"epoch": 1.1076923076923078, |
|
"grad_norm": 0.16675381362438202, |
|
"learning_rate": 9.086754453911697e-06, |
|
"loss": 0.9234, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 1.1102564102564103, |
|
"grad_norm": 0.19578181207180023, |
|
"learning_rate": 9.082298136645965e-06, |
|
"loss": 0.8915, |
|
"step": 433 |
|
}, |
|
{ |
|
"epoch": 1.1128205128205129, |
|
"grad_norm": 0.15622036159038544, |
|
"learning_rate": 9.077821011673151e-06, |
|
"loss": 0.834, |
|
"step": 434 |
|
}, |
|
{ |
|
"epoch": 1.1153846153846154, |
|
"grad_norm": 0.1723235547542572, |
|
"learning_rate": 9.073322932917318e-06, |
|
"loss": 1.1672, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 1.117948717948718, |
|
"grad_norm": 0.16886036098003387, |
|
"learning_rate": 9.068803752931978e-06, |
|
"loss": 1.0947, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 1.1205128205128205, |
|
"grad_norm": 0.1680499017238617, |
|
"learning_rate": 9.064263322884012e-06, |
|
"loss": 1.0609, |
|
"step": 437 |
|
}, |
|
{ |
|
"epoch": 1.123076923076923, |
|
"grad_norm": 0.16996805369853973, |
|
"learning_rate": 9.059701492537314e-06, |
|
"loss": 0.9502, |
|
"step": 438 |
|
}, |
|
{ |
|
"epoch": 1.1256410256410256, |
|
"grad_norm": 0.17708763480186462, |
|
"learning_rate": 9.05511811023622e-06, |
|
"loss": 0.9767, |
|
"step": 439 |
|
}, |
|
{ |
|
"epoch": 1.1282051282051282, |
|
"grad_norm": 0.19685539603233337, |
|
"learning_rate": 9.050513022888715e-06, |
|
"loss": 0.957, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 1.1307692307692307, |
|
"grad_norm": 0.18247175216674805, |
|
"learning_rate": 9.045886075949368e-06, |
|
"loss": 0.9098, |
|
"step": 441 |
|
}, |
|
{ |
|
"epoch": 1.1333333333333333, |
|
"grad_norm": 0.1733454316854477, |
|
"learning_rate": 9.041237113402062e-06, |
|
"loss": 0.9007, |
|
"step": 442 |
|
}, |
|
{ |
|
"epoch": 1.1358974358974359, |
|
"grad_norm": 0.19381284713745117, |
|
"learning_rate": 9.036565977742447e-06, |
|
"loss": 0.8715, |
|
"step": 443 |
|
}, |
|
{ |
|
"epoch": 1.1384615384615384, |
|
"grad_norm": 0.19639398157596588, |
|
"learning_rate": 9.031872509960161e-06, |
|
"loss": 1.0139, |
|
"step": 444 |
|
}, |
|
{ |
|
"epoch": 1.141025641025641, |
|
"grad_norm": 0.17254269123077393, |
|
"learning_rate": 9.027156549520768e-06, |
|
"loss": 1.0033, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 1.1435897435897435, |
|
"grad_norm": 0.16717708110809326, |
|
"learning_rate": 9.022417934347479e-06, |
|
"loss": 0.9119, |
|
"step": 446 |
|
}, |
|
{ |
|
"epoch": 1.146153846153846, |
|
"grad_norm": 0.19381357729434967, |
|
"learning_rate": 9.01765650080257e-06, |
|
"loss": 1.058, |
|
"step": 447 |
|
}, |
|
{ |
|
"epoch": 1.1487179487179486, |
|
"grad_norm": 0.1698828637599945, |
|
"learning_rate": 9.012872083668544e-06, |
|
"loss": 1.0112, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 1.1512820512820512, |
|
"grad_norm": 0.1623694747686386, |
|
"learning_rate": 9.008064516129034e-06, |
|
"loss": 0.9848, |
|
"step": 449 |
|
}, |
|
{ |
|
"epoch": 1.1538461538461537, |
|
"grad_norm": 0.22092927992343903, |
|
"learning_rate": 9.003233629749395e-06, |
|
"loss": 0.9878, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 1.1564102564102563, |
|
"grad_norm": 0.17867566645145416, |
|
"learning_rate": 8.998379254457052e-06, |
|
"loss": 1.0226, |
|
"step": 451 |
|
}, |
|
{ |
|
"epoch": 1.1589743589743589, |
|
"grad_norm": 0.1954340934753418, |
|
"learning_rate": 8.993501218521528e-06, |
|
"loss": 1.044, |
|
"step": 452 |
|
}, |
|
{ |
|
"epoch": 1.1615384615384616, |
|
"grad_norm": 0.1862252950668335, |
|
"learning_rate": 8.988599348534203e-06, |
|
"loss": 0.9865, |
|
"step": 453 |
|
}, |
|
{ |
|
"epoch": 1.1641025641025642, |
|
"grad_norm": 0.2021472156047821, |
|
"learning_rate": 8.983673469387756e-06, |
|
"loss": 0.9522, |
|
"step": 454 |
|
}, |
|
{ |
|
"epoch": 1.1666666666666667, |
|
"grad_norm": 0.179282084107399, |
|
"learning_rate": 8.97872340425532e-06, |
|
"loss": 0.9702, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 1.1692307692307693, |
|
"grad_norm": 0.16460926830768585, |
|
"learning_rate": 8.973748974569319e-06, |
|
"loss": 1.031, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 1.1717948717948719, |
|
"grad_norm": 0.17866218090057373, |
|
"learning_rate": 8.968750000000001e-06, |
|
"loss": 1.0303, |
|
"step": 457 |
|
}, |
|
{ |
|
"epoch": 1.1743589743589744, |
|
"grad_norm": 0.21034833788871765, |
|
"learning_rate": 8.963726298433634e-06, |
|
"loss": 0.9513, |
|
"step": 458 |
|
}, |
|
{ |
|
"epoch": 1.176923076923077, |
|
"grad_norm": 0.24306456744670868, |
|
"learning_rate": 8.958677685950415e-06, |
|
"loss": 0.9149, |
|
"step": 459 |
|
}, |
|
{ |
|
"epoch": 1.1794871794871795, |
|
"grad_norm": 0.21025407314300537, |
|
"learning_rate": 8.95360397680199e-06, |
|
"loss": 1.097, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 1.182051282051282, |
|
"grad_norm": 0.18221627175807953, |
|
"learning_rate": 8.948504983388704e-06, |
|
"loss": 0.9259, |
|
"step": 461 |
|
}, |
|
{ |
|
"epoch": 1.1846153846153846, |
|
"grad_norm": 0.18507330119609833, |
|
"learning_rate": 8.94338051623647e-06, |
|
"loss": 1.075, |
|
"step": 462 |
|
}, |
|
{ |
|
"epoch": 1.1871794871794872, |
|
"grad_norm": 0.20780031383037567, |
|
"learning_rate": 8.938230383973288e-06, |
|
"loss": 1.0373, |
|
"step": 463 |
|
}, |
|
{ |
|
"epoch": 1.1897435897435897, |
|
"grad_norm": 0.18843470513820648, |
|
"learning_rate": 8.93305439330544e-06, |
|
"loss": 0.9142, |
|
"step": 464 |
|
}, |
|
{ |
|
"epoch": 1.1923076923076923, |
|
"grad_norm": 0.20828303694725037, |
|
"learning_rate": 8.92785234899329e-06, |
|
"loss": 0.9692, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 1.1948717948717948, |
|
"grad_norm": 0.15796665847301483, |
|
"learning_rate": 8.922624053826745e-06, |
|
"loss": 0.936, |
|
"step": 466 |
|
}, |
|
{ |
|
"epoch": 1.1974358974358974, |
|
"grad_norm": 0.17404665052890778, |
|
"learning_rate": 8.917369308600336e-06, |
|
"loss": 0.9466, |
|
"step": 467 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"grad_norm": 0.20071259140968323, |
|
"learning_rate": 8.912087912087912e-06, |
|
"loss": 0.9609, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 1.2025641025641025, |
|
"grad_norm": 0.21828190982341766, |
|
"learning_rate": 8.90677966101695e-06, |
|
"loss": 0.9867, |
|
"step": 469 |
|
}, |
|
{ |
|
"epoch": 1.205128205128205, |
|
"grad_norm": 0.17222779989242554, |
|
"learning_rate": 8.901444350042482e-06, |
|
"loss": 1.055, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 1.2076923076923076, |
|
"grad_norm": 0.17729543149471283, |
|
"learning_rate": 8.896081771720614e-06, |
|
"loss": 1.0886, |
|
"step": 471 |
|
}, |
|
{ |
|
"epoch": 1.2102564102564102, |
|
"grad_norm": 0.17077124118804932, |
|
"learning_rate": 8.89069171648164e-06, |
|
"loss": 0.9318, |
|
"step": 472 |
|
}, |
|
{ |
|
"epoch": 1.2128205128205127, |
|
"grad_norm": 0.1938466727733612, |
|
"learning_rate": 8.88527397260274e-06, |
|
"loss": 0.9199, |
|
"step": 473 |
|
}, |
|
{ |
|
"epoch": 1.2153846153846155, |
|
"grad_norm": 0.21834906935691833, |
|
"learning_rate": 8.879828326180258e-06, |
|
"loss": 1.0522, |
|
"step": 474 |
|
}, |
|
{ |
|
"epoch": 1.217948717948718, |
|
"grad_norm": 0.18741706013679504, |
|
"learning_rate": 8.87435456110155e-06, |
|
"loss": 0.9653, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 1.2205128205128206, |
|
"grad_norm": 0.18282394111156464, |
|
"learning_rate": 8.868852459016393e-06, |
|
"loss": 1.0237, |
|
"step": 476 |
|
}, |
|
{ |
|
"epoch": 1.2230769230769232, |
|
"grad_norm": 0.17378900945186615, |
|
"learning_rate": 8.86332179930796e-06, |
|
"loss": 1.055, |
|
"step": 477 |
|
}, |
|
{ |
|
"epoch": 1.2256410256410257, |
|
"grad_norm": 0.21999238431453705, |
|
"learning_rate": 8.857762359063314e-06, |
|
"loss": 0.97, |
|
"step": 478 |
|
}, |
|
{ |
|
"epoch": 1.2282051282051283, |
|
"grad_norm": 0.18467235565185547, |
|
"learning_rate": 8.852173913043478e-06, |
|
"loss": 1.0498, |
|
"step": 479 |
|
}, |
|
{ |
|
"epoch": 1.2307692307692308, |
|
"grad_norm": 0.16720303893089294, |
|
"learning_rate": 8.846556233653009e-06, |
|
"loss": 0.9951, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 1.2333333333333334, |
|
"grad_norm": 0.16237983107566833, |
|
"learning_rate": 8.840909090909092e-06, |
|
"loss": 1.0361, |
|
"step": 481 |
|
}, |
|
{ |
|
"epoch": 1.235897435897436, |
|
"grad_norm": 0.15937431156635284, |
|
"learning_rate": 8.835232252410167e-06, |
|
"loss": 0.9251, |
|
"step": 482 |
|
}, |
|
{ |
|
"epoch": 1.2384615384615385, |
|
"grad_norm": 0.17901954054832458, |
|
"learning_rate": 8.829525483304043e-06, |
|
"loss": 1.0279, |
|
"step": 483 |
|
}, |
|
{ |
|
"epoch": 1.241025641025641, |
|
"grad_norm": 0.21782898902893066, |
|
"learning_rate": 8.823788546255506e-06, |
|
"loss": 1.0042, |
|
"step": 484 |
|
}, |
|
{ |
|
"epoch": 1.2435897435897436, |
|
"grad_norm": 0.172428160905838, |
|
"learning_rate": 8.818021201413429e-06, |
|
"loss": 1.069, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 1.2461538461538462, |
|
"grad_norm": 0.1770864725112915, |
|
"learning_rate": 8.812223206377328e-06, |
|
"loss": 1.0291, |
|
"step": 486 |
|
}, |
|
{ |
|
"epoch": 1.2487179487179487, |
|
"grad_norm": 0.1773838996887207, |
|
"learning_rate": 8.80639431616341e-06, |
|
"loss": 1.0329, |
|
"step": 487 |
|
}, |
|
{ |
|
"epoch": 1.2512820512820513, |
|
"grad_norm": 0.20391307771205902, |
|
"learning_rate": 8.80053428317008e-06, |
|
"loss": 0.9828, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 1.2538461538461538, |
|
"grad_norm": 0.16360723972320557, |
|
"learning_rate": 8.794642857142858e-06, |
|
"loss": 0.9086, |
|
"step": 489 |
|
}, |
|
{ |
|
"epoch": 1.2564102564102564, |
|
"grad_norm": 0.21982532739639282, |
|
"learning_rate": 8.788719785138765e-06, |
|
"loss": 1.097, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 1.258974358974359, |
|
"grad_norm": 0.19650976359844208, |
|
"learning_rate": 8.782764811490128e-06, |
|
"loss": 1.014, |
|
"step": 491 |
|
}, |
|
{ |
|
"epoch": 1.2615384615384615, |
|
"grad_norm": 0.1792370080947876, |
|
"learning_rate": 8.776777677767778e-06, |
|
"loss": 0.9578, |
|
"step": 492 |
|
}, |
|
{ |
|
"epoch": 1.264102564102564, |
|
"grad_norm": 0.2550472617149353, |
|
"learning_rate": 8.770758122743683e-06, |
|
"loss": 0.985, |
|
"step": 493 |
|
}, |
|
{ |
|
"epoch": 1.2666666666666666, |
|
"grad_norm": 0.18580225110054016, |
|
"learning_rate": 8.764705882352942e-06, |
|
"loss": 0.9796, |
|
"step": 494 |
|
}, |
|
{ |
|
"epoch": 1.2692307692307692, |
|
"grad_norm": 0.2560383975505829, |
|
"learning_rate": 8.758620689655173e-06, |
|
"loss": 1.0939, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 1.2717948717948717, |
|
"grad_norm": 0.18652617931365967, |
|
"learning_rate": 8.752502274795269e-06, |
|
"loss": 0.9389, |
|
"step": 496 |
|
}, |
|
{ |
|
"epoch": 1.2743589743589743, |
|
"grad_norm": 0.170726478099823, |
|
"learning_rate": 8.746350364963505e-06, |
|
"loss": 1.0236, |
|
"step": 497 |
|
}, |
|
{ |
|
"epoch": 1.2769230769230768, |
|
"grad_norm": 0.20897836983203888, |
|
"learning_rate": 8.740164684354986e-06, |
|
"loss": 0.9533, |
|
"step": 498 |
|
}, |
|
{ |
|
"epoch": 1.2794871794871794, |
|
"grad_norm": 0.19295988976955414, |
|
"learning_rate": 8.73394495412844e-06, |
|
"loss": 0.9425, |
|
"step": 499 |
|
}, |
|
{ |
|
"epoch": 1.282051282051282, |
|
"grad_norm": 0.20471826195716858, |
|
"learning_rate": 8.727690892364306e-06, |
|
"loss": 0.9558, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.2846153846153845, |
|
"grad_norm": 0.18632952868938446, |
|
"learning_rate": 8.72140221402214e-06, |
|
"loss": 0.9701, |
|
"step": 501 |
|
}, |
|
{ |
|
"epoch": 1.287179487179487, |
|
"grad_norm": 0.20619980990886688, |
|
"learning_rate": 8.715078630897317e-06, |
|
"loss": 0.9442, |
|
"step": 502 |
|
}, |
|
{ |
|
"epoch": 1.2897435897435898, |
|
"grad_norm": 0.16518618166446686, |
|
"learning_rate": 8.708719851576993e-06, |
|
"loss": 1.0207, |
|
"step": 503 |
|
}, |
|
{ |
|
"epoch": 1.2923076923076924, |
|
"grad_norm": 0.1911863535642624, |
|
"learning_rate": 8.70232558139535e-06, |
|
"loss": 1.0024, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 1.294871794871795, |
|
"grad_norm": 1.0833367109298706, |
|
"learning_rate": 8.695895522388062e-06, |
|
"loss": 0.9605, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 1.2974358974358975, |
|
"grad_norm": 0.18326595425605774, |
|
"learning_rate": 8.689429373246025e-06, |
|
"loss": 0.9348, |
|
"step": 506 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"grad_norm": 0.18599998950958252, |
|
"learning_rate": 8.682926829268294e-06, |
|
"loss": 1.2229, |
|
"step": 507 |
|
}, |
|
{ |
|
"epoch": 1.3025641025641026, |
|
"grad_norm": 0.19638995826244354, |
|
"learning_rate": 8.676387582314206e-06, |
|
"loss": 0.9343, |
|
"step": 508 |
|
}, |
|
{ |
|
"epoch": 1.3051282051282052, |
|
"grad_norm": 0.1773020327091217, |
|
"learning_rate": 8.669811320754717e-06, |
|
"loss": 0.9836, |
|
"step": 509 |
|
}, |
|
{ |
|
"epoch": 1.3076923076923077, |
|
"grad_norm": 0.19725504517555237, |
|
"learning_rate": 8.663197729422896e-06, |
|
"loss": 0.9532, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 1.3102564102564103, |
|
"grad_norm": 0.18866512179374695, |
|
"learning_rate": 8.656546489563568e-06, |
|
"loss": 0.9729, |
|
"step": 511 |
|
}, |
|
{ |
|
"epoch": 1.3128205128205128, |
|
"grad_norm": 0.18089522421360016, |
|
"learning_rate": 8.649857278782113e-06, |
|
"loss": 1.0848, |
|
"step": 512 |
|
}, |
|
{ |
|
"epoch": 1.3153846153846154, |
|
"grad_norm": 0.18652409315109253, |
|
"learning_rate": 8.643129770992367e-06, |
|
"loss": 0.9687, |
|
"step": 513 |
|
}, |
|
{ |
|
"epoch": 1.317948717948718, |
|
"grad_norm": 0.19303199648857117, |
|
"learning_rate": 8.636363636363635e-06, |
|
"loss": 1.0083, |
|
"step": 514 |
|
}, |
|
{ |
|
"epoch": 1.3205128205128205, |
|
"grad_norm": 0.207601860165596, |
|
"learning_rate": 8.629558541266796e-06, |
|
"loss": 0.9553, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 1.323076923076923, |
|
"grad_norm": 0.18684937059879303, |
|
"learning_rate": 8.622714148219442e-06, |
|
"loss": 1.0599, |
|
"step": 516 |
|
}, |
|
{ |
|
"epoch": 1.3256410256410256, |
|
"grad_norm": 0.1821713149547577, |
|
"learning_rate": 8.615830115830118e-06, |
|
"loss": 1.0457, |
|
"step": 517 |
|
}, |
|
{ |
|
"epoch": 1.3282051282051281, |
|
"grad_norm": 0.1726110726594925, |
|
"learning_rate": 8.608906098741529e-06, |
|
"loss": 0.8477, |
|
"step": 518 |
|
}, |
|
{ |
|
"epoch": 1.3307692307692307, |
|
"grad_norm": 0.17926542460918427, |
|
"learning_rate": 8.601941747572816e-06, |
|
"loss": 0.9752, |
|
"step": 519 |
|
}, |
|
{ |
|
"epoch": 1.3333333333333333, |
|
"grad_norm": 0.1952233761548996, |
|
"learning_rate": 8.59493670886076e-06, |
|
"loss": 0.9636, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 1.3358974358974358, |
|
"grad_norm": 0.1748773604631424, |
|
"learning_rate": 8.587890625000001e-06, |
|
"loss": 0.9876, |
|
"step": 521 |
|
}, |
|
{ |
|
"epoch": 1.3384615384615386, |
|
"grad_norm": 0.1747111827135086, |
|
"learning_rate": 8.580803134182175e-06, |
|
"loss": 0.9405, |
|
"step": 522 |
|
}, |
|
{ |
|
"epoch": 1.3410256410256411, |
|
"grad_norm": 0.18903814256191254, |
|
"learning_rate": 8.573673870333989e-06, |
|
"loss": 1.0787, |
|
"step": 523 |
|
}, |
|
{ |
|
"epoch": 1.3435897435897437, |
|
"grad_norm": 0.16885128617286682, |
|
"learning_rate": 8.566502463054187e-06, |
|
"loss": 0.9353, |
|
"step": 524 |
|
}, |
|
{ |
|
"epoch": 1.3461538461538463, |
|
"grad_norm": 0.19254456460475922, |
|
"learning_rate": 8.559288537549409e-06, |
|
"loss": 0.9973, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 1.3487179487179488, |
|
"grad_norm": 0.18546819686889648, |
|
"learning_rate": 8.552031714568882e-06, |
|
"loss": 0.9529, |
|
"step": 526 |
|
}, |
|
{ |
|
"epoch": 1.3512820512820514, |
|
"grad_norm": 0.17594410479068756, |
|
"learning_rate": 8.544731610337974e-06, |
|
"loss": 1.0129, |
|
"step": 527 |
|
}, |
|
{ |
|
"epoch": 1.353846153846154, |
|
"grad_norm": 0.19100527465343475, |
|
"learning_rate": 8.537387836490528e-06, |
|
"loss": 1.043, |
|
"step": 528 |
|
}, |
|
{ |
|
"epoch": 1.3564102564102565, |
|
"grad_norm": 0.18892578780651093, |
|
"learning_rate": 8.53e-06, |
|
"loss": 0.953, |
|
"step": 529 |
|
}, |
|
{ |
|
"epoch": 1.358974358974359, |
|
"grad_norm": 0.1750698834657669, |
|
"learning_rate": 8.522567703109327e-06, |
|
"loss": 1.0616, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 1.3615384615384616, |
|
"grad_norm": 0.17712536454200745, |
|
"learning_rate": 8.515090543259558e-06, |
|
"loss": 0.8927, |
|
"step": 531 |
|
}, |
|
{ |
|
"epoch": 1.3641025641025641, |
|
"grad_norm": 0.1855439990758896, |
|
"learning_rate": 8.507568113017155e-06, |
|
"loss": 0.995, |
|
"step": 532 |
|
}, |
|
{ |
|
"epoch": 1.3666666666666667, |
|
"grad_norm": 0.17967894673347473, |
|
"learning_rate": 8.5e-06, |
|
"loss": 0.8988, |
|
"step": 533 |
|
}, |
|
{ |
|
"epoch": 1.3692307692307693, |
|
"grad_norm": 0.167103573679924, |
|
"learning_rate": 8.492385786802031e-06, |
|
"loss": 0.9392, |
|
"step": 534 |
|
}, |
|
{ |
|
"epoch": 1.3717948717948718, |
|
"grad_norm": 0.1761719435453415, |
|
"learning_rate": 8.484725050916498e-06, |
|
"loss": 0.9431, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 1.3743589743589744, |
|
"grad_norm": 0.19669947028160095, |
|
"learning_rate": 8.477017364657814e-06, |
|
"loss": 0.956, |
|
"step": 536 |
|
}, |
|
{ |
|
"epoch": 1.376923076923077, |
|
"grad_norm": 0.17305508255958557, |
|
"learning_rate": 8.469262295081969e-06, |
|
"loss": 1.0234, |
|
"step": 537 |
|
}, |
|
{ |
|
"epoch": 1.3794871794871795, |
|
"grad_norm": 0.18830622732639313, |
|
"learning_rate": 8.461459403905446e-06, |
|
"loss": 0.9498, |
|
"step": 538 |
|
}, |
|
{ |
|
"epoch": 1.382051282051282, |
|
"grad_norm": 0.20369920134544373, |
|
"learning_rate": 8.453608247422681e-06, |
|
"loss": 1.1387, |
|
"step": 539 |
|
}, |
|
{ |
|
"epoch": 1.3846153846153846, |
|
"grad_norm": 0.18848799169063568, |
|
"learning_rate": 8.445708376421923e-06, |
|
"loss": 0.9122, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 1.3871794871794871, |
|
"grad_norm": 0.17956501245498657, |
|
"learning_rate": 8.437759336099585e-06, |
|
"loss": 0.9424, |
|
"step": 541 |
|
}, |
|
{ |
|
"epoch": 1.3897435897435897, |
|
"grad_norm": 0.19759565591812134, |
|
"learning_rate": 8.429760665972945e-06, |
|
"loss": 0.9336, |
|
"step": 542 |
|
}, |
|
{ |
|
"epoch": 1.3923076923076922, |
|
"grad_norm": 0.20953185856342316, |
|
"learning_rate": 8.421711899791232e-06, |
|
"loss": 0.9995, |
|
"step": 543 |
|
}, |
|
{ |
|
"epoch": 1.3948717948717948, |
|
"grad_norm": 0.1723688542842865, |
|
"learning_rate": 8.413612565445026e-06, |
|
"loss": 0.9328, |
|
"step": 544 |
|
}, |
|
{ |
|
"epoch": 1.3974358974358974, |
|
"grad_norm": 0.16942423582077026, |
|
"learning_rate": 8.405462184873949e-06, |
|
"loss": 0.9179, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"grad_norm": 0.16917023062705994, |
|
"learning_rate": 8.397260273972604e-06, |
|
"loss": 1.0107, |
|
"step": 546 |
|
}, |
|
{ |
|
"epoch": 1.4025641025641025, |
|
"grad_norm": 0.18283595144748688, |
|
"learning_rate": 8.389006342494715e-06, |
|
"loss": 0.8784, |
|
"step": 547 |
|
}, |
|
{ |
|
"epoch": 1.405128205128205, |
|
"grad_norm": 0.17370331287384033, |
|
"learning_rate": 8.380699893955462e-06, |
|
"loss": 1.1566, |
|
"step": 548 |
|
}, |
|
{ |
|
"epoch": 1.4076923076923076, |
|
"grad_norm": 0.21643978357315063, |
|
"learning_rate": 8.372340425531915e-06, |
|
"loss": 1.045, |
|
"step": 549 |
|
}, |
|
{ |
|
"epoch": 1.4102564102564101, |
|
"grad_norm": 0.18621404469013214, |
|
"learning_rate": 8.36392742796158e-06, |
|
"loss": 0.9533, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 1.4128205128205127, |
|
"grad_norm": 0.1949056088924408, |
|
"learning_rate": 8.355460385438972e-06, |
|
"loss": 1.0161, |
|
"step": 551 |
|
}, |
|
{ |
|
"epoch": 1.4153846153846155, |
|
"grad_norm": 0.1903102844953537, |
|
"learning_rate": 8.346938775510205e-06, |
|
"loss": 1.047, |
|
"step": 552 |
|
}, |
|
{ |
|
"epoch": 1.417948717948718, |
|
"grad_norm": 0.17839354276657104, |
|
"learning_rate": 8.338362068965518e-06, |
|
"loss": 0.9366, |
|
"step": 553 |
|
}, |
|
{ |
|
"epoch": 1.4205128205128206, |
|
"grad_norm": 0.18962249159812927, |
|
"learning_rate": 8.32972972972973e-06, |
|
"loss": 0.9484, |
|
"step": 554 |
|
}, |
|
{ |
|
"epoch": 1.4230769230769231, |
|
"grad_norm": 0.17600049078464508, |
|
"learning_rate": 8.321041214750544e-06, |
|
"loss": 0.9337, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 1.4256410256410257, |
|
"grad_norm": 0.20685282349586487, |
|
"learning_rate": 8.312295973884657e-06, |
|
"loss": 0.9831, |
|
"step": 556 |
|
}, |
|
{ |
|
"epoch": 1.4282051282051282, |
|
"grad_norm": 0.20490646362304688, |
|
"learning_rate": 8.303493449781661e-06, |
|
"loss": 1.0035, |
|
"step": 557 |
|
}, |
|
{ |
|
"epoch": 1.4307692307692308, |
|
"grad_norm": 0.17430691421031952, |
|
"learning_rate": 8.294633077765607e-06, |
|
"loss": 0.8622, |
|
"step": 558 |
|
}, |
|
{ |
|
"epoch": 1.4333333333333333, |
|
"grad_norm": 0.2322288304567337, |
|
"learning_rate": 8.285714285714285e-06, |
|
"loss": 0.9546, |
|
"step": 559 |
|
}, |
|
{ |
|
"epoch": 1.435897435897436, |
|
"grad_norm": 0.19194380939006805, |
|
"learning_rate": 8.276736493936054e-06, |
|
"loss": 1.0656, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 1.4384615384615385, |
|
"grad_norm": 0.1931033879518509, |
|
"learning_rate": 8.267699115044248e-06, |
|
"loss": 0.9399, |
|
"step": 561 |
|
}, |
|
{ |
|
"epoch": 1.441025641025641, |
|
"grad_norm": 0.184538334608078, |
|
"learning_rate": 8.25860155382908e-06, |
|
"loss": 0.9948, |
|
"step": 562 |
|
}, |
|
{ |
|
"epoch": 1.4435897435897436, |
|
"grad_norm": 0.19109323620796204, |
|
"learning_rate": 8.249443207126949e-06, |
|
"loss": 0.9874, |
|
"step": 563 |
|
}, |
|
{ |
|
"epoch": 1.4461538461538461, |
|
"grad_norm": 0.1646609902381897, |
|
"learning_rate": 8.24022346368715e-06, |
|
"loss": 0.9157, |
|
"step": 564 |
|
}, |
|
{ |
|
"epoch": 1.4487179487179487, |
|
"grad_norm": 0.19419412314891815, |
|
"learning_rate": 8.230941704035874e-06, |
|
"loss": 0.9856, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 1.4512820512820512, |
|
"grad_norm": 0.18451392650604248, |
|
"learning_rate": 8.221597300337459e-06, |
|
"loss": 0.9303, |
|
"step": 566 |
|
}, |
|
{ |
|
"epoch": 1.4538461538461538, |
|
"grad_norm": 0.20760126411914825, |
|
"learning_rate": 8.212189616252821e-06, |
|
"loss": 1.1956, |
|
"step": 567 |
|
}, |
|
{ |
|
"epoch": 1.4564102564102563, |
|
"grad_norm": 0.2049357295036316, |
|
"learning_rate": 8.202718006795016e-06, |
|
"loss": 0.9186, |
|
"step": 568 |
|
}, |
|
{ |
|
"epoch": 1.458974358974359, |
|
"grad_norm": 0.18056929111480713, |
|
"learning_rate": 8.193181818181819e-06, |
|
"loss": 0.9874, |
|
"step": 569 |
|
}, |
|
{ |
|
"epoch": 1.4615384615384617, |
|
"grad_norm": 0.2029920220375061, |
|
"learning_rate": 8.18358038768529e-06, |
|
"loss": 1.0627, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 1.4641025641025642, |
|
"grad_norm": 0.1772759109735489, |
|
"learning_rate": 8.173913043478263e-06, |
|
"loss": 0.9109, |
|
"step": 571 |
|
}, |
|
{ |
|
"epoch": 1.4666666666666668, |
|
"grad_norm": 0.2249906063079834, |
|
"learning_rate": 8.164179104477612e-06, |
|
"loss": 0.92, |
|
"step": 572 |
|
}, |
|
{ |
|
"epoch": 1.4692307692307693, |
|
"grad_norm": 0.1960502713918686, |
|
"learning_rate": 8.154377880184333e-06, |
|
"loss": 1.07, |
|
"step": 573 |
|
}, |
|
{ |
|
"epoch": 1.471794871794872, |
|
"grad_norm": 0.17915907502174377, |
|
"learning_rate": 8.14450867052023e-06, |
|
"loss": 1.0765, |
|
"step": 574 |
|
}, |
|
{ |
|
"epoch": 1.4743589743589745, |
|
"grad_norm": 0.214991495013237, |
|
"learning_rate": 8.134570765661253e-06, |
|
"loss": 0.9856, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 1.476923076923077, |
|
"grad_norm": 0.19141773879528046, |
|
"learning_rate": 8.124563445867288e-06, |
|
"loss": 1.0069, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 1.4794871794871796, |
|
"grad_norm": 0.18558935821056366, |
|
"learning_rate": 8.114485981308412e-06, |
|
"loss": 0.9061, |
|
"step": 577 |
|
}, |
|
{ |
|
"epoch": 1.4820512820512821, |
|
"grad_norm": 0.2104201316833496, |
|
"learning_rate": 8.104337631887457e-06, |
|
"loss": 0.9805, |
|
"step": 578 |
|
}, |
|
{ |
|
"epoch": 1.4846153846153847, |
|
"grad_norm": 0.18049705028533936, |
|
"learning_rate": 8.094117647058823e-06, |
|
"loss": 0.9658, |
|
"step": 579 |
|
}, |
|
{ |
|
"epoch": 1.4871794871794872, |
|
"grad_norm": 0.22525040805339813, |
|
"learning_rate": 8.083825265643448e-06, |
|
"loss": 1.0575, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 1.4897435897435898, |
|
"grad_norm": 0.20596688985824585, |
|
"learning_rate": 8.07345971563981e-06, |
|
"loss": 0.8823, |
|
"step": 581 |
|
}, |
|
{ |
|
"epoch": 1.4923076923076923, |
|
"grad_norm": 0.24059003591537476, |
|
"learning_rate": 8.063020214030916e-06, |
|
"loss": 0.9827, |
|
"step": 582 |
|
}, |
|
{ |
|
"epoch": 1.494871794871795, |
|
"grad_norm": 0.18533092737197876, |
|
"learning_rate": 8.052505966587113e-06, |
|
"loss": 1.0123, |
|
"step": 583 |
|
}, |
|
{ |
|
"epoch": 1.4974358974358974, |
|
"grad_norm": 0.20136979222297668, |
|
"learning_rate": 8.04191616766467e-06, |
|
"loss": 1.0111, |
|
"step": 584 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"grad_norm": 0.19839423894882202, |
|
"learning_rate": 8.03125e-06, |
|
"loss": 1.0131, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 1.5025641025641026, |
|
"grad_norm": 0.18837936222553253, |
|
"learning_rate": 8.020506634499398e-06, |
|
"loss": 1.166, |
|
"step": 586 |
|
}, |
|
{ |
|
"epoch": 1.505128205128205, |
|
"grad_norm": 0.18904945254325867, |
|
"learning_rate": 8.009685230024214e-06, |
|
"loss": 0.9491, |
|
"step": 587 |
|
}, |
|
{ |
|
"epoch": 1.5076923076923077, |
|
"grad_norm": 0.17879720032215118, |
|
"learning_rate": 7.998784933171326e-06, |
|
"loss": 1.0575, |
|
"step": 588 |
|
}, |
|
{ |
|
"epoch": 1.5102564102564102, |
|
"grad_norm": 0.19607414305210114, |
|
"learning_rate": 7.98780487804878e-06, |
|
"loss": 1.0261, |
|
"step": 589 |
|
}, |
|
{ |
|
"epoch": 1.5128205128205128, |
|
"grad_norm": 0.23364603519439697, |
|
"learning_rate": 7.97674418604651e-06, |
|
"loss": 0.951, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 1.5153846153846153, |
|
"grad_norm": 0.20051056146621704, |
|
"learning_rate": 7.965601965601966e-06, |
|
"loss": 1.2431, |
|
"step": 591 |
|
}, |
|
{ |
|
"epoch": 1.5179487179487179, |
|
"grad_norm": 0.19472134113311768, |
|
"learning_rate": 7.954377311960544e-06, |
|
"loss": 1.0066, |
|
"step": 592 |
|
}, |
|
{ |
|
"epoch": 1.5205128205128204, |
|
"grad_norm": 0.21720701456069946, |
|
"learning_rate": 7.943069306930693e-06, |
|
"loss": 0.9888, |
|
"step": 593 |
|
}, |
|
{ |
|
"epoch": 1.523076923076923, |
|
"grad_norm": 0.18797412514686584, |
|
"learning_rate": 7.93167701863354e-06, |
|
"loss": 0.9282, |
|
"step": 594 |
|
}, |
|
{ |
|
"epoch": 1.5256410256410255, |
|
"grad_norm": 0.18229195475578308, |
|
"learning_rate": 7.920199501246883e-06, |
|
"loss": 1.1075, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 1.528205128205128, |
|
"grad_norm": 0.20988033711910248, |
|
"learning_rate": 7.90863579474343e-06, |
|
"loss": 0.9834, |
|
"step": 596 |
|
}, |
|
{ |
|
"epoch": 1.5307692307692307, |
|
"grad_norm": 0.18902920186519623, |
|
"learning_rate": 7.896984924623117e-06, |
|
"loss": 1.0285, |
|
"step": 597 |
|
}, |
|
{ |
|
"epoch": 1.5333333333333332, |
|
"grad_norm": 0.2365204393863678, |
|
"learning_rate": 7.885245901639344e-06, |
|
"loss": 1.0288, |
|
"step": 598 |
|
}, |
|
{ |
|
"epoch": 1.5358974358974358, |
|
"grad_norm": 0.18257446587085724, |
|
"learning_rate": 7.873417721518988e-06, |
|
"loss": 1.0293, |
|
"step": 599 |
|
}, |
|
{ |
|
"epoch": 1.5384615384615383, |
|
"grad_norm": 0.17291095852851868, |
|
"learning_rate": 7.861499364675985e-06, |
|
"loss": 0.9189, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.5410256410256409, |
|
"grad_norm": 0.1902029812335968, |
|
"learning_rate": 7.849489795918368e-06, |
|
"loss": 0.8937, |
|
"step": 601 |
|
}, |
|
{ |
|
"epoch": 1.5435897435897434, |
|
"grad_norm": 0.17989574372768402, |
|
"learning_rate": 7.837387964148529e-06, |
|
"loss": 1.0091, |
|
"step": 602 |
|
}, |
|
{ |
|
"epoch": 1.546153846153846, |
|
"grad_norm": 0.19586458802223206, |
|
"learning_rate": 7.825192802056556e-06, |
|
"loss": 0.9092, |
|
"step": 603 |
|
}, |
|
{ |
|
"epoch": 1.5487179487179488, |
|
"grad_norm": 0.2133467197418213, |
|
"learning_rate": 7.812903225806452e-06, |
|
"loss": 0.957, |
|
"step": 604 |
|
}, |
|
{ |
|
"epoch": 1.5512820512820513, |
|
"grad_norm": 0.22505982220172882, |
|
"learning_rate": 7.800518134715025e-06, |
|
"loss": 1.0118, |
|
"step": 605 |
|
}, |
|
{ |
|
"epoch": 1.5538461538461539, |
|
"grad_norm": 0.20532438158988953, |
|
"learning_rate": 7.788036410923278e-06, |
|
"loss": 0.9181, |
|
"step": 606 |
|
}, |
|
{ |
|
"epoch": 1.5564102564102564, |
|
"grad_norm": 0.17881132662296295, |
|
"learning_rate": 7.775456919060053e-06, |
|
"loss": 1.0308, |
|
"step": 607 |
|
}, |
|
{ |
|
"epoch": 1.558974358974359, |
|
"grad_norm": 0.21090662479400635, |
|
"learning_rate": 7.762778505897773e-06, |
|
"loss": 1.1082, |
|
"step": 608 |
|
}, |
|
{ |
|
"epoch": 1.5615384615384615, |
|
"grad_norm": 0.223121777176857, |
|
"learning_rate": 7.75e-06, |
|
"loss": 0.9349, |
|
"step": 609 |
|
}, |
|
{ |
|
"epoch": 1.564102564102564, |
|
"grad_norm": 0.20706158876419067, |
|
"learning_rate": 7.737120211360633e-06, |
|
"loss": 1.0369, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 1.5666666666666667, |
|
"grad_norm": 0.19180113077163696, |
|
"learning_rate": 7.724137931034483e-06, |
|
"loss": 0.917, |
|
"step": 611 |
|
}, |
|
{ |
|
"epoch": 1.5692307692307692, |
|
"grad_norm": 0.19626112282276154, |
|
"learning_rate": 7.711051930758989e-06, |
|
"loss": 1.0926, |
|
"step": 612 |
|
}, |
|
{ |
|
"epoch": 1.5717948717948718, |
|
"grad_norm": 0.19783137738704681, |
|
"learning_rate": 7.697860962566846e-06, |
|
"loss": 0.9433, |
|
"step": 613 |
|
}, |
|
{ |
|
"epoch": 1.5743589743589743, |
|
"grad_norm": 0.21266983449459076, |
|
"learning_rate": 7.684563758389262e-06, |
|
"loss": 1.0266, |
|
"step": 614 |
|
}, |
|
{ |
|
"epoch": 1.5769230769230769, |
|
"grad_norm": 0.1945042610168457, |
|
"learning_rate": 7.671159029649595e-06, |
|
"loss": 0.9966, |
|
"step": 615 |
|
}, |
|
{ |
|
"epoch": 1.5794871794871796, |
|
"grad_norm": 0.1982981264591217, |
|
"learning_rate": 7.657645466847092e-06, |
|
"loss": 0.9904, |
|
"step": 616 |
|
}, |
|
{ |
|
"epoch": 1.5820512820512822, |
|
"grad_norm": 0.1927499920129776, |
|
"learning_rate": 7.644021739130436e-06, |
|
"loss": 1.0763, |
|
"step": 617 |
|
}, |
|
{ |
|
"epoch": 1.5846153846153848, |
|
"grad_norm": 0.19995129108428955, |
|
"learning_rate": 7.630286493860846e-06, |
|
"loss": 0.9884, |
|
"step": 618 |
|
}, |
|
{ |
|
"epoch": 1.5871794871794873, |
|
"grad_norm": 0.17647652328014374, |
|
"learning_rate": 7.616438356164383e-06, |
|
"loss": 1.0011, |
|
"step": 619 |
|
}, |
|
{ |
|
"epoch": 1.5897435897435899, |
|
"grad_norm": 0.1947464793920517, |
|
"learning_rate": 7.6024759284731776e-06, |
|
"loss": 1.0027, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 1.5923076923076924, |
|
"grad_norm": 0.20255906879901886, |
|
"learning_rate": 7.5883977900552484e-06, |
|
"loss": 0.9758, |
|
"step": 621 |
|
}, |
|
{ |
|
"epoch": 1.594871794871795, |
|
"grad_norm": 0.21405860781669617, |
|
"learning_rate": 7.574202496532593e-06, |
|
"loss": 1.1207, |
|
"step": 622 |
|
}, |
|
{ |
|
"epoch": 1.5974358974358975, |
|
"grad_norm": 0.16839265823364258, |
|
"learning_rate": 7.559888579387188e-06, |
|
"loss": 0.9939, |
|
"step": 623 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"grad_norm": 0.19284895062446594, |
|
"learning_rate": 7.545454545454545e-06, |
|
"loss": 1.0519, |
|
"step": 624 |
|
}, |
|
{ |
|
"epoch": 1.6025641025641026, |
|
"grad_norm": 0.1863621473312378, |
|
"learning_rate": 7.5308988764044946e-06, |
|
"loss": 0.9174, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 1.6051282051282052, |
|
"grad_norm": 0.2013963907957077, |
|
"learning_rate": 7.516220028208745e-06, |
|
"loss": 0.9832, |
|
"step": 626 |
|
}, |
|
{ |
|
"epoch": 1.6076923076923078, |
|
"grad_norm": 0.18340826034545898, |
|
"learning_rate": 7.501416430594901e-06, |
|
"loss": 0.8789, |
|
"step": 627 |
|
}, |
|
{ |
|
"epoch": 1.6102564102564103, |
|
"grad_norm": 0.1774785965681076, |
|
"learning_rate": 7.486486486486487e-06, |
|
"loss": 1.1, |
|
"step": 628 |
|
}, |
|
{ |
|
"epoch": 1.6128205128205129, |
|
"grad_norm": 0.18885089457035065, |
|
"learning_rate": 7.471428571428571e-06, |
|
"loss": 0.9079, |
|
"step": 629 |
|
}, |
|
{ |
|
"epoch": 1.6153846153846154, |
|
"grad_norm": 0.19248345494270325, |
|
"learning_rate": 7.456241032998566e-06, |
|
"loss": 0.9162, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 1.617948717948718, |
|
"grad_norm": 0.17770878970623016, |
|
"learning_rate": 7.440922190201729e-06, |
|
"loss": 0.9829, |
|
"step": 631 |
|
}, |
|
{ |
|
"epoch": 1.6205128205128205, |
|
"grad_norm": 0.19071798026561737, |
|
"learning_rate": 7.42547033285094e-06, |
|
"loss": 0.9304, |
|
"step": 632 |
|
}, |
|
{ |
|
"epoch": 1.623076923076923, |
|
"grad_norm": 0.1921025514602661, |
|
"learning_rate": 7.409883720930233e-06, |
|
"loss": 0.9004, |
|
"step": 633 |
|
}, |
|
{ |
|
"epoch": 1.6256410256410256, |
|
"grad_norm": 0.21452969312667847, |
|
"learning_rate": 7.394160583941606e-06, |
|
"loss": 0.9945, |
|
"step": 634 |
|
}, |
|
{ |
|
"epoch": 1.6282051282051282, |
|
"grad_norm": 0.16074183583259583, |
|
"learning_rate": 7.378299120234605e-06, |
|
"loss": 0.925, |
|
"step": 635 |
|
}, |
|
{ |
|
"epoch": 1.6307692307692307, |
|
"grad_norm": 0.1816839724779129, |
|
"learning_rate": 7.362297496318113e-06, |
|
"loss": 0.9164, |
|
"step": 636 |
|
}, |
|
{ |
|
"epoch": 1.6333333333333333, |
|
"grad_norm": 0.19317786395549774, |
|
"learning_rate": 7.346153846153847e-06, |
|
"loss": 0.943, |
|
"step": 637 |
|
}, |
|
{ |
|
"epoch": 1.6358974358974359, |
|
"grad_norm": 0.21708151698112488, |
|
"learning_rate": 7.329866270430907e-06, |
|
"loss": 0.9576, |
|
"step": 638 |
|
}, |
|
{ |
|
"epoch": 1.6384615384615384, |
|
"grad_norm": 0.200921893119812, |
|
"learning_rate": 7.313432835820895e-06, |
|
"loss": 0.9782, |
|
"step": 639 |
|
}, |
|
{ |
|
"epoch": 1.641025641025641, |
|
"grad_norm": 0.1886773556470871, |
|
"learning_rate": 7.2968515742128935e-06, |
|
"loss": 0.989, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 1.6435897435897435, |
|
"grad_norm": 0.19396939873695374, |
|
"learning_rate": 7.280120481927711e-06, |
|
"loss": 1.026, |
|
"step": 641 |
|
}, |
|
{ |
|
"epoch": 1.646153846153846, |
|
"grad_norm": 0.21198135614395142, |
|
"learning_rate": 7.263237518910742e-06, |
|
"loss": 1.1656, |
|
"step": 642 |
|
}, |
|
{ |
|
"epoch": 1.6487179487179486, |
|
"grad_norm": 0.22808434069156647, |
|
"learning_rate": 7.246200607902737e-06, |
|
"loss": 1.0588, |
|
"step": 643 |
|
}, |
|
{ |
|
"epoch": 1.6512820512820512, |
|
"grad_norm": 0.19930703938007355, |
|
"learning_rate": 7.229007633587788e-06, |
|
"loss": 0.9826, |
|
"step": 644 |
|
}, |
|
{ |
|
"epoch": 1.6538461538461537, |
|
"grad_norm": 0.188712477684021, |
|
"learning_rate": 7.211656441717792e-06, |
|
"loss": 1.0296, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 1.6564102564102563, |
|
"grad_norm": 0.2129139006137848, |
|
"learning_rate": 7.194144838212635e-06, |
|
"loss": 1.1525, |
|
"step": 646 |
|
}, |
|
{ |
|
"epoch": 1.6589743589743589, |
|
"grad_norm": 0.20264121890068054, |
|
"learning_rate": 7.176470588235295e-06, |
|
"loss": 0.9398, |
|
"step": 647 |
|
}, |
|
{ |
|
"epoch": 1.6615384615384614, |
|
"grad_norm": 0.18227992951869965, |
|
"learning_rate": 7.1586314152410585e-06, |
|
"loss": 1.0267, |
|
"step": 648 |
|
}, |
|
{ |
|
"epoch": 1.664102564102564, |
|
"grad_norm": 0.1936773806810379, |
|
"learning_rate": 7.140625e-06, |
|
"loss": 0.8565, |
|
"step": 649 |
|
}, |
|
{ |
|
"epoch": 1.6666666666666665, |
|
"grad_norm": 0.1949433982372284, |
|
"learning_rate": 7.122448979591836e-06, |
|
"loss": 1.0448, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 1.669230769230769, |
|
"grad_norm": 0.19935406744480133, |
|
"learning_rate": 7.104100946372239e-06, |
|
"loss": 0.9539, |
|
"step": 651 |
|
}, |
|
{ |
|
"epoch": 1.6717948717948716, |
|
"grad_norm": 0.2263110727071762, |
|
"learning_rate": 7.085578446909666e-06, |
|
"loss": 0.9838, |
|
"step": 652 |
|
}, |
|
{ |
|
"epoch": 1.6743589743589744, |
|
"grad_norm": 0.2188168615102768, |
|
"learning_rate": 7.06687898089172e-06, |
|
"loss": 0.9958, |
|
"step": 653 |
|
}, |
|
{ |
|
"epoch": 1.676923076923077, |
|
"grad_norm": 0.2003227025270462, |
|
"learning_rate": 7.048e-06, |
|
"loss": 1.0545, |
|
"step": 654 |
|
}, |
|
{ |
|
"epoch": 1.6794871794871795, |
|
"grad_norm": 0.18214313685894012, |
|
"learning_rate": 7.028938906752412e-06, |
|
"loss": 0.9917, |
|
"step": 655 |
|
}, |
|
{ |
|
"epoch": 1.682051282051282, |
|
"grad_norm": 0.1975235790014267, |
|
"learning_rate": 7.009693053311792e-06, |
|
"loss": 1.0347, |
|
"step": 656 |
|
}, |
|
{ |
|
"epoch": 1.6846153846153846, |
|
"grad_norm": 0.2402488738298416, |
|
"learning_rate": 6.990259740259741e-06, |
|
"loss": 0.8716, |
|
"step": 657 |
|
}, |
|
{ |
|
"epoch": 1.6871794871794872, |
|
"grad_norm": 0.20426467061042786, |
|
"learning_rate": 6.970636215334422e-06, |
|
"loss": 0.9865, |
|
"step": 658 |
|
}, |
|
{ |
|
"epoch": 1.6897435897435897, |
|
"grad_norm": 0.22642633318901062, |
|
"learning_rate": 6.950819672131147e-06, |
|
"loss": 0.9125, |
|
"step": 659 |
|
}, |
|
{ |
|
"epoch": 1.6923076923076923, |
|
"grad_norm": 0.23084644973278046, |
|
"learning_rate": 6.930807248764415e-06, |
|
"loss": 0.9856, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 1.6948717948717948, |
|
"grad_norm": 0.22062422335147858, |
|
"learning_rate": 6.910596026490067e-06, |
|
"loss": 1.055, |
|
"step": 661 |
|
}, |
|
{ |
|
"epoch": 1.6974358974358974, |
|
"grad_norm": 0.19819729030132294, |
|
"learning_rate": 6.89018302828619e-06, |
|
"loss": 1.0142, |
|
"step": 662 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"grad_norm": 0.2273205816745758, |
|
"learning_rate": 6.869565217391305e-06, |
|
"loss": 0.9948, |
|
"step": 663 |
|
}, |
|
{ |
|
"epoch": 1.7025641025641025, |
|
"grad_norm": 0.2149330973625183, |
|
"learning_rate": 6.848739495798319e-06, |
|
"loss": 0.9062, |
|
"step": 664 |
|
}, |
|
{ |
|
"epoch": 1.7051282051282053, |
|
"grad_norm": 0.17445141077041626, |
|
"learning_rate": 6.827702702702703e-06, |
|
"loss": 1.0166, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 1.7076923076923078, |
|
"grad_norm": 0.21047592163085938, |
|
"learning_rate": 6.806451612903226e-06, |
|
"loss": 1.0253, |
|
"step": 666 |
|
}, |
|
{ |
|
"epoch": 1.7102564102564104, |
|
"grad_norm": 0.19377169013023376, |
|
"learning_rate": 6.784982935153583e-06, |
|
"loss": 0.9729, |
|
"step": 667 |
|
}, |
|
{ |
|
"epoch": 1.712820512820513, |
|
"grad_norm": 0.18540802597999573, |
|
"learning_rate": 6.763293310463122e-06, |
|
"loss": 0.8732, |
|
"step": 668 |
|
}, |
|
{ |
|
"epoch": 1.7153846153846155, |
|
"grad_norm": 0.2677832841873169, |
|
"learning_rate": 6.741379310344829e-06, |
|
"loss": 1.0237, |
|
"step": 669 |
|
}, |
|
{ |
|
"epoch": 1.717948717948718, |
|
"grad_norm": 0.20734448730945587, |
|
"learning_rate": 6.719237435008666e-06, |
|
"loss": 1.0527, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 1.7205128205128206, |
|
"grad_norm": 0.1992000937461853, |
|
"learning_rate": 6.696864111498258e-06, |
|
"loss": 0.9951, |
|
"step": 671 |
|
}, |
|
{ |
|
"epoch": 1.7230769230769232, |
|
"grad_norm": 0.19159814715385437, |
|
"learning_rate": 6.6742556917688265e-06, |
|
"loss": 1.1233, |
|
"step": 672 |
|
}, |
|
{ |
|
"epoch": 1.7256410256410257, |
|
"grad_norm": 0.2154679298400879, |
|
"learning_rate": 6.651408450704226e-06, |
|
"loss": 1.0262, |
|
"step": 673 |
|
}, |
|
{ |
|
"epoch": 1.7282051282051283, |
|
"grad_norm": 0.1996496319770813, |
|
"learning_rate": 6.628318584070796e-06, |
|
"loss": 1.0542, |
|
"step": 674 |
|
}, |
|
{ |
|
"epoch": 1.7307692307692308, |
|
"grad_norm": 0.18427924811840057, |
|
"learning_rate": 6.604982206405694e-06, |
|
"loss": 0.8912, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 1.7333333333333334, |
|
"grad_norm": 0.1896672397851944, |
|
"learning_rate": 6.58139534883721e-06, |
|
"loss": 1.1042, |
|
"step": 676 |
|
}, |
|
{ |
|
"epoch": 1.735897435897436, |
|
"grad_norm": 0.2349502146244049, |
|
"learning_rate": 6.557553956834534e-06, |
|
"loss": 1.0842, |
|
"step": 677 |
|
}, |
|
{ |
|
"epoch": 1.7384615384615385, |
|
"grad_norm": 0.21734175086021423, |
|
"learning_rate": 6.533453887884268e-06, |
|
"loss": 1.0086, |
|
"step": 678 |
|
}, |
|
{ |
|
"epoch": 1.741025641025641, |
|
"grad_norm": 0.19185325503349304, |
|
"learning_rate": 6.5090909090909095e-06, |
|
"loss": 0.9509, |
|
"step": 679 |
|
}, |
|
{ |
|
"epoch": 1.7435897435897436, |
|
"grad_norm": 0.18834951519966125, |
|
"learning_rate": 6.484460694698354e-06, |
|
"loss": 1.0333, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 1.7461538461538462, |
|
"grad_norm": 0.19962508976459503, |
|
"learning_rate": 6.459558823529412e-06, |
|
"loss": 1.0062, |
|
"step": 681 |
|
}, |
|
{ |
|
"epoch": 1.7487179487179487, |
|
"grad_norm": 0.1968788206577301, |
|
"learning_rate": 6.434380776340111e-06, |
|
"loss": 0.9652, |
|
"step": 682 |
|
}, |
|
{ |
|
"epoch": 1.7512820512820513, |
|
"grad_norm": 0.19386903941631317, |
|
"learning_rate": 6.408921933085502e-06, |
|
"loss": 0.9253, |
|
"step": 683 |
|
}, |
|
{ |
|
"epoch": 1.7538461538461538, |
|
"grad_norm": 0.18816019594669342, |
|
"learning_rate": 6.38317757009346e-06, |
|
"loss": 0.998, |
|
"step": 684 |
|
}, |
|
{ |
|
"epoch": 1.7564102564102564, |
|
"grad_norm": 0.19842685759067535, |
|
"learning_rate": 6.357142857142856e-06, |
|
"loss": 1.0057, |
|
"step": 685 |
|
}, |
|
{ |
|
"epoch": 1.758974358974359, |
|
"grad_norm": 0.19914638996124268, |
|
"learning_rate": 6.330812854442344e-06, |
|
"loss": 0.9429, |
|
"step": 686 |
|
}, |
|
{ |
|
"epoch": 1.7615384615384615, |
|
"grad_norm": 0.1913568377494812, |
|
"learning_rate": 6.304182509505703e-06, |
|
"loss": 0.8765, |
|
"step": 687 |
|
}, |
|
{ |
|
"epoch": 1.764102564102564, |
|
"grad_norm": 0.20887283980846405, |
|
"learning_rate": 6.277246653919694e-06, |
|
"loss": 1.0437, |
|
"step": 688 |
|
}, |
|
{ |
|
"epoch": 1.7666666666666666, |
|
"grad_norm": 0.1883188635110855, |
|
"learning_rate": 6.25e-06, |
|
"loss": 0.9215, |
|
"step": 689 |
|
}, |
|
{ |
|
"epoch": 1.7692307692307692, |
|
"grad_norm": 0.1730821281671524, |
|
"learning_rate": 6.222437137330755e-06, |
|
"loss": 1.0435, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 1.7717948717948717, |
|
"grad_norm": 0.18366935849189758, |
|
"learning_rate": 6.194552529182879e-06, |
|
"loss": 1.0434, |
|
"step": 691 |
|
}, |
|
{ |
|
"epoch": 1.7743589743589743, |
|
"grad_norm": 0.16954126954078674, |
|
"learning_rate": 6.166340508806262e-06, |
|
"loss": 0.9655, |
|
"step": 692 |
|
}, |
|
{ |
|
"epoch": 1.7769230769230768, |
|
"grad_norm": 0.20240214467048645, |
|
"learning_rate": 6.137795275590551e-06, |
|
"loss": 1.0522, |
|
"step": 693 |
|
}, |
|
{ |
|
"epoch": 1.7794871794871794, |
|
"grad_norm": 0.1842651218175888, |
|
"learning_rate": 6.1089108910891094e-06, |
|
"loss": 0.9502, |
|
"step": 694 |
|
}, |
|
{ |
|
"epoch": 1.782051282051282, |
|
"grad_norm": 0.19008156657218933, |
|
"learning_rate": 6.079681274900399e-06, |
|
"loss": 0.9194, |
|
"step": 695 |
|
}, |
|
{ |
|
"epoch": 1.7846153846153845, |
|
"grad_norm": 0.18955452740192413, |
|
"learning_rate": 6.050100200400802e-06, |
|
"loss": 0.9153, |
|
"step": 696 |
|
}, |
|
{ |
|
"epoch": 1.787179487179487, |
|
"grad_norm": 0.18745142221450806, |
|
"learning_rate": 6.020161290322582e-06, |
|
"loss": 1.0557, |
|
"step": 697 |
|
}, |
|
{ |
|
"epoch": 1.7897435897435896, |
|
"grad_norm": 0.19175171852111816, |
|
"learning_rate": 5.9898580121703855e-06, |
|
"loss": 1.0186, |
|
"step": 698 |
|
}, |
|
{ |
|
"epoch": 1.7923076923076922, |
|
"grad_norm": 0.21348226070404053, |
|
"learning_rate": 5.9591836734693876e-06, |
|
"loss": 1.1486, |
|
"step": 699 |
|
}, |
|
{ |
|
"epoch": 1.7948717948717947, |
|
"grad_norm": 0.19678117334842682, |
|
"learning_rate": 5.928131416837782e-06, |
|
"loss": 1.0151, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.7974358974358975, |
|
"grad_norm": 0.17783664166927338, |
|
"learning_rate": 5.896694214876034e-06, |
|
"loss": 0.9591, |
|
"step": 701 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"grad_norm": 0.21038049459457397, |
|
"learning_rate": 5.8648648648648655e-06, |
|
"loss": 1.0256, |
|
"step": 702 |
|
}, |
|
{ |
|
"epoch": 1.8025641025641026, |
|
"grad_norm": 0.2131882607936859, |
|
"learning_rate": 5.832635983263598e-06, |
|
"loss": 1.0602, |
|
"step": 703 |
|
}, |
|
{ |
|
"epoch": 1.8051282051282052, |
|
"grad_norm": 0.2531805634498596, |
|
"learning_rate": 5.8e-06, |
|
"loss": 0.9224, |
|
"step": 704 |
|
}, |
|
{ |
|
"epoch": 1.8076923076923077, |
|
"grad_norm": 0.20389708876609802, |
|
"learning_rate": 5.766949152542372e-06, |
|
"loss": 0.926, |
|
"step": 705 |
|
}, |
|
{ |
|
"epoch": 1.8102564102564103, |
|
"grad_norm": 0.19052807986736298, |
|
"learning_rate": 5.733475479744137e-06, |
|
"loss": 1.0813, |
|
"step": 706 |
|
}, |
|
{ |
|
"epoch": 1.8128205128205128, |
|
"grad_norm": 0.23358896374702454, |
|
"learning_rate": 5.6995708154506445e-06, |
|
"loss": 1.1185, |
|
"step": 707 |
|
}, |
|
{ |
|
"epoch": 1.8153846153846154, |
|
"grad_norm": 0.19410401582717896, |
|
"learning_rate": 5.6652267818574515e-06, |
|
"loss": 0.9472, |
|
"step": 708 |
|
}, |
|
{ |
|
"epoch": 1.817948717948718, |
|
"grad_norm": 0.20657074451446533, |
|
"learning_rate": 5.630434782608696e-06, |
|
"loss": 0.9792, |
|
"step": 709 |
|
}, |
|
{ |
|
"epoch": 1.8205128205128205, |
|
"grad_norm": 0.19862636923789978, |
|
"learning_rate": 5.5951859956236334e-06, |
|
"loss": 1.0459, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 1.823076923076923, |
|
"grad_norm": 0.19714003801345825, |
|
"learning_rate": 5.559471365638766e-06, |
|
"loss": 1.0864, |
|
"step": 711 |
|
}, |
|
{ |
|
"epoch": 1.8256410256410256, |
|
"grad_norm": 0.2103991061449051, |
|
"learning_rate": 5.523281596452329e-06, |
|
"loss": 1.0025, |
|
"step": 712 |
|
}, |
|
{ |
|
"epoch": 1.8282051282051284, |
|
"grad_norm": 0.21029628813266754, |
|
"learning_rate": 5.486607142857143e-06, |
|
"loss": 1.0526, |
|
"step": 713 |
|
}, |
|
{ |
|
"epoch": 1.830769230769231, |
|
"grad_norm": 0.207773357629776, |
|
"learning_rate": 5.4494382022471915e-06, |
|
"loss": 0.9971, |
|
"step": 714 |
|
}, |
|
{ |
|
"epoch": 1.8333333333333335, |
|
"grad_norm": 0.21237727999687195, |
|
"learning_rate": 5.411764705882353e-06, |
|
"loss": 0.9528, |
|
"step": 715 |
|
}, |
|
{ |
|
"epoch": 1.835897435897436, |
|
"grad_norm": 0.1775677651166916, |
|
"learning_rate": 5.373576309794989e-06, |
|
"loss": 0.8888, |
|
"step": 716 |
|
}, |
|
{ |
|
"epoch": 1.8384615384615386, |
|
"grad_norm": 0.21109408140182495, |
|
"learning_rate": 5.3348623853211015e-06, |
|
"loss": 0.9526, |
|
"step": 717 |
|
}, |
|
{ |
|
"epoch": 1.8410256410256411, |
|
"grad_norm": 0.20082655549049377, |
|
"learning_rate": 5.295612009237876e-06, |
|
"loss": 0.989, |
|
"step": 718 |
|
}, |
|
{ |
|
"epoch": 1.8435897435897437, |
|
"grad_norm": 0.18796475231647491, |
|
"learning_rate": 5.255813953488372e-06, |
|
"loss": 1.1235, |
|
"step": 719 |
|
}, |
|
{ |
|
"epoch": 1.8461538461538463, |
|
"grad_norm": 0.19870947301387787, |
|
"learning_rate": 5.215456674473068e-06, |
|
"loss": 0.9476, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 1.8487179487179488, |
|
"grad_norm": 0.20163416862487793, |
|
"learning_rate": 5.174528301886793e-06, |
|
"loss": 1.0574, |
|
"step": 721 |
|
}, |
|
{ |
|
"epoch": 1.8512820512820514, |
|
"grad_norm": 0.1803264170885086, |
|
"learning_rate": 5.133016627078385e-06, |
|
"loss": 0.9504, |
|
"step": 722 |
|
}, |
|
{ |
|
"epoch": 1.853846153846154, |
|
"grad_norm": 0.19215236604213715, |
|
"learning_rate": 5.090909090909091e-06, |
|
"loss": 1.1705, |
|
"step": 723 |
|
}, |
|
{ |
|
"epoch": 1.8564102564102565, |
|
"grad_norm": 0.2053728848695755, |
|
"learning_rate": 5.048192771084337e-06, |
|
"loss": 1.0616, |
|
"step": 724 |
|
}, |
|
{ |
|
"epoch": 1.858974358974359, |
|
"grad_norm": 0.18856436014175415, |
|
"learning_rate": 5.004854368932039e-06, |
|
"loss": 1.1064, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 1.8615384615384616, |
|
"grad_norm": 0.22481724619865417, |
|
"learning_rate": 4.960880195599021e-06, |
|
"loss": 0.9446, |
|
"step": 726 |
|
}, |
|
{ |
|
"epoch": 1.8641025641025641, |
|
"grad_norm": 0.19489426910877228, |
|
"learning_rate": 4.916256157635469e-06, |
|
"loss": 0.9482, |
|
"step": 727 |
|
}, |
|
{ |
|
"epoch": 1.8666666666666667, |
|
"grad_norm": 0.18249572813510895, |
|
"learning_rate": 4.870967741935484e-06, |
|
"loss": 0.9447, |
|
"step": 728 |
|
}, |
|
{ |
|
"epoch": 1.8692307692307693, |
|
"grad_norm": 0.1984269618988037, |
|
"learning_rate": 4.825e-06, |
|
"loss": 0.8603, |
|
"step": 729 |
|
}, |
|
{ |
|
"epoch": 1.8717948717948718, |
|
"grad_norm": 0.1892971694469452, |
|
"learning_rate": 4.778337531486147e-06, |
|
"loss": 0.9162, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 1.8743589743589744, |
|
"grad_norm": 0.19022035598754883, |
|
"learning_rate": 4.7309644670050755e-06, |
|
"loss": 0.915, |
|
"step": 731 |
|
}, |
|
{ |
|
"epoch": 1.876923076923077, |
|
"grad_norm": 0.20524592697620392, |
|
"learning_rate": 4.6828644501278775e-06, |
|
"loss": 0.9754, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 1.8794871794871795, |
|
"grad_norm": 0.19411511719226837, |
|
"learning_rate": 4.6340206185567015e-06, |
|
"loss": 0.9368, |
|
"step": 733 |
|
}, |
|
{ |
|
"epoch": 1.882051282051282, |
|
"grad_norm": 0.19343458116054535, |
|
"learning_rate": 4.584415584415584e-06, |
|
"loss": 1.0132, |
|
"step": 734 |
|
}, |
|
{ |
|
"epoch": 1.8846153846153846, |
|
"grad_norm": 0.197899729013443, |
|
"learning_rate": 4.534031413612565e-06, |
|
"loss": 1.0597, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 1.8871794871794871, |
|
"grad_norm": 0.22261539101600647, |
|
"learning_rate": 4.482849604221636e-06, |
|
"loss": 0.9934, |
|
"step": 736 |
|
}, |
|
{ |
|
"epoch": 1.8897435897435897, |
|
"grad_norm": 0.21835994720458984, |
|
"learning_rate": 4.430851063829788e-06, |
|
"loss": 0.9954, |
|
"step": 737 |
|
}, |
|
{ |
|
"epoch": 1.8923076923076922, |
|
"grad_norm": 0.1972758173942566, |
|
"learning_rate": 4.378016085790885e-06, |
|
"loss": 0.9468, |
|
"step": 738 |
|
}, |
|
{ |
|
"epoch": 1.8948717948717948, |
|
"grad_norm": 0.18865923583507538, |
|
"learning_rate": 4.324324324324325e-06, |
|
"loss": 0.9226, |
|
"step": 739 |
|
}, |
|
{ |
|
"epoch": 1.8974358974358974, |
|
"grad_norm": 0.19400173425674438, |
|
"learning_rate": 4.2697547683923715e-06, |
|
"loss": 1.1236, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"grad_norm": 0.18145526945590973, |
|
"learning_rate": 4.2142857142857145e-06, |
|
"loss": 0.8993, |
|
"step": 741 |
|
}, |
|
{ |
|
"epoch": 1.9025641025641025, |
|
"grad_norm": 0.2090071588754654, |
|
"learning_rate": 4.157894736842105e-06, |
|
"loss": 0.9357, |
|
"step": 742 |
|
}, |
|
{ |
|
"epoch": 1.905128205128205, |
|
"grad_norm": 0.240007683634758, |
|
"learning_rate": 4.100558659217877e-06, |
|
"loss": 0.9786, |
|
"step": 743 |
|
}, |
|
{ |
|
"epoch": 1.9076923076923076, |
|
"grad_norm": 0.19722330570220947, |
|
"learning_rate": 4.04225352112676e-06, |
|
"loss": 1.0765, |
|
"step": 744 |
|
}, |
|
{ |
|
"epoch": 1.9102564102564101, |
|
"grad_norm": 0.18485118448734283, |
|
"learning_rate": 3.982954545454546e-06, |
|
"loss": 0.8595, |
|
"step": 745 |
|
}, |
|
{ |
|
"epoch": 1.9128205128205127, |
|
"grad_norm": 0.2154824435710907, |
|
"learning_rate": 3.922636103151863e-06, |
|
"loss": 1.0258, |
|
"step": 746 |
|
}, |
|
{ |
|
"epoch": 1.9153846153846152, |
|
"grad_norm": 0.2018478512763977, |
|
"learning_rate": 3.861271676300577e-06, |
|
"loss": 0.9515, |
|
"step": 747 |
|
}, |
|
{ |
|
"epoch": 1.9179487179487178, |
|
"grad_norm": 0.2598324716091156, |
|
"learning_rate": 3.798833819241983e-06, |
|
"loss": 1.1186, |
|
"step": 748 |
|
}, |
|
{ |
|
"epoch": 1.9205128205128204, |
|
"grad_norm": 0.21484240889549255, |
|
"learning_rate": 3.735294117647058e-06, |
|
"loss": 0.9179, |
|
"step": 749 |
|
}, |
|
{ |
|
"epoch": 1.9230769230769231, |
|
"grad_norm": 0.20729656517505646, |
|
"learning_rate": 3.6706231454005937e-06, |
|
"loss": 0.9008, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 1.9256410256410257, |
|
"grad_norm": 0.19938671588897705, |
|
"learning_rate": 3.604790419161677e-06, |
|
"loss": 0.9061, |
|
"step": 751 |
|
}, |
|
{ |
|
"epoch": 1.9282051282051282, |
|
"grad_norm": 0.19618763029575348, |
|
"learning_rate": 3.5377643504531735e-06, |
|
"loss": 0.9478, |
|
"step": 752 |
|
}, |
|
{ |
|
"epoch": 1.9307692307692308, |
|
"grad_norm": 0.20993918180465698, |
|
"learning_rate": 3.4695121951219514e-06, |
|
"loss": 1.091, |
|
"step": 753 |
|
}, |
|
{ |
|
"epoch": 1.9333333333333333, |
|
"grad_norm": 0.19574132561683655, |
|
"learning_rate": 3.4e-06, |
|
"loss": 1.0154, |
|
"step": 754 |
|
}, |
|
{ |
|
"epoch": 1.935897435897436, |
|
"grad_norm": 0.2107248604297638, |
|
"learning_rate": 3.329192546583851e-06, |
|
"loss": 0.997, |
|
"step": 755 |
|
}, |
|
{ |
|
"epoch": 1.9384615384615385, |
|
"grad_norm": 0.19578175246715546, |
|
"learning_rate": 3.2570532915360505e-06, |
|
"loss": 0.9224, |
|
"step": 756 |
|
}, |
|
{ |
|
"epoch": 1.941025641025641, |
|
"grad_norm": 0.20714713633060455, |
|
"learning_rate": 3.183544303797469e-06, |
|
"loss": 0.9749, |
|
"step": 757 |
|
}, |
|
{ |
|
"epoch": 1.9435897435897436, |
|
"grad_norm": 0.1808098554611206, |
|
"learning_rate": 3.1086261980830674e-06, |
|
"loss": 0.898, |
|
"step": 758 |
|
}, |
|
{ |
|
"epoch": 1.9461538461538461, |
|
"grad_norm": 0.20211873948574066, |
|
"learning_rate": 3.0322580645161295e-06, |
|
"loss": 0.9319, |
|
"step": 759 |
|
}, |
|
{ |
|
"epoch": 1.9487179487179487, |
|
"grad_norm": 0.17889924347400665, |
|
"learning_rate": 2.9543973941368082e-06, |
|
"loss": 1.0142, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 1.9512820512820512, |
|
"grad_norm": 0.20043864846229553, |
|
"learning_rate": 2.875e-06, |
|
"loss": 1.0167, |
|
"step": 761 |
|
}, |
|
{ |
|
"epoch": 1.953846153846154, |
|
"grad_norm": 0.18134412169456482, |
|
"learning_rate": 2.794019933554818e-06, |
|
"loss": 0.8732, |
|
"step": 762 |
|
}, |
|
{ |
|
"epoch": 1.9564102564102566, |
|
"grad_norm": 0.19279873371124268, |
|
"learning_rate": 2.7114093959731548e-06, |
|
"loss": 0.9885, |
|
"step": 763 |
|
}, |
|
{ |
|
"epoch": 1.9589743589743591, |
|
"grad_norm": 0.1957969218492508, |
|
"learning_rate": 2.627118644067797e-06, |
|
"loss": 0.9453, |
|
"step": 764 |
|
}, |
|
{ |
|
"epoch": 1.9615384615384617, |
|
"grad_norm": 0.2282707840204239, |
|
"learning_rate": 2.5410958904109595e-06, |
|
"loss": 0.91, |
|
"step": 765 |
|
}, |
|
{ |
|
"epoch": 1.9641025641025642, |
|
"grad_norm": 0.20508873462677002, |
|
"learning_rate": 2.453287197231834e-06, |
|
"loss": 0.9894, |
|
"step": 766 |
|
}, |
|
{ |
|
"epoch": 1.9666666666666668, |
|
"grad_norm": 0.19494283199310303, |
|
"learning_rate": 2.363636363636364e-06, |
|
"loss": 1.0989, |
|
"step": 767 |
|
}, |
|
{ |
|
"epoch": 1.9692307692307693, |
|
"grad_norm": 0.19367046654224396, |
|
"learning_rate": 2.2720848056537104e-06, |
|
"loss": 1.075, |
|
"step": 768 |
|
}, |
|
{ |
|
"epoch": 1.971794871794872, |
|
"grad_norm": 0.1860765963792801, |
|
"learning_rate": 2.1785714285714286e-06, |
|
"loss": 0.9745, |
|
"step": 769 |
|
}, |
|
{ |
|
"epoch": 1.9743589743589745, |
|
"grad_norm": 0.1922086477279663, |
|
"learning_rate": 2.0830324909747296e-06, |
|
"loss": 0.9443, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 1.976923076923077, |
|
"grad_norm": 0.20211626589298248, |
|
"learning_rate": 1.9854014598540146e-06, |
|
"loss": 0.9371, |
|
"step": 771 |
|
}, |
|
{ |
|
"epoch": 1.9794871794871796, |
|
"grad_norm": 0.21594083309173584, |
|
"learning_rate": 1.885608856088561e-06, |
|
"loss": 1.0045, |
|
"step": 772 |
|
}, |
|
{ |
|
"epoch": 1.9820512820512821, |
|
"grad_norm": 0.18539482355117798, |
|
"learning_rate": 1.7835820895522391e-06, |
|
"loss": 0.9609, |
|
"step": 773 |
|
}, |
|
{ |
|
"epoch": 1.9846153846153847, |
|
"grad_norm": 0.19419516623020172, |
|
"learning_rate": 1.6792452830188683e-06, |
|
"loss": 0.9157, |
|
"step": 774 |
|
}, |
|
{ |
|
"epoch": 1.9871794871794872, |
|
"grad_norm": 0.19369378685951233, |
|
"learning_rate": 1.572519083969466e-06, |
|
"loss": 0.9668, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 1.9897435897435898, |
|
"grad_norm": 0.19599087536334991, |
|
"learning_rate": 1.4633204633204633e-06, |
|
"loss": 0.9275, |
|
"step": 776 |
|
}, |
|
{ |
|
"epoch": 1.9923076923076923, |
|
"grad_norm": 0.20087149739265442, |
|
"learning_rate": 1.3515625000000002e-06, |
|
"loss": 1.057, |
|
"step": 777 |
|
}, |
|
{ |
|
"epoch": 1.994871794871795, |
|
"grad_norm": 0.19752490520477295, |
|
"learning_rate": 1.2371541501976286e-06, |
|
"loss": 1.0176, |
|
"step": 778 |
|
}, |
|
{ |
|
"epoch": 1.9974358974358974, |
|
"grad_norm": 0.17145206034183502, |
|
"learning_rate": 1.12e-06, |
|
"loss": 0.982, |
|
"step": 779 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 0.435234934091568, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 0.8781, |
|
"step": 780 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 780, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 195, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 4.846660370087018e+18, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|