|
{ |
|
"best_global_step": 900, |
|
"best_metric": 0.4358259439468384, |
|
"best_model_checkpoint": "./cv_jd_finetuned_model/checkpoint-900", |
|
"epoch": 2.5003474635163307, |
|
"eval_steps": 300, |
|
"global_step": 900, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.002779708130646282, |
|
"grad_norm": 0.47256213426589966, |
|
"learning_rate": 0.0, |
|
"loss": 1.7714, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.005559416261292564, |
|
"grad_norm": 0.43564292788505554, |
|
"learning_rate": 2.0000000000000003e-06, |
|
"loss": 1.7393, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.008339124391938846, |
|
"grad_norm": 0.4533561170101166, |
|
"learning_rate": 4.000000000000001e-06, |
|
"loss": 1.7359, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.011118832522585128, |
|
"grad_norm": 0.4747747778892517, |
|
"learning_rate": 6e-06, |
|
"loss": 1.79, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.01389854065323141, |
|
"grad_norm": 0.4859880208969116, |
|
"learning_rate": 8.000000000000001e-06, |
|
"loss": 1.7613, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.01667824878387769, |
|
"grad_norm": 0.46851226687431335, |
|
"learning_rate": 1e-05, |
|
"loss": 1.7779, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.019457956914523976, |
|
"grad_norm": 0.468657910823822, |
|
"learning_rate": 1.2e-05, |
|
"loss": 1.7623, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.022237665045170257, |
|
"grad_norm": 0.468654602766037, |
|
"learning_rate": 1.4000000000000001e-05, |
|
"loss": 1.801, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.02501737317581654, |
|
"grad_norm": 0.46270835399627686, |
|
"learning_rate": 1.6000000000000003e-05, |
|
"loss": 1.6855, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.02779708130646282, |
|
"grad_norm": 0.4720575511455536, |
|
"learning_rate": 1.8e-05, |
|
"loss": 1.7408, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.030576789437109102, |
|
"grad_norm": 0.4369213283061981, |
|
"learning_rate": 2e-05, |
|
"loss": 1.7412, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.03335649756775538, |
|
"grad_norm": 0.4069555401802063, |
|
"learning_rate": 2.2000000000000003e-05, |
|
"loss": 1.6908, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.03613620569840167, |
|
"grad_norm": 0.44282251596450806, |
|
"learning_rate": 2.4e-05, |
|
"loss": 1.7716, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.03891591382904795, |
|
"grad_norm": 0.4090988039970398, |
|
"learning_rate": 2.6000000000000002e-05, |
|
"loss": 1.66, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.04169562195969423, |
|
"grad_norm": 0.44722557067871094, |
|
"learning_rate": 2.8000000000000003e-05, |
|
"loss": 1.7268, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.04447533009034051, |
|
"grad_norm": 0.423772931098938, |
|
"learning_rate": 3e-05, |
|
"loss": 1.7199, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.0472550382209868, |
|
"grad_norm": 0.3943701386451721, |
|
"learning_rate": 3.2000000000000005e-05, |
|
"loss": 1.7025, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.05003474635163308, |
|
"grad_norm": 0.3880269527435303, |
|
"learning_rate": 3.4000000000000007e-05, |
|
"loss": 1.6418, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.05281445448227936, |
|
"grad_norm": 0.3725886046886444, |
|
"learning_rate": 3.6e-05, |
|
"loss": 1.6165, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.05559416261292564, |
|
"grad_norm": 0.3829341530799866, |
|
"learning_rate": 3.8e-05, |
|
"loss": 1.6348, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.05837387074357193, |
|
"grad_norm": 0.38574135303497314, |
|
"learning_rate": 4e-05, |
|
"loss": 1.6028, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.061153578874218205, |
|
"grad_norm": 0.34429848194122314, |
|
"learning_rate": 4.2e-05, |
|
"loss": 1.6449, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.06393328700486449, |
|
"grad_norm": 0.33892038464546204, |
|
"learning_rate": 4.4000000000000006e-05, |
|
"loss": 1.627, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.06671299513551077, |
|
"grad_norm": 0.31090790033340454, |
|
"learning_rate": 4.600000000000001e-05, |
|
"loss": 1.5466, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.06949270326615706, |
|
"grad_norm": 0.3035236895084381, |
|
"learning_rate": 4.8e-05, |
|
"loss": 1.5175, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.07227241139680333, |
|
"grad_norm": 0.2932650148868561, |
|
"learning_rate": 5e-05, |
|
"loss": 1.4668, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.07505211952744961, |
|
"grad_norm": 0.27232736349105835, |
|
"learning_rate": 5.2000000000000004e-05, |
|
"loss": 1.5586, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.0778318276580959, |
|
"grad_norm": 0.25519007444381714, |
|
"learning_rate": 5.4000000000000005e-05, |
|
"loss": 1.5489, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.08061153578874218, |
|
"grad_norm": 0.23789168894290924, |
|
"learning_rate": 5.6000000000000006e-05, |
|
"loss": 1.5095, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.08339124391938846, |
|
"grad_norm": 0.2281704694032669, |
|
"learning_rate": 5.8e-05, |
|
"loss": 1.5199, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.08617095205003475, |
|
"grad_norm": 0.23827943205833435, |
|
"learning_rate": 6e-05, |
|
"loss": 1.3646, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.08895066018068103, |
|
"grad_norm": 0.24246342480182648, |
|
"learning_rate": 6.2e-05, |
|
"loss": 1.4594, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.0917303683113273, |
|
"grad_norm": 0.2456520050764084, |
|
"learning_rate": 6.400000000000001e-05, |
|
"loss": 1.4243, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.0945100764419736, |
|
"grad_norm": 0.2509920001029968, |
|
"learning_rate": 6.6e-05, |
|
"loss": 1.3821, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.09728978457261987, |
|
"grad_norm": 0.2679535448551178, |
|
"learning_rate": 6.800000000000001e-05, |
|
"loss": 1.3136, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.10006949270326616, |
|
"grad_norm": 0.2893657684326172, |
|
"learning_rate": 7e-05, |
|
"loss": 1.3514, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.10284920083391244, |
|
"grad_norm": 0.31409314274787903, |
|
"learning_rate": 7.2e-05, |
|
"loss": 1.3186, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.10562890896455872, |
|
"grad_norm": 0.29165610671043396, |
|
"learning_rate": 7.4e-05, |
|
"loss": 1.2345, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.10840861709520501, |
|
"grad_norm": 0.31779947876930237, |
|
"learning_rate": 7.6e-05, |
|
"loss": 1.2938, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.11118832522585129, |
|
"grad_norm": 0.298592209815979, |
|
"learning_rate": 7.800000000000001e-05, |
|
"loss": 1.2937, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.11396803335649756, |
|
"grad_norm": 0.30116093158721924, |
|
"learning_rate": 8e-05, |
|
"loss": 1.2387, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.11674774148714386, |
|
"grad_norm": 0.28846633434295654, |
|
"learning_rate": 8.2e-05, |
|
"loss": 1.2688, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.11952744961779013, |
|
"grad_norm": 0.27421048283576965, |
|
"learning_rate": 8.4e-05, |
|
"loss": 1.3615, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.12230715774843641, |
|
"grad_norm": 0.25272664427757263, |
|
"learning_rate": 8.6e-05, |
|
"loss": 1.2837, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.1250868658790827, |
|
"grad_norm": 0.28714367747306824, |
|
"learning_rate": 8.800000000000001e-05, |
|
"loss": 1.1749, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.12786657400972898, |
|
"grad_norm": 0.27853626012802124, |
|
"learning_rate": 9e-05, |
|
"loss": 1.2653, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.13064628214037527, |
|
"grad_norm": 0.29935234785079956, |
|
"learning_rate": 9.200000000000001e-05, |
|
"loss": 1.0846, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.13342599027102153, |
|
"grad_norm": 0.2770155072212219, |
|
"learning_rate": 9.4e-05, |
|
"loss": 1.1273, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.13620569840166782, |
|
"grad_norm": 0.29866236448287964, |
|
"learning_rate": 9.6e-05, |
|
"loss": 1.1003, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.13898540653231412, |
|
"grad_norm": 0.3060087561607361, |
|
"learning_rate": 9.8e-05, |
|
"loss": 1.0825, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.14176511466296038, |
|
"grad_norm": 0.2942337095737457, |
|
"learning_rate": 0.0001, |
|
"loss": 1.1054, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.14454482279360667, |
|
"grad_norm": 0.2934916019439697, |
|
"learning_rate": 0.00010200000000000001, |
|
"loss": 1.0711, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.14732453092425296, |
|
"grad_norm": 0.33025407791137695, |
|
"learning_rate": 0.00010400000000000001, |
|
"loss": 0.9641, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.15010423905489922, |
|
"grad_norm": 0.3368203341960907, |
|
"learning_rate": 0.00010600000000000002, |
|
"loss": 0.9126, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.15288394718554552, |
|
"grad_norm": 0.3417721092700958, |
|
"learning_rate": 0.00010800000000000001, |
|
"loss": 1.0195, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.1556636553161918, |
|
"grad_norm": 0.33795756101608276, |
|
"learning_rate": 0.00011000000000000002, |
|
"loss": 0.8796, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.15844336344683807, |
|
"grad_norm": 0.341162770986557, |
|
"learning_rate": 0.00011200000000000001, |
|
"loss": 0.9361, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.16122307157748436, |
|
"grad_norm": 0.3660062849521637, |
|
"learning_rate": 0.00011399999999999999, |
|
"loss": 0.8179, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.16400277970813065, |
|
"grad_norm": 0.3034508526325226, |
|
"learning_rate": 0.000116, |
|
"loss": 0.8709, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.16678248783877692, |
|
"grad_norm": 0.31419962644577026, |
|
"learning_rate": 0.000118, |
|
"loss": 0.8925, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.1695621959694232, |
|
"grad_norm": 0.32202863693237305, |
|
"learning_rate": 0.00012, |
|
"loss": 0.9444, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.1723419041000695, |
|
"grad_norm": 0.29973354935646057, |
|
"learning_rate": 0.000122, |
|
"loss": 0.9334, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.17512161223071576, |
|
"grad_norm": 0.2913822531700134, |
|
"learning_rate": 0.000124, |
|
"loss": 0.8633, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.17790132036136205, |
|
"grad_norm": 0.3123023509979248, |
|
"learning_rate": 0.000126, |
|
"loss": 0.8474, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.18068102849200834, |
|
"grad_norm": 0.36691340804100037, |
|
"learning_rate": 0.00012800000000000002, |
|
"loss": 0.8491, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.1834607366226546, |
|
"grad_norm": 0.2867358922958374, |
|
"learning_rate": 0.00013000000000000002, |
|
"loss": 0.8812, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.1862404447533009, |
|
"grad_norm": 0.33814680576324463, |
|
"learning_rate": 0.000132, |
|
"loss": 0.7977, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.1890201528839472, |
|
"grad_norm": 0.2705424129962921, |
|
"learning_rate": 0.000134, |
|
"loss": 0.8189, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.19179986101459348, |
|
"grad_norm": 0.27234122157096863, |
|
"learning_rate": 0.00013600000000000003, |
|
"loss": 0.8661, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.19457956914523974, |
|
"grad_norm": 0.3268398642539978, |
|
"learning_rate": 0.000138, |
|
"loss": 0.7359, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.19735927727588604, |
|
"grad_norm": 0.29902949929237366, |
|
"learning_rate": 0.00014, |
|
"loss": 0.7943, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.20013898540653233, |
|
"grad_norm": 0.31011486053466797, |
|
"learning_rate": 0.000142, |
|
"loss": 1.0169, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.2029186935371786, |
|
"grad_norm": 0.28908616304397583, |
|
"learning_rate": 0.000144, |
|
"loss": 0.7617, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.20569840166782488, |
|
"grad_norm": 0.31617382168769836, |
|
"learning_rate": 0.000146, |
|
"loss": 0.7512, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.20847810979847117, |
|
"grad_norm": 0.32012176513671875, |
|
"learning_rate": 0.000148, |
|
"loss": 0.838, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.21125781792911744, |
|
"grad_norm": 0.32813915610313416, |
|
"learning_rate": 0.00015000000000000001, |
|
"loss": 0.8314, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.21403752605976373, |
|
"grad_norm": 0.311210572719574, |
|
"learning_rate": 0.000152, |
|
"loss": 0.7498, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.21681723419041002, |
|
"grad_norm": 0.2896203100681305, |
|
"learning_rate": 0.000154, |
|
"loss": 0.8062, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.21959694232105628, |
|
"grad_norm": 0.30190443992614746, |
|
"learning_rate": 0.00015600000000000002, |
|
"loss": 0.8198, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.22237665045170257, |
|
"grad_norm": 0.31988754868507385, |
|
"learning_rate": 0.00015800000000000002, |
|
"loss": 0.8781, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.22515635858234886, |
|
"grad_norm": 0.29290610551834106, |
|
"learning_rate": 0.00016, |
|
"loss": 0.7494, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.22793606671299513, |
|
"grad_norm": 0.29982951283454895, |
|
"learning_rate": 0.000162, |
|
"loss": 0.6786, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.23071577484364142, |
|
"grad_norm": 0.3004741966724396, |
|
"learning_rate": 0.000164, |
|
"loss": 0.6723, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.2334954829742877, |
|
"grad_norm": 0.30331557989120483, |
|
"learning_rate": 0.000166, |
|
"loss": 0.6686, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.23627519110493397, |
|
"grad_norm": 0.320073664188385, |
|
"learning_rate": 0.000168, |
|
"loss": 0.7309, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.23905489923558026, |
|
"grad_norm": 0.37164461612701416, |
|
"learning_rate": 0.00017, |
|
"loss": 0.883, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.24183460736622656, |
|
"grad_norm": 0.3153933584690094, |
|
"learning_rate": 0.000172, |
|
"loss": 0.7538, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.24461431549687282, |
|
"grad_norm": 0.37620604038238525, |
|
"learning_rate": 0.000174, |
|
"loss": 0.7233, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.2473940236275191, |
|
"grad_norm": 0.3310216963291168, |
|
"learning_rate": 0.00017600000000000002, |
|
"loss": 0.668, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.2501737317581654, |
|
"grad_norm": 0.3648437261581421, |
|
"learning_rate": 0.00017800000000000002, |
|
"loss": 0.7219, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.2529534398888117, |
|
"grad_norm": 0.33160319924354553, |
|
"learning_rate": 0.00018, |
|
"loss": 0.6968, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.25573314801945796, |
|
"grad_norm": 0.3842083513736725, |
|
"learning_rate": 0.000182, |
|
"loss": 0.7334, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.2585128561501042, |
|
"grad_norm": 0.34885042905807495, |
|
"learning_rate": 0.00018400000000000003, |
|
"loss": 0.6107, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.26129256428075054, |
|
"grad_norm": 0.4350070059299469, |
|
"learning_rate": 0.00018600000000000002, |
|
"loss": 0.6964, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.2640722724113968, |
|
"grad_norm": 0.3995032012462616, |
|
"learning_rate": 0.000188, |
|
"loss": 0.7111, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.26685198054204307, |
|
"grad_norm": 0.4035239517688751, |
|
"learning_rate": 0.00019, |
|
"loss": 0.6004, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.2696316886726894, |
|
"grad_norm": 0.39563143253326416, |
|
"learning_rate": 0.000192, |
|
"loss": 0.7174, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.27241139680333565, |
|
"grad_norm": 0.4484492540359497, |
|
"learning_rate": 0.000194, |
|
"loss": 0.732, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.2751911049339819, |
|
"grad_norm": 0.4170342683792114, |
|
"learning_rate": 0.000196, |
|
"loss": 0.6415, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.27797081306462823, |
|
"grad_norm": 0.36684471368789673, |
|
"learning_rate": 0.00019800000000000002, |
|
"loss": 0.7303, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.2807505211952745, |
|
"grad_norm": 0.417539119720459, |
|
"learning_rate": 0.0002, |
|
"loss": 0.7282, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.28353022932592076, |
|
"grad_norm": 0.3748982846736908, |
|
"learning_rate": 0.00019979591836734694, |
|
"loss": 0.7453, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.2863099374565671, |
|
"grad_norm": 0.3669414222240448, |
|
"learning_rate": 0.0001995918367346939, |
|
"loss": 0.7206, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.28908964558721334, |
|
"grad_norm": 0.43745529651641846, |
|
"learning_rate": 0.00019938775510204082, |
|
"loss": 0.7008, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.2918693537178596, |
|
"grad_norm": 0.4588426351547241, |
|
"learning_rate": 0.00019918367346938775, |
|
"loss": 0.6981, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.2946490618485059, |
|
"grad_norm": 0.46913576126098633, |
|
"learning_rate": 0.0001989795918367347, |
|
"loss": 0.7276, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.2974287699791522, |
|
"grad_norm": 0.38426539301872253, |
|
"learning_rate": 0.00019877551020408164, |
|
"loss": 0.6409, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.30020847810979845, |
|
"grad_norm": 0.4170157313346863, |
|
"learning_rate": 0.0001985714285714286, |
|
"loss": 0.6113, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.30298818624044477, |
|
"grad_norm": 0.4131574332714081, |
|
"learning_rate": 0.00019836734693877553, |
|
"loss": 0.5925, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.30576789437109103, |
|
"grad_norm": 0.4434458017349243, |
|
"learning_rate": 0.00019816326530612246, |
|
"loss": 0.7052, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.3085476025017373, |
|
"grad_norm": 0.45088446140289307, |
|
"learning_rate": 0.00019795918367346938, |
|
"loss": 0.5575, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.3113273106323836, |
|
"grad_norm": 0.48712992668151855, |
|
"learning_rate": 0.00019775510204081634, |
|
"loss": 0.6706, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.3141070187630299, |
|
"grad_norm": 0.44475221633911133, |
|
"learning_rate": 0.00019755102040816327, |
|
"loss": 0.6569, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 0.31688672689367614, |
|
"grad_norm": 0.4516242742538452, |
|
"learning_rate": 0.0001973469387755102, |
|
"loss": 0.5981, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.31966643502432246, |
|
"grad_norm": 0.4031848907470703, |
|
"learning_rate": 0.00019714285714285716, |
|
"loss": 0.7407, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.3224461431549687, |
|
"grad_norm": 0.5948619842529297, |
|
"learning_rate": 0.00019693877551020409, |
|
"loss": 0.5176, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.325225851285615, |
|
"grad_norm": 0.42586585879325867, |
|
"learning_rate": 0.00019673469387755104, |
|
"loss": 0.5305, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.3280055594162613, |
|
"grad_norm": 1.2202147245407104, |
|
"learning_rate": 0.00019653061224489797, |
|
"loss": 0.7352, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.33078526754690757, |
|
"grad_norm": 0.4561997354030609, |
|
"learning_rate": 0.0001963265306122449, |
|
"loss": 0.4617, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 0.33356497567755383, |
|
"grad_norm": 0.38021618127822876, |
|
"learning_rate": 0.00019612244897959183, |
|
"loss": 0.5754, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.33634468380820015, |
|
"grad_norm": 0.4197412431240082, |
|
"learning_rate": 0.0001959183673469388, |
|
"loss": 0.6426, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 0.3391243919388464, |
|
"grad_norm": 0.39121460914611816, |
|
"learning_rate": 0.00019571428571428572, |
|
"loss": 0.6242, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.3419041000694927, |
|
"grad_norm": 0.413143515586853, |
|
"learning_rate": 0.00019551020408163265, |
|
"loss": 0.6013, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 0.344683808200139, |
|
"grad_norm": 0.4672967195510864, |
|
"learning_rate": 0.0001953061224489796, |
|
"loss": 0.5439, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.34746351633078526, |
|
"grad_norm": 0.4725366532802582, |
|
"learning_rate": 0.00019510204081632656, |
|
"loss": 0.6042, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.3502432244614315, |
|
"grad_norm": 0.483952134847641, |
|
"learning_rate": 0.0001948979591836735, |
|
"loss": 0.4544, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.35302293259207784, |
|
"grad_norm": 0.39228469133377075, |
|
"learning_rate": 0.00019469387755102042, |
|
"loss": 0.6016, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 0.3558026407227241, |
|
"grad_norm": 0.4152607023715973, |
|
"learning_rate": 0.00019448979591836735, |
|
"loss": 0.4931, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.35858234885337037, |
|
"grad_norm": 0.402338445186615, |
|
"learning_rate": 0.0001942857142857143, |
|
"loss": 0.525, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.3613620569840167, |
|
"grad_norm": 0.42365092039108276, |
|
"learning_rate": 0.00019408163265306123, |
|
"loss": 0.6608, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.36414176511466295, |
|
"grad_norm": 0.4249265491962433, |
|
"learning_rate": 0.00019387755102040816, |
|
"loss": 0.5565, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 0.3669214732453092, |
|
"grad_norm": 0.6368371248245239, |
|
"learning_rate": 0.0001936734693877551, |
|
"loss": 0.4547, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.36970118137595553, |
|
"grad_norm": 0.37348538637161255, |
|
"learning_rate": 0.00019346938775510205, |
|
"loss": 0.4073, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 0.3724808895066018, |
|
"grad_norm": 0.3562554717063904, |
|
"learning_rate": 0.000193265306122449, |
|
"loss": 0.4968, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.3752605976372481, |
|
"grad_norm": 0.42278632521629333, |
|
"learning_rate": 0.00019306122448979593, |
|
"loss": 0.5421, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.3780403057678944, |
|
"grad_norm": 0.47804152965545654, |
|
"learning_rate": 0.00019285714285714286, |
|
"loss": 0.6516, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.38082001389854064, |
|
"grad_norm": 0.4298154413700104, |
|
"learning_rate": 0.0001926530612244898, |
|
"loss": 0.4563, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 0.38359972202918696, |
|
"grad_norm": 0.37862634658813477, |
|
"learning_rate": 0.00019244897959183675, |
|
"loss": 0.5742, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.3863794301598332, |
|
"grad_norm": 0.5051096081733704, |
|
"learning_rate": 0.00019224489795918368, |
|
"loss": 0.4427, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 0.3891591382904795, |
|
"grad_norm": 0.4495854079723358, |
|
"learning_rate": 0.0001920408163265306, |
|
"loss": 0.5915, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.3919388464211258, |
|
"grad_norm": 0.535527765750885, |
|
"learning_rate": 0.00019183673469387756, |
|
"loss": 0.4995, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 0.39471855455177207, |
|
"grad_norm": 0.4394996464252472, |
|
"learning_rate": 0.00019163265306122452, |
|
"loss": 0.4898, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.39749826268241834, |
|
"grad_norm": 0.3254806697368622, |
|
"learning_rate": 0.00019142857142857145, |
|
"loss": 0.4628, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 0.40027797081306465, |
|
"grad_norm": 0.4018654525279999, |
|
"learning_rate": 0.00019122448979591838, |
|
"loss": 0.571, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.4030576789437109, |
|
"grad_norm": 0.4496287703514099, |
|
"learning_rate": 0.0001910204081632653, |
|
"loss": 0.4502, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.4058373870743572, |
|
"grad_norm": 0.3519289195537567, |
|
"learning_rate": 0.00019081632653061227, |
|
"loss": 0.5355, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.4086170952050035, |
|
"grad_norm": 0.44861000776290894, |
|
"learning_rate": 0.0001906122448979592, |
|
"loss": 0.5026, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 0.41139680333564976, |
|
"grad_norm": 0.4265352487564087, |
|
"learning_rate": 0.00019040816326530612, |
|
"loss": 0.5444, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.414176511466296, |
|
"grad_norm": 0.3963538706302643, |
|
"learning_rate": 0.00019020408163265305, |
|
"loss": 0.5061, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 0.41695621959694235, |
|
"grad_norm": 0.4118248522281647, |
|
"learning_rate": 0.00019, |
|
"loss": 0.4564, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.4197359277275886, |
|
"grad_norm": 0.3554701507091522, |
|
"learning_rate": 0.00018979591836734697, |
|
"loss": 0.5173, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 0.4225156358582349, |
|
"grad_norm": 0.3614487648010254, |
|
"learning_rate": 0.0001895918367346939, |
|
"loss": 0.5661, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.4252953439888812, |
|
"grad_norm": 0.3155677318572998, |
|
"learning_rate": 0.00018938775510204083, |
|
"loss": 0.5626, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 0.42807505211952745, |
|
"grad_norm": 0.4833175241947174, |
|
"learning_rate": 0.00018918367346938776, |
|
"loss": 0.6273, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 0.4308547602501737, |
|
"grad_norm": 0.37056633830070496, |
|
"learning_rate": 0.0001889795918367347, |
|
"loss": 0.5984, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.43363446838082004, |
|
"grad_norm": 0.36759206652641296, |
|
"learning_rate": 0.00018877551020408164, |
|
"loss": 0.5022, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.4364141765114663, |
|
"grad_norm": 0.4722707271575928, |
|
"learning_rate": 0.00018857142857142857, |
|
"loss": 0.5679, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 0.43919388464211256, |
|
"grad_norm": 0.5502737760543823, |
|
"learning_rate": 0.0001883673469387755, |
|
"loss": 0.5484, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 0.4419735927727589, |
|
"grad_norm": 0.41092830896377563, |
|
"learning_rate": 0.00018816326530612246, |
|
"loss": 0.5346, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 0.44475330090340515, |
|
"grad_norm": 0.35556185245513916, |
|
"learning_rate": 0.0001879591836734694, |
|
"loss": 0.5094, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.4475330090340514, |
|
"grad_norm": 0.4490595757961273, |
|
"learning_rate": 0.00018775510204081634, |
|
"loss": 0.5659, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 0.45031271716469773, |
|
"grad_norm": 0.4202437102794647, |
|
"learning_rate": 0.00018755102040816327, |
|
"loss": 0.5904, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.453092425295344, |
|
"grad_norm": 0.39136362075805664, |
|
"learning_rate": 0.00018734693877551023, |
|
"loss": 0.5214, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 0.45587213342599026, |
|
"grad_norm": 0.3859161138534546, |
|
"learning_rate": 0.00018714285714285716, |
|
"loss": 0.5011, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.4586518415566366, |
|
"grad_norm": 0.5123438835144043, |
|
"learning_rate": 0.0001869387755102041, |
|
"loss": 0.6053, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.46143154968728284, |
|
"grad_norm": 0.466137170791626, |
|
"learning_rate": 0.00018673469387755102, |
|
"loss": 0.4499, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 0.4642112578179291, |
|
"grad_norm": 0.4053160548210144, |
|
"learning_rate": 0.00018653061224489797, |
|
"loss": 0.5452, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 0.4669909659485754, |
|
"grad_norm": 0.3608758747577667, |
|
"learning_rate": 0.0001863265306122449, |
|
"loss": 0.4941, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.4697706740792217, |
|
"grad_norm": 0.36142799258232117, |
|
"learning_rate": 0.00018612244897959183, |
|
"loss": 0.5056, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 0.47255038220986795, |
|
"grad_norm": 0.2707204818725586, |
|
"learning_rate": 0.0001859183673469388, |
|
"loss": 0.3958, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.47533009034051427, |
|
"grad_norm": 0.3678928017616272, |
|
"learning_rate": 0.00018571428571428572, |
|
"loss": 0.47, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 0.47810979847116053, |
|
"grad_norm": 0.37397581338882446, |
|
"learning_rate": 0.00018551020408163267, |
|
"loss": 0.5774, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.4808895066018068, |
|
"grad_norm": 0.3246667981147766, |
|
"learning_rate": 0.0001853061224489796, |
|
"loss": 0.4607, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 0.4836692147324531, |
|
"grad_norm": 0.34554117918014526, |
|
"learning_rate": 0.00018510204081632653, |
|
"loss": 0.3862, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 0.4864489228630994, |
|
"grad_norm": 0.356503963470459, |
|
"learning_rate": 0.00018489795918367346, |
|
"loss": 0.4969, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.48922863099374564, |
|
"grad_norm": 0.3777051866054535, |
|
"learning_rate": 0.00018469387755102042, |
|
"loss": 0.4337, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.49200833912439196, |
|
"grad_norm": 0.3148108422756195, |
|
"learning_rate": 0.00018448979591836735, |
|
"loss": 0.4244, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 0.4947880472550382, |
|
"grad_norm": 0.49584245681762695, |
|
"learning_rate": 0.00018428571428571428, |
|
"loss": 0.5775, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 0.4975677553856845, |
|
"grad_norm": 0.34690654277801514, |
|
"learning_rate": 0.00018408163265306123, |
|
"loss": 0.577, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 0.5003474635163307, |
|
"grad_norm": 0.36074724793434143, |
|
"learning_rate": 0.0001838775510204082, |
|
"loss": 0.441, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.5031271716469771, |
|
"grad_norm": 0.3747076392173767, |
|
"learning_rate": 0.00018367346938775512, |
|
"loss": 0.5022, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 0.5059068797776234, |
|
"grad_norm": 0.41926515102386475, |
|
"learning_rate": 0.00018346938775510205, |
|
"loss": 0.4178, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 0.5086865879082696, |
|
"grad_norm": 0.32807132601737976, |
|
"learning_rate": 0.00018326530612244898, |
|
"loss": 0.4493, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 0.5114662960389159, |
|
"grad_norm": 0.3409689962863922, |
|
"learning_rate": 0.00018306122448979593, |
|
"loss": 0.557, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 0.5142460041695622, |
|
"grad_norm": 0.4119493365287781, |
|
"learning_rate": 0.00018285714285714286, |
|
"loss": 0.4969, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.5170257123002084, |
|
"grad_norm": 0.2936202585697174, |
|
"learning_rate": 0.0001826530612244898, |
|
"loss": 0.4222, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 0.5198054204308548, |
|
"grad_norm": 0.4165465235710144, |
|
"learning_rate": 0.00018244897959183672, |
|
"loss": 0.5563, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 0.5225851285615011, |
|
"grad_norm": 0.34087347984313965, |
|
"learning_rate": 0.00018224489795918368, |
|
"loss": 0.5508, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 0.5253648366921473, |
|
"grad_norm": 0.39741185307502747, |
|
"learning_rate": 0.00018204081632653064, |
|
"loss": 0.4635, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 0.5281445448227936, |
|
"grad_norm": 0.373943954706192, |
|
"learning_rate": 0.00018183673469387757, |
|
"loss": 0.4667, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.5309242529534399, |
|
"grad_norm": 0.3398171067237854, |
|
"learning_rate": 0.0001816326530612245, |
|
"loss": 0.488, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 0.5337039610840861, |
|
"grad_norm": 0.5641401410102844, |
|
"learning_rate": 0.00018142857142857142, |
|
"loss": 0.377, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.5364836692147325, |
|
"grad_norm": 0.47961774468421936, |
|
"learning_rate": 0.00018122448979591838, |
|
"loss": 0.5046, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 0.5392633773453788, |
|
"grad_norm": 0.4699658155441284, |
|
"learning_rate": 0.0001810204081632653, |
|
"loss": 0.4936, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 0.542043085476025, |
|
"grad_norm": 0.2884581983089447, |
|
"learning_rate": 0.00018081632653061224, |
|
"loss": 0.4059, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.5448227936066713, |
|
"grad_norm": 0.4616682827472687, |
|
"learning_rate": 0.00018061224489795917, |
|
"loss": 0.553, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 0.5476025017373176, |
|
"grad_norm": 0.35249197483062744, |
|
"learning_rate": 0.00018040816326530615, |
|
"loss": 0.4637, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 0.5503822098679638, |
|
"grad_norm": 0.4296030104160309, |
|
"learning_rate": 0.00018020408163265308, |
|
"loss": 0.5117, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 0.5531619179986101, |
|
"grad_norm": 0.3835342228412628, |
|
"learning_rate": 0.00018, |
|
"loss": 0.5288, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 0.5559416261292565, |
|
"grad_norm": 0.3516342043876648, |
|
"learning_rate": 0.00017979591836734694, |
|
"loss": 0.5322, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.5587213342599027, |
|
"grad_norm": 0.4156709909439087, |
|
"learning_rate": 0.0001795918367346939, |
|
"loss": 0.4886, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 0.561501042390549, |
|
"grad_norm": 0.32229727506637573, |
|
"learning_rate": 0.00017938775510204083, |
|
"loss": 0.4377, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 0.5642807505211953, |
|
"grad_norm": 0.384962260723114, |
|
"learning_rate": 0.00017918367346938776, |
|
"loss": 0.5345, |
|
"step": 203 |
|
}, |
|
{ |
|
"epoch": 0.5670604586518415, |
|
"grad_norm": 0.41784927248954773, |
|
"learning_rate": 0.00017897959183673469, |
|
"loss": 0.5386, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 0.5698401667824878, |
|
"grad_norm": 0.46640586853027344, |
|
"learning_rate": 0.00017877551020408164, |
|
"loss": 0.4544, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.5726198749131342, |
|
"grad_norm": 0.34132063388824463, |
|
"learning_rate": 0.0001785714285714286, |
|
"loss": 0.454, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 0.5753995830437804, |
|
"grad_norm": 0.461137592792511, |
|
"learning_rate": 0.00017836734693877553, |
|
"loss": 0.478, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 0.5781792911744267, |
|
"grad_norm": 0.5866886377334595, |
|
"learning_rate": 0.00017816326530612246, |
|
"loss": 0.546, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 0.580958999305073, |
|
"grad_norm": 0.3185846209526062, |
|
"learning_rate": 0.0001779591836734694, |
|
"loss": 0.5385, |
|
"step": 209 |
|
}, |
|
{ |
|
"epoch": 0.5837387074357192, |
|
"grad_norm": 0.43802475929260254, |
|
"learning_rate": 0.00017775510204081634, |
|
"loss": 0.554, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.5865184155663655, |
|
"grad_norm": 0.2952940762042999, |
|
"learning_rate": 0.00017755102040816327, |
|
"loss": 0.4358, |
|
"step": 211 |
|
}, |
|
{ |
|
"epoch": 0.5892981236970118, |
|
"grad_norm": 0.32370179891586304, |
|
"learning_rate": 0.0001773469387755102, |
|
"loss": 0.4418, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 0.592077831827658, |
|
"grad_norm": 0.38936758041381836, |
|
"learning_rate": 0.00017714285714285713, |
|
"loss": 0.409, |
|
"step": 213 |
|
}, |
|
{ |
|
"epoch": 0.5948575399583044, |
|
"grad_norm": 0.3157341182231903, |
|
"learning_rate": 0.0001769387755102041, |
|
"loss": 0.4019, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 0.5976372480889507, |
|
"grad_norm": 0.39846348762512207, |
|
"learning_rate": 0.00017673469387755104, |
|
"loss": 0.4804, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.6004169562195969, |
|
"grad_norm": 0.3177434802055359, |
|
"learning_rate": 0.00017653061224489797, |
|
"loss": 0.4092, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 0.6031966643502432, |
|
"grad_norm": 0.39317747950553894, |
|
"learning_rate": 0.0001763265306122449, |
|
"loss": 0.5265, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 0.6059763724808895, |
|
"grad_norm": 0.26835039258003235, |
|
"learning_rate": 0.00017612244897959186, |
|
"loss": 0.3989, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 0.6087560806115357, |
|
"grad_norm": 0.4008518159389496, |
|
"learning_rate": 0.0001759183673469388, |
|
"loss": 0.498, |
|
"step": 219 |
|
}, |
|
{ |
|
"epoch": 0.6115357887421821, |
|
"grad_norm": 0.31940510869026184, |
|
"learning_rate": 0.00017571428571428572, |
|
"loss": 0.4493, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.6143154968728284, |
|
"grad_norm": 0.43578922748565674, |
|
"learning_rate": 0.00017551020408163265, |
|
"loss": 0.4742, |
|
"step": 221 |
|
}, |
|
{ |
|
"epoch": 0.6170952050034746, |
|
"grad_norm": 0.3231724500656128, |
|
"learning_rate": 0.0001753061224489796, |
|
"loss": 0.5019, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 0.6198749131341209, |
|
"grad_norm": 0.30763351917266846, |
|
"learning_rate": 0.00017510204081632653, |
|
"loss": 0.6015, |
|
"step": 223 |
|
}, |
|
{ |
|
"epoch": 0.6226546212647672, |
|
"grad_norm": 0.32532060146331787, |
|
"learning_rate": 0.0001748979591836735, |
|
"loss": 0.3062, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 0.6254343293954134, |
|
"grad_norm": 0.39833390712738037, |
|
"learning_rate": 0.00017469387755102042, |
|
"loss": 0.3969, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.6282140375260598, |
|
"grad_norm": 0.411516010761261, |
|
"learning_rate": 0.00017448979591836735, |
|
"loss": 0.4763, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 0.6309937456567061, |
|
"grad_norm": 0.40843451023101807, |
|
"learning_rate": 0.0001742857142857143, |
|
"loss": 0.4998, |
|
"step": 227 |
|
}, |
|
{ |
|
"epoch": 0.6337734537873523, |
|
"grad_norm": 0.39122384786605835, |
|
"learning_rate": 0.00017408163265306123, |
|
"loss": 0.4365, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 0.6365531619179986, |
|
"grad_norm": 0.37201231718063354, |
|
"learning_rate": 0.00017387755102040816, |
|
"loss": 0.4427, |
|
"step": 229 |
|
}, |
|
{ |
|
"epoch": 0.6393328700486449, |
|
"grad_norm": 0.4359400272369385, |
|
"learning_rate": 0.0001736734693877551, |
|
"loss": 0.4686, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.6421125781792911, |
|
"grad_norm": 0.4254358112812042, |
|
"learning_rate": 0.00017346938775510205, |
|
"loss": 0.4274, |
|
"step": 231 |
|
}, |
|
{ |
|
"epoch": 0.6448922863099374, |
|
"grad_norm": 0.383859783411026, |
|
"learning_rate": 0.00017326530612244898, |
|
"loss": 0.3683, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 0.6476719944405838, |
|
"grad_norm": 0.41463732719421387, |
|
"learning_rate": 0.00017306122448979594, |
|
"loss": 0.4606, |
|
"step": 233 |
|
}, |
|
{ |
|
"epoch": 0.65045170257123, |
|
"grad_norm": 0.32726776599884033, |
|
"learning_rate": 0.00017285714285714287, |
|
"loss": 0.3883, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 0.6532314107018763, |
|
"grad_norm": 0.4143235683441162, |
|
"learning_rate": 0.00017265306122448982, |
|
"loss": 0.5929, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.6560111188325226, |
|
"grad_norm": 0.419869601726532, |
|
"learning_rate": 0.00017244897959183675, |
|
"loss": 0.527, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 0.6587908269631688, |
|
"grad_norm": 0.4330616295337677, |
|
"learning_rate": 0.00017224489795918368, |
|
"loss": 0.523, |
|
"step": 237 |
|
}, |
|
{ |
|
"epoch": 0.6615705350938151, |
|
"grad_norm": 0.42590218782424927, |
|
"learning_rate": 0.0001720408163265306, |
|
"loss": 0.4894, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 0.6643502432244615, |
|
"grad_norm": 0.5940903425216675, |
|
"learning_rate": 0.00017183673469387757, |
|
"loss": 0.458, |
|
"step": 239 |
|
}, |
|
{ |
|
"epoch": 0.6671299513551077, |
|
"grad_norm": 0.3962993025779724, |
|
"learning_rate": 0.0001716326530612245, |
|
"loss": 0.4934, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.669909659485754, |
|
"grad_norm": 0.3732195496559143, |
|
"learning_rate": 0.00017142857142857143, |
|
"loss": 0.4779, |
|
"step": 241 |
|
}, |
|
{ |
|
"epoch": 0.6726893676164003, |
|
"grad_norm": 0.39993205666542053, |
|
"learning_rate": 0.00017122448979591838, |
|
"loss": 0.3835, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 0.6754690757470465, |
|
"grad_norm": 0.37663185596466064, |
|
"learning_rate": 0.0001710204081632653, |
|
"loss": 0.3876, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 0.6782487838776928, |
|
"grad_norm": 0.33526360988616943, |
|
"learning_rate": 0.00017081632653061227, |
|
"loss": 0.4415, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 0.6810284920083391, |
|
"grad_norm": 0.4333009421825409, |
|
"learning_rate": 0.0001706122448979592, |
|
"loss": 0.4897, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.6838082001389854, |
|
"grad_norm": 0.4983868896961212, |
|
"learning_rate": 0.00017040816326530613, |
|
"loss": 0.4477, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 0.6865879082696317, |
|
"grad_norm": 0.39352232217788696, |
|
"learning_rate": 0.00017020408163265306, |
|
"loss": 0.4524, |
|
"step": 247 |
|
}, |
|
{ |
|
"epoch": 0.689367616400278, |
|
"grad_norm": 0.40973153710365295, |
|
"learning_rate": 0.00017, |
|
"loss": 0.4884, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 0.6921473245309242, |
|
"grad_norm": 0.33771470189094543, |
|
"learning_rate": 0.00016979591836734694, |
|
"loss": 0.4938, |
|
"step": 249 |
|
}, |
|
{ |
|
"epoch": 0.6949270326615705, |
|
"grad_norm": 0.34451448917388916, |
|
"learning_rate": 0.00016959183673469387, |
|
"loss": 0.3722, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.6977067407922168, |
|
"grad_norm": 0.38862481713294983, |
|
"learning_rate": 0.00016938775510204083, |
|
"loss": 0.426, |
|
"step": 251 |
|
}, |
|
{ |
|
"epoch": 0.700486448922863, |
|
"grad_norm": 0.41708311438560486, |
|
"learning_rate": 0.00016918367346938778, |
|
"loss": 0.5239, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 0.7032661570535094, |
|
"grad_norm": 0.4317916929721832, |
|
"learning_rate": 0.0001689795918367347, |
|
"loss": 0.4556, |
|
"step": 253 |
|
}, |
|
{ |
|
"epoch": 0.7060458651841557, |
|
"grad_norm": 0.3338056802749634, |
|
"learning_rate": 0.00016877551020408164, |
|
"loss": 0.3332, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 0.7088255733148019, |
|
"grad_norm": 0.41748374700546265, |
|
"learning_rate": 0.00016857142857142857, |
|
"loss": 0.4594, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.7116052814454482, |
|
"grad_norm": 0.4123172461986542, |
|
"learning_rate": 0.00016836734693877553, |
|
"loss": 0.4289, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 0.7143849895760945, |
|
"grad_norm": 0.40377530455589294, |
|
"learning_rate": 0.00016816326530612246, |
|
"loss": 0.5744, |
|
"step": 257 |
|
}, |
|
{ |
|
"epoch": 0.7171646977067407, |
|
"grad_norm": 0.369667649269104, |
|
"learning_rate": 0.0001679591836734694, |
|
"loss": 0.5848, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 0.7199444058373871, |
|
"grad_norm": 0.39214134216308594, |
|
"learning_rate": 0.00016775510204081632, |
|
"loss": 0.3939, |
|
"step": 259 |
|
}, |
|
{ |
|
"epoch": 0.7227241139680334, |
|
"grad_norm": 0.3648947775363922, |
|
"learning_rate": 0.00016755102040816327, |
|
"loss": 0.3941, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.7255038220986796, |
|
"grad_norm": 0.3529266119003296, |
|
"learning_rate": 0.00016734693877551023, |
|
"loss": 0.3505, |
|
"step": 261 |
|
}, |
|
{ |
|
"epoch": 0.7282835302293259, |
|
"grad_norm": 0.3326796889305115, |
|
"learning_rate": 0.00016714285714285716, |
|
"loss": 0.3973, |
|
"step": 262 |
|
}, |
|
{ |
|
"epoch": 0.7310632383599722, |
|
"grad_norm": 0.37780439853668213, |
|
"learning_rate": 0.0001669387755102041, |
|
"loss": 0.4882, |
|
"step": 263 |
|
}, |
|
{ |
|
"epoch": 0.7338429464906184, |
|
"grad_norm": 0.4995975196361542, |
|
"learning_rate": 0.00016673469387755102, |
|
"loss": 0.4417, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 0.7366226546212647, |
|
"grad_norm": 0.40474021434783936, |
|
"learning_rate": 0.00016653061224489797, |
|
"loss": 0.4511, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.7394023627519111, |
|
"grad_norm": 0.8704133629798889, |
|
"learning_rate": 0.0001663265306122449, |
|
"loss": 0.6664, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 0.7421820708825573, |
|
"grad_norm": 0.7991705536842346, |
|
"learning_rate": 0.00016612244897959183, |
|
"loss": 0.5963, |
|
"step": 267 |
|
}, |
|
{ |
|
"epoch": 0.7449617790132036, |
|
"grad_norm": 0.4240580201148987, |
|
"learning_rate": 0.00016591836734693876, |
|
"loss": 0.4474, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 0.7477414871438499, |
|
"grad_norm": 0.4676007032394409, |
|
"learning_rate": 0.00016571428571428575, |
|
"loss": 0.5125, |
|
"step": 269 |
|
}, |
|
{ |
|
"epoch": 0.7505211952744962, |
|
"grad_norm": 0.2894349694252014, |
|
"learning_rate": 0.00016551020408163268, |
|
"loss": 0.4297, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.7533009034051424, |
|
"grad_norm": 0.4876716732978821, |
|
"learning_rate": 0.0001653061224489796, |
|
"loss": 0.391, |
|
"step": 271 |
|
}, |
|
{ |
|
"epoch": 0.7560806115357888, |
|
"grad_norm": 0.37176764011383057, |
|
"learning_rate": 0.00016510204081632653, |
|
"loss": 0.4825, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 0.7588603196664351, |
|
"grad_norm": 0.49970927834510803, |
|
"learning_rate": 0.0001648979591836735, |
|
"loss": 0.3729, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 0.7616400277970813, |
|
"grad_norm": 0.3654176890850067, |
|
"learning_rate": 0.00016469387755102042, |
|
"loss": 0.4255, |
|
"step": 274 |
|
}, |
|
{ |
|
"epoch": 0.7644197359277276, |
|
"grad_norm": 0.44572046399116516, |
|
"learning_rate": 0.00016448979591836735, |
|
"loss": 0.502, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.7671994440583739, |
|
"grad_norm": 0.4408852159976959, |
|
"learning_rate": 0.00016428571428571428, |
|
"loss": 0.4563, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 0.7699791521890201, |
|
"grad_norm": 0.36519378423690796, |
|
"learning_rate": 0.00016408163265306124, |
|
"loss": 0.4626, |
|
"step": 277 |
|
}, |
|
{ |
|
"epoch": 0.7727588603196665, |
|
"grad_norm": 0.48397397994995117, |
|
"learning_rate": 0.0001638775510204082, |
|
"loss": 0.4365, |
|
"step": 278 |
|
}, |
|
{ |
|
"epoch": 0.7755385684503128, |
|
"grad_norm": 0.39511287212371826, |
|
"learning_rate": 0.00016367346938775512, |
|
"loss": 0.4178, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 0.778318276580959, |
|
"grad_norm": 0.5128254890441895, |
|
"learning_rate": 0.00016346938775510205, |
|
"loss": 0.3998, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.7810979847116053, |
|
"grad_norm": 0.5283316969871521, |
|
"learning_rate": 0.00016326530612244898, |
|
"loss": 0.3428, |
|
"step": 281 |
|
}, |
|
{ |
|
"epoch": 0.7838776928422516, |
|
"grad_norm": 0.4386744201183319, |
|
"learning_rate": 0.00016306122448979594, |
|
"loss": 0.3976, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 0.7866574009728978, |
|
"grad_norm": 0.5863499641418457, |
|
"learning_rate": 0.00016285714285714287, |
|
"loss": 0.5529, |
|
"step": 283 |
|
}, |
|
{ |
|
"epoch": 0.7894371091035441, |
|
"grad_norm": 0.27297189831733704, |
|
"learning_rate": 0.0001626530612244898, |
|
"loss": 0.3282, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 0.7922168172341905, |
|
"grad_norm": 0.29970136284828186, |
|
"learning_rate": 0.00016244897959183672, |
|
"loss": 0.365, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.7949965253648367, |
|
"grad_norm": 0.3835904598236084, |
|
"learning_rate": 0.00016224489795918368, |
|
"loss": 0.4768, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 0.797776233495483, |
|
"grad_norm": 0.4071420729160309, |
|
"learning_rate": 0.0001620408163265306, |
|
"loss": 0.5426, |
|
"step": 287 |
|
}, |
|
{ |
|
"epoch": 0.8005559416261293, |
|
"grad_norm": 0.28784051537513733, |
|
"learning_rate": 0.00016183673469387757, |
|
"loss": 0.4108, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 0.8033356497567755, |
|
"grad_norm": 0.4735048711299896, |
|
"learning_rate": 0.0001616326530612245, |
|
"loss": 0.5495, |
|
"step": 289 |
|
}, |
|
{ |
|
"epoch": 0.8061153578874218, |
|
"grad_norm": 0.2624104917049408, |
|
"learning_rate": 0.00016142857142857145, |
|
"loss": 0.4767, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.8088950660180682, |
|
"grad_norm": 0.38945189118385315, |
|
"learning_rate": 0.00016122448979591838, |
|
"loss": 0.5055, |
|
"step": 291 |
|
}, |
|
{ |
|
"epoch": 0.8116747741487144, |
|
"grad_norm": 0.4181615114212036, |
|
"learning_rate": 0.0001610204081632653, |
|
"loss": 0.4497, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 0.8144544822793607, |
|
"grad_norm": 0.37034186720848083, |
|
"learning_rate": 0.00016081632653061224, |
|
"loss": 0.3471, |
|
"step": 293 |
|
}, |
|
{ |
|
"epoch": 0.817234190410007, |
|
"grad_norm": 0.3208980858325958, |
|
"learning_rate": 0.0001606122448979592, |
|
"loss": 0.3868, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 0.8200138985406532, |
|
"grad_norm": 0.4345311224460602, |
|
"learning_rate": 0.00016040816326530613, |
|
"loss": 0.439, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.8227936066712995, |
|
"grad_norm": 0.31438905000686646, |
|
"learning_rate": 0.00016020408163265306, |
|
"loss": 0.4661, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 0.8255733148019458, |
|
"grad_norm": 0.2713527977466583, |
|
"learning_rate": 0.00016, |
|
"loss": 0.3945, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 0.828353022932592, |
|
"grad_norm": 0.2937558591365814, |
|
"learning_rate": 0.00015979591836734694, |
|
"loss": 0.3287, |
|
"step": 298 |
|
}, |
|
{ |
|
"epoch": 0.8311327310632384, |
|
"grad_norm": 0.37041494250297546, |
|
"learning_rate": 0.0001595918367346939, |
|
"loss": 0.5133, |
|
"step": 299 |
|
}, |
|
{ |
|
"epoch": 0.8339124391938847, |
|
"grad_norm": 0.4814389646053314, |
|
"learning_rate": 0.00015938775510204083, |
|
"loss": 0.498, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.8339124391938847, |
|
"eval_loss": 0.4747912585735321, |
|
"eval_runtime": 212.458, |
|
"eval_samples_per_second": 1.694, |
|
"eval_steps_per_second": 1.694, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.8366921473245309, |
|
"grad_norm": 0.3283718526363373, |
|
"learning_rate": 0.00015918367346938776, |
|
"loss": 0.4485, |
|
"step": 301 |
|
}, |
|
{ |
|
"epoch": 0.8394718554551772, |
|
"grad_norm": 0.38611340522766113, |
|
"learning_rate": 0.0001589795918367347, |
|
"loss": 0.4188, |
|
"step": 302 |
|
}, |
|
{ |
|
"epoch": 0.8422515635858235, |
|
"grad_norm": 0.29100102186203003, |
|
"learning_rate": 0.00015877551020408164, |
|
"loss": 0.3489, |
|
"step": 303 |
|
}, |
|
{ |
|
"epoch": 0.8450312717164697, |
|
"grad_norm": 0.32593274116516113, |
|
"learning_rate": 0.00015857142857142857, |
|
"loss": 0.391, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 0.8478109798471161, |
|
"grad_norm": 0.5677832365036011, |
|
"learning_rate": 0.0001583673469387755, |
|
"loss": 0.4837, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.8505906879777624, |
|
"grad_norm": 0.334756463766098, |
|
"learning_rate": 0.00015816326530612246, |
|
"loss": 0.3565, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 0.8533703961084086, |
|
"grad_norm": 0.5696679353713989, |
|
"learning_rate": 0.00015795918367346942, |
|
"loss": 0.4759, |
|
"step": 307 |
|
}, |
|
{ |
|
"epoch": 0.8561501042390549, |
|
"grad_norm": 0.25399741530418396, |
|
"learning_rate": 0.00015775510204081634, |
|
"loss": 0.4281, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 0.8589298123697012, |
|
"grad_norm": 0.3591265380382538, |
|
"learning_rate": 0.00015755102040816327, |
|
"loss": 0.4464, |
|
"step": 309 |
|
}, |
|
{ |
|
"epoch": 0.8617095205003474, |
|
"grad_norm": 0.3444579839706421, |
|
"learning_rate": 0.0001573469387755102, |
|
"loss": 0.4565, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.8644892286309938, |
|
"grad_norm": 0.3792060315608978, |
|
"learning_rate": 0.00015714285714285716, |
|
"loss": 0.5026, |
|
"step": 311 |
|
}, |
|
{ |
|
"epoch": 0.8672689367616401, |
|
"grad_norm": 0.2783966362476349, |
|
"learning_rate": 0.0001569387755102041, |
|
"loss": 0.4912, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 0.8700486448922863, |
|
"grad_norm": 0.31529495120048523, |
|
"learning_rate": 0.00015673469387755102, |
|
"loss": 0.3993, |
|
"step": 313 |
|
}, |
|
{ |
|
"epoch": 0.8728283530229326, |
|
"grad_norm": 0.3652310073375702, |
|
"learning_rate": 0.00015653061224489795, |
|
"loss": 0.5009, |
|
"step": 314 |
|
}, |
|
{ |
|
"epoch": 0.8756080611535789, |
|
"grad_norm": 0.3820590078830719, |
|
"learning_rate": 0.0001563265306122449, |
|
"loss": 0.3424, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.8783877692842251, |
|
"grad_norm": 0.3363693654537201, |
|
"learning_rate": 0.00015612244897959186, |
|
"loss": 0.5251, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 0.8811674774148714, |
|
"grad_norm": 0.31948599219322205, |
|
"learning_rate": 0.0001559183673469388, |
|
"loss": 0.4199, |
|
"step": 317 |
|
}, |
|
{ |
|
"epoch": 0.8839471855455178, |
|
"grad_norm": 0.4196965992450714, |
|
"learning_rate": 0.00015571428571428572, |
|
"loss": 0.493, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 0.886726893676164, |
|
"grad_norm": 0.4823121428489685, |
|
"learning_rate": 0.00015551020408163265, |
|
"loss": 0.43, |
|
"step": 319 |
|
}, |
|
{ |
|
"epoch": 0.8895066018068103, |
|
"grad_norm": 0.32050636410713196, |
|
"learning_rate": 0.0001553061224489796, |
|
"loss": 0.4247, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.8922863099374566, |
|
"grad_norm": 0.3208867609500885, |
|
"learning_rate": 0.00015510204081632654, |
|
"loss": 0.3224, |
|
"step": 321 |
|
}, |
|
{ |
|
"epoch": 0.8950660180681028, |
|
"grad_norm": 0.36326608061790466, |
|
"learning_rate": 0.00015489795918367346, |
|
"loss": 0.4366, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 0.8978457261987491, |
|
"grad_norm": 0.44612210988998413, |
|
"learning_rate": 0.0001546938775510204, |
|
"loss": 0.4212, |
|
"step": 323 |
|
}, |
|
{ |
|
"epoch": 0.9006254343293955, |
|
"grad_norm": 0.5350055694580078, |
|
"learning_rate": 0.00015448979591836735, |
|
"loss": 0.42, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 0.9034051424600417, |
|
"grad_norm": 0.5650726556777954, |
|
"learning_rate": 0.0001542857142857143, |
|
"loss": 0.5001, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.906184850590688, |
|
"grad_norm": 0.2960895001888275, |
|
"learning_rate": 0.00015408163265306124, |
|
"loss": 0.2943, |
|
"step": 326 |
|
}, |
|
{ |
|
"epoch": 0.9089645587213343, |
|
"grad_norm": 0.4059947729110718, |
|
"learning_rate": 0.00015387755102040817, |
|
"loss": 0.4147, |
|
"step": 327 |
|
}, |
|
{ |
|
"epoch": 0.9117442668519805, |
|
"grad_norm": 0.31508710980415344, |
|
"learning_rate": 0.00015367346938775512, |
|
"loss": 0.3567, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 0.9145239749826268, |
|
"grad_norm": 0.30250322818756104, |
|
"learning_rate": 0.00015346938775510205, |
|
"loss": 0.4534, |
|
"step": 329 |
|
}, |
|
{ |
|
"epoch": 0.9173036831132731, |
|
"grad_norm": 0.45266756415367126, |
|
"learning_rate": 0.00015326530612244898, |
|
"loss": 0.4675, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.9200833912439194, |
|
"grad_norm": 0.2555678188800812, |
|
"learning_rate": 0.0001530612244897959, |
|
"loss": 0.4132, |
|
"step": 331 |
|
}, |
|
{ |
|
"epoch": 0.9228630993745657, |
|
"grad_norm": 0.3064277768135071, |
|
"learning_rate": 0.00015285714285714287, |
|
"loss": 0.5, |
|
"step": 332 |
|
}, |
|
{ |
|
"epoch": 0.925642807505212, |
|
"grad_norm": 0.40955591201782227, |
|
"learning_rate": 0.00015265306122448982, |
|
"loss": 0.4131, |
|
"step": 333 |
|
}, |
|
{ |
|
"epoch": 0.9284225156358582, |
|
"grad_norm": 0.32479333877563477, |
|
"learning_rate": 0.00015244897959183675, |
|
"loss": 0.4303, |
|
"step": 334 |
|
}, |
|
{ |
|
"epoch": 0.9312022237665045, |
|
"grad_norm": 0.4044603705406189, |
|
"learning_rate": 0.00015224489795918368, |
|
"loss": 0.4498, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.9339819318971508, |
|
"grad_norm": 0.4078894555568695, |
|
"learning_rate": 0.0001520408163265306, |
|
"loss": 0.4442, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 0.936761640027797, |
|
"grad_norm": 0.4486389756202698, |
|
"learning_rate": 0.00015183673469387757, |
|
"loss": 0.3902, |
|
"step": 337 |
|
}, |
|
{ |
|
"epoch": 0.9395413481584434, |
|
"grad_norm": 0.3673665523529053, |
|
"learning_rate": 0.0001516326530612245, |
|
"loss": 0.4158, |
|
"step": 338 |
|
}, |
|
{ |
|
"epoch": 0.9423210562890897, |
|
"grad_norm": 0.28217577934265137, |
|
"learning_rate": 0.00015142857142857143, |
|
"loss": 0.4327, |
|
"step": 339 |
|
}, |
|
{ |
|
"epoch": 0.9451007644197359, |
|
"grad_norm": 0.30868950486183167, |
|
"learning_rate": 0.00015122448979591836, |
|
"loss": 0.411, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.9478804725503822, |
|
"grad_norm": 0.27955666184425354, |
|
"learning_rate": 0.0001510204081632653, |
|
"loss": 0.4125, |
|
"step": 341 |
|
}, |
|
{ |
|
"epoch": 0.9506601806810285, |
|
"grad_norm": 0.49935731291770935, |
|
"learning_rate": 0.00015081632653061227, |
|
"loss": 0.369, |
|
"step": 342 |
|
}, |
|
{ |
|
"epoch": 0.9534398888116747, |
|
"grad_norm": 0.26663827896118164, |
|
"learning_rate": 0.0001506122448979592, |
|
"loss": 0.3864, |
|
"step": 343 |
|
}, |
|
{ |
|
"epoch": 0.9562195969423211, |
|
"grad_norm": 0.32989761233329773, |
|
"learning_rate": 0.00015040816326530613, |
|
"loss": 0.3859, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 0.9589993050729674, |
|
"grad_norm": 0.37399861216545105, |
|
"learning_rate": 0.00015020408163265306, |
|
"loss": 0.4813, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.9617790132036136, |
|
"grad_norm": 0.3359721302986145, |
|
"learning_rate": 0.00015000000000000001, |
|
"loss": 0.4135, |
|
"step": 346 |
|
}, |
|
{ |
|
"epoch": 0.9645587213342599, |
|
"grad_norm": 0.29389145970344543, |
|
"learning_rate": 0.00014979591836734694, |
|
"loss": 0.407, |
|
"step": 347 |
|
}, |
|
{ |
|
"epoch": 0.9673384294649062, |
|
"grad_norm": 0.39862900972366333, |
|
"learning_rate": 0.00014959183673469387, |
|
"loss": 0.4659, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 0.9701181375955524, |
|
"grad_norm": 0.36769983172416687, |
|
"learning_rate": 0.00014938775510204083, |
|
"loss": 0.4076, |
|
"step": 349 |
|
}, |
|
{ |
|
"epoch": 0.9728978457261988, |
|
"grad_norm": 0.29756975173950195, |
|
"learning_rate": 0.00014918367346938776, |
|
"loss": 0.3922, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.9756775538568451, |
|
"grad_norm": 0.4466356337070465, |
|
"learning_rate": 0.00014897959183673472, |
|
"loss": 0.4123, |
|
"step": 351 |
|
}, |
|
{ |
|
"epoch": 0.9784572619874913, |
|
"grad_norm": 0.5409598350524902, |
|
"learning_rate": 0.00014877551020408164, |
|
"loss": 0.2904, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 0.9812369701181376, |
|
"grad_norm": 0.5088945627212524, |
|
"learning_rate": 0.00014857142857142857, |
|
"loss": 0.3555, |
|
"step": 353 |
|
}, |
|
{ |
|
"epoch": 0.9840166782487839, |
|
"grad_norm": 0.41926005482673645, |
|
"learning_rate": 0.00014836734693877553, |
|
"loss": 0.3168, |
|
"step": 354 |
|
}, |
|
{ |
|
"epoch": 0.9867963863794301, |
|
"grad_norm": 0.3465840220451355, |
|
"learning_rate": 0.00014816326530612246, |
|
"loss": 0.4542, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.9895760945100764, |
|
"grad_norm": 0.29874542355537415, |
|
"learning_rate": 0.0001479591836734694, |
|
"loss": 0.4541, |
|
"step": 356 |
|
}, |
|
{ |
|
"epoch": 0.9923558026407228, |
|
"grad_norm": 0.32591426372528076, |
|
"learning_rate": 0.00014775510204081632, |
|
"loss": 0.3355, |
|
"step": 357 |
|
}, |
|
{ |
|
"epoch": 0.995135510771369, |
|
"grad_norm": 0.6298475861549377, |
|
"learning_rate": 0.00014755102040816328, |
|
"loss": 0.4996, |
|
"step": 358 |
|
}, |
|
{ |
|
"epoch": 0.9979152189020153, |
|
"grad_norm": 0.6214368939399719, |
|
"learning_rate": 0.0001473469387755102, |
|
"loss": 0.5361, |
|
"step": 359 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 0.29986897110939026, |
|
"learning_rate": 0.00014714285714285716, |
|
"loss": 0.2962, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 1.0027797081306462, |
|
"grad_norm": 0.27898886799812317, |
|
"learning_rate": 0.0001469387755102041, |
|
"loss": 0.3485, |
|
"step": 361 |
|
}, |
|
{ |
|
"epoch": 1.0055594162612926, |
|
"grad_norm": 0.3615298569202423, |
|
"learning_rate": 0.00014673469387755102, |
|
"loss": 0.4651, |
|
"step": 362 |
|
}, |
|
{ |
|
"epoch": 1.0083391243919388, |
|
"grad_norm": 0.31298184394836426, |
|
"learning_rate": 0.00014653061224489798, |
|
"loss": 0.4272, |
|
"step": 363 |
|
}, |
|
{ |
|
"epoch": 1.011118832522585, |
|
"grad_norm": 0.27693745493888855, |
|
"learning_rate": 0.0001463265306122449, |
|
"loss": 0.36, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 1.0138985406532315, |
|
"grad_norm": 0.3590083122253418, |
|
"learning_rate": 0.00014612244897959183, |
|
"loss": 0.4548, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 1.0166782487838777, |
|
"grad_norm": 0.3620007336139679, |
|
"learning_rate": 0.0001459183673469388, |
|
"loss": 0.3749, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 1.019457956914524, |
|
"grad_norm": 0.46811267733573914, |
|
"learning_rate": 0.00014571428571428572, |
|
"loss": 0.4619, |
|
"step": 367 |
|
}, |
|
{ |
|
"epoch": 1.0222376650451703, |
|
"grad_norm": 0.3233739137649536, |
|
"learning_rate": 0.00014551020408163265, |
|
"loss": 0.4955, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 1.0250173731758165, |
|
"grad_norm": 0.3408876061439514, |
|
"learning_rate": 0.0001453061224489796, |
|
"loss": 0.3564, |
|
"step": 369 |
|
}, |
|
{ |
|
"epoch": 1.0277970813064627, |
|
"grad_norm": 0.33044156432151794, |
|
"learning_rate": 0.00014510204081632654, |
|
"loss": 0.4784, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 1.0305767894371092, |
|
"grad_norm": 0.4836321175098419, |
|
"learning_rate": 0.0001448979591836735, |
|
"loss": 0.2986, |
|
"step": 371 |
|
}, |
|
{ |
|
"epoch": 1.0333564975677554, |
|
"grad_norm": 0.3213842213153839, |
|
"learning_rate": 0.00014469387755102042, |
|
"loss": 0.3768, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 1.0361362056984016, |
|
"grad_norm": 0.4250502288341522, |
|
"learning_rate": 0.00014448979591836735, |
|
"loss": 0.4024, |
|
"step": 373 |
|
}, |
|
{ |
|
"epoch": 1.038915913829048, |
|
"grad_norm": 0.5815131068229675, |
|
"learning_rate": 0.00014428571428571428, |
|
"loss": 0.4631, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 1.0416956219596942, |
|
"grad_norm": 0.3646114468574524, |
|
"learning_rate": 0.00014408163265306124, |
|
"loss": 0.4658, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 1.0444753300903404, |
|
"grad_norm": 0.4233134388923645, |
|
"learning_rate": 0.00014387755102040817, |
|
"loss": 0.4287, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 1.0472550382209869, |
|
"grad_norm": 0.29038846492767334, |
|
"learning_rate": 0.0001436734693877551, |
|
"loss": 0.3537, |
|
"step": 377 |
|
}, |
|
{ |
|
"epoch": 1.050034746351633, |
|
"grad_norm": 0.3281858265399933, |
|
"learning_rate": 0.00014346938775510205, |
|
"loss": 0.3594, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 1.0528144544822793, |
|
"grad_norm": 0.3003385066986084, |
|
"learning_rate": 0.00014326530612244898, |
|
"loss": 0.429, |
|
"step": 379 |
|
}, |
|
{ |
|
"epoch": 1.0555941626129257, |
|
"grad_norm": 0.42301732301712036, |
|
"learning_rate": 0.00014306122448979594, |
|
"loss": 0.3517, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 1.058373870743572, |
|
"grad_norm": 0.3951142728328705, |
|
"learning_rate": 0.00014285714285714287, |
|
"loss": 0.3341, |
|
"step": 381 |
|
}, |
|
{ |
|
"epoch": 1.0611535788742181, |
|
"grad_norm": 0.3241204023361206, |
|
"learning_rate": 0.0001426530612244898, |
|
"loss": 0.4393, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 1.0639332870048646, |
|
"grad_norm": 0.37029820680618286, |
|
"learning_rate": 0.00014244897959183673, |
|
"loss": 0.2978, |
|
"step": 383 |
|
}, |
|
{ |
|
"epoch": 1.0667129951355108, |
|
"grad_norm": 0.2742837965488434, |
|
"learning_rate": 0.00014224489795918368, |
|
"loss": 0.3306, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 1.069492703266157, |
|
"grad_norm": 0.26682788133621216, |
|
"learning_rate": 0.0001420408163265306, |
|
"loss": 0.4009, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 1.0722724113968034, |
|
"grad_norm": 0.3267010450363159, |
|
"learning_rate": 0.00014183673469387754, |
|
"loss": 0.303, |
|
"step": 386 |
|
}, |
|
{ |
|
"epoch": 1.0750521195274496, |
|
"grad_norm": 0.3354584276676178, |
|
"learning_rate": 0.0001416326530612245, |
|
"loss": 0.3721, |
|
"step": 387 |
|
}, |
|
{ |
|
"epoch": 1.0778318276580958, |
|
"grad_norm": 0.33933278918266296, |
|
"learning_rate": 0.00014142857142857145, |
|
"loss": 0.437, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 1.0806115357887423, |
|
"grad_norm": 0.6241405606269836, |
|
"learning_rate": 0.00014122448979591838, |
|
"loss": 0.4561, |
|
"step": 389 |
|
}, |
|
{ |
|
"epoch": 1.0833912439193885, |
|
"grad_norm": 0.6265623569488525, |
|
"learning_rate": 0.00014102040816326531, |
|
"loss": 0.5705, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 1.0861709520500347, |
|
"grad_norm": 0.4538445770740509, |
|
"learning_rate": 0.00014081632653061224, |
|
"loss": 0.3084, |
|
"step": 391 |
|
}, |
|
{ |
|
"epoch": 1.088950660180681, |
|
"grad_norm": 0.2949851453304291, |
|
"learning_rate": 0.0001406122448979592, |
|
"loss": 0.3774, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 1.0917303683113273, |
|
"grad_norm": 0.25528761744499207, |
|
"learning_rate": 0.00014040816326530613, |
|
"loss": 0.362, |
|
"step": 393 |
|
}, |
|
{ |
|
"epoch": 1.0945100764419735, |
|
"grad_norm": 0.25837084650993347, |
|
"learning_rate": 0.00014020408163265306, |
|
"loss": 0.341, |
|
"step": 394 |
|
}, |
|
{ |
|
"epoch": 1.09728978457262, |
|
"grad_norm": 0.3381750285625458, |
|
"learning_rate": 0.00014, |
|
"loss": 0.409, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 1.1000694927032661, |
|
"grad_norm": 0.296481728553772, |
|
"learning_rate": 0.00013979591836734694, |
|
"loss": 0.3378, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 1.1028492008339124, |
|
"grad_norm": 0.2741848826408386, |
|
"learning_rate": 0.0001395918367346939, |
|
"loss": 0.3112, |
|
"step": 397 |
|
}, |
|
{ |
|
"epoch": 1.1056289089645588, |
|
"grad_norm": 0.2974790036678314, |
|
"learning_rate": 0.00013938775510204083, |
|
"loss": 0.434, |
|
"step": 398 |
|
}, |
|
{ |
|
"epoch": 1.108408617095205, |
|
"grad_norm": 0.5039945840835571, |
|
"learning_rate": 0.00013918367346938776, |
|
"loss": 0.4185, |
|
"step": 399 |
|
}, |
|
{ |
|
"epoch": 1.1111883252258512, |
|
"grad_norm": 0.3443140387535095, |
|
"learning_rate": 0.0001389795918367347, |
|
"loss": 0.2975, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.1139680333564976, |
|
"grad_norm": 0.38616564869880676, |
|
"learning_rate": 0.00013877551020408165, |
|
"loss": 0.5452, |
|
"step": 401 |
|
}, |
|
{ |
|
"epoch": 1.1167477414871438, |
|
"grad_norm": 0.34680166840553284, |
|
"learning_rate": 0.00013857142857142857, |
|
"loss": 0.3738, |
|
"step": 402 |
|
}, |
|
{ |
|
"epoch": 1.11952744961779, |
|
"grad_norm": 0.30565282702445984, |
|
"learning_rate": 0.0001383673469387755, |
|
"loss": 0.4436, |
|
"step": 403 |
|
}, |
|
{ |
|
"epoch": 1.1223071577484365, |
|
"grad_norm": 0.3291468322277069, |
|
"learning_rate": 0.00013816326530612243, |
|
"loss": 0.369, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 1.1250868658790827, |
|
"grad_norm": 0.36906489729881287, |
|
"learning_rate": 0.00013795918367346942, |
|
"loss": 0.4367, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 1.127866574009729, |
|
"grad_norm": 0.2758554220199585, |
|
"learning_rate": 0.00013775510204081635, |
|
"loss": 0.437, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 1.1306462821403753, |
|
"grad_norm": 0.3159145414829254, |
|
"learning_rate": 0.00013755102040816328, |
|
"loss": 0.4224, |
|
"step": 407 |
|
}, |
|
{ |
|
"epoch": 1.1334259902710215, |
|
"grad_norm": 0.25819459557533264, |
|
"learning_rate": 0.0001373469387755102, |
|
"loss": 0.331, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 1.1362056984016677, |
|
"grad_norm": 0.33512744307518005, |
|
"learning_rate": 0.00013714285714285716, |
|
"loss": 0.4196, |
|
"step": 409 |
|
}, |
|
{ |
|
"epoch": 1.1389854065323142, |
|
"grad_norm": 0.3556046187877655, |
|
"learning_rate": 0.0001369387755102041, |
|
"loss": 0.3745, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 1.1417651146629604, |
|
"grad_norm": 0.48709914088249207, |
|
"learning_rate": 0.00013673469387755102, |
|
"loss": 0.346, |
|
"step": 411 |
|
}, |
|
{ |
|
"epoch": 1.1445448227936066, |
|
"grad_norm": 0.3741767704486847, |
|
"learning_rate": 0.00013653061224489795, |
|
"loss": 0.4871, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 1.147324530924253, |
|
"grad_norm": 0.4699570834636688, |
|
"learning_rate": 0.0001363265306122449, |
|
"loss": 0.3674, |
|
"step": 413 |
|
}, |
|
{ |
|
"epoch": 1.1501042390548992, |
|
"grad_norm": 0.32232385873794556, |
|
"learning_rate": 0.00013612244897959184, |
|
"loss": 0.3821, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 1.1528839471855454, |
|
"grad_norm": 0.34662458300590515, |
|
"learning_rate": 0.0001359183673469388, |
|
"loss": 0.3121, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 1.1556636553161919, |
|
"grad_norm": 0.32288941740989685, |
|
"learning_rate": 0.00013571428571428572, |
|
"loss": 0.3808, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 1.158443363446838, |
|
"grad_norm": 0.3495519459247589, |
|
"learning_rate": 0.00013551020408163265, |
|
"loss": 0.352, |
|
"step": 417 |
|
}, |
|
{ |
|
"epoch": 1.1612230715774843, |
|
"grad_norm": 0.3813597559928894, |
|
"learning_rate": 0.0001353061224489796, |
|
"loss": 0.4044, |
|
"step": 418 |
|
}, |
|
{ |
|
"epoch": 1.1640027797081307, |
|
"grad_norm": 0.2824418246746063, |
|
"learning_rate": 0.00013510204081632654, |
|
"loss": 0.3902, |
|
"step": 419 |
|
}, |
|
{ |
|
"epoch": 1.166782487838777, |
|
"grad_norm": 0.25736352801322937, |
|
"learning_rate": 0.00013489795918367347, |
|
"loss": 0.3357, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 1.1695621959694231, |
|
"grad_norm": 0.33888882398605347, |
|
"learning_rate": 0.0001346938775510204, |
|
"loss": 0.3691, |
|
"step": 421 |
|
}, |
|
{ |
|
"epoch": 1.1723419041000696, |
|
"grad_norm": 0.3581472933292389, |
|
"learning_rate": 0.00013448979591836735, |
|
"loss": 0.3576, |
|
"step": 422 |
|
}, |
|
{ |
|
"epoch": 1.1751216122307158, |
|
"grad_norm": 0.3296295702457428, |
|
"learning_rate": 0.00013428571428571428, |
|
"loss": 0.3832, |
|
"step": 423 |
|
}, |
|
{ |
|
"epoch": 1.177901320361362, |
|
"grad_norm": 0.33110418915748596, |
|
"learning_rate": 0.00013408163265306124, |
|
"loss": 0.3247, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 1.1806810284920084, |
|
"grad_norm": 0.3340078592300415, |
|
"learning_rate": 0.00013387755102040817, |
|
"loss": 0.3718, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 1.1834607366226546, |
|
"grad_norm": 0.3025374114513397, |
|
"learning_rate": 0.00013367346938775512, |
|
"loss": 0.3766, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 1.1862404447533008, |
|
"grad_norm": 0.2760503590106964, |
|
"learning_rate": 0.00013346938775510205, |
|
"loss": 0.3311, |
|
"step": 427 |
|
}, |
|
{ |
|
"epoch": 1.1890201528839472, |
|
"grad_norm": 0.27493569254875183, |
|
"learning_rate": 0.00013326530612244898, |
|
"loss": 0.3487, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 1.1917998610145935, |
|
"grad_norm": 0.27025917172431946, |
|
"learning_rate": 0.0001330612244897959, |
|
"loss": 0.3817, |
|
"step": 429 |
|
}, |
|
{ |
|
"epoch": 1.1945795691452397, |
|
"grad_norm": 0.6159951686859131, |
|
"learning_rate": 0.00013285714285714287, |
|
"loss": 0.5815, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 1.197359277275886, |
|
"grad_norm": 0.30732738971710205, |
|
"learning_rate": 0.0001326530612244898, |
|
"loss": 0.3646, |
|
"step": 431 |
|
}, |
|
{ |
|
"epoch": 1.2001389854065323, |
|
"grad_norm": 0.3065405786037445, |
|
"learning_rate": 0.00013244897959183673, |
|
"loss": 0.4138, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 1.2029186935371785, |
|
"grad_norm": 0.4519249498844147, |
|
"learning_rate": 0.00013224489795918368, |
|
"loss": 0.5033, |
|
"step": 433 |
|
}, |
|
{ |
|
"epoch": 1.205698401667825, |
|
"grad_norm": 0.2661092281341553, |
|
"learning_rate": 0.0001320408163265306, |
|
"loss": 0.3642, |
|
"step": 434 |
|
}, |
|
{ |
|
"epoch": 1.2084781097984711, |
|
"grad_norm": 0.27004894614219666, |
|
"learning_rate": 0.00013183673469387757, |
|
"loss": 0.3814, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 1.2112578179291174, |
|
"grad_norm": 0.39225614070892334, |
|
"learning_rate": 0.0001316326530612245, |
|
"loss": 0.4773, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 1.2140375260597638, |
|
"grad_norm": 0.2685422897338867, |
|
"learning_rate": 0.00013142857142857143, |
|
"loss": 0.3679, |
|
"step": 437 |
|
}, |
|
{ |
|
"epoch": 1.21681723419041, |
|
"grad_norm": 0.32003405690193176, |
|
"learning_rate": 0.00013122448979591836, |
|
"loss": 0.4101, |
|
"step": 438 |
|
}, |
|
{ |
|
"epoch": 1.2195969423210562, |
|
"grad_norm": 0.2831343114376068, |
|
"learning_rate": 0.00013102040816326531, |
|
"loss": 0.3832, |
|
"step": 439 |
|
}, |
|
{ |
|
"epoch": 1.2223766504517026, |
|
"grad_norm": 0.327888548374176, |
|
"learning_rate": 0.00013081632653061224, |
|
"loss": 0.427, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 1.2251563585823488, |
|
"grad_norm": 0.37563320994377136, |
|
"learning_rate": 0.00013061224489795917, |
|
"loss": 0.4982, |
|
"step": 441 |
|
}, |
|
{ |
|
"epoch": 1.227936066712995, |
|
"grad_norm": 0.374957799911499, |
|
"learning_rate": 0.00013040816326530613, |
|
"loss": 0.3724, |
|
"step": 442 |
|
}, |
|
{ |
|
"epoch": 1.2307157748436415, |
|
"grad_norm": 0.3069044053554535, |
|
"learning_rate": 0.00013020408163265309, |
|
"loss": 0.4736, |
|
"step": 443 |
|
}, |
|
{ |
|
"epoch": 1.2334954829742877, |
|
"grad_norm": 0.2836786210536957, |
|
"learning_rate": 0.00013000000000000002, |
|
"loss": 0.3139, |
|
"step": 444 |
|
}, |
|
{ |
|
"epoch": 1.236275191104934, |
|
"grad_norm": 0.5021543502807617, |
|
"learning_rate": 0.00012979591836734695, |
|
"loss": 0.4687, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 1.2390548992355803, |
|
"grad_norm": 0.34825125336647034, |
|
"learning_rate": 0.00012959183673469387, |
|
"loss": 0.4624, |
|
"step": 446 |
|
}, |
|
{ |
|
"epoch": 1.2418346073662265, |
|
"grad_norm": 0.2713720202445984, |
|
"learning_rate": 0.00012938775510204083, |
|
"loss": 0.3841, |
|
"step": 447 |
|
}, |
|
{ |
|
"epoch": 1.2446143154968727, |
|
"grad_norm": 0.2995285093784332, |
|
"learning_rate": 0.00012918367346938776, |
|
"loss": 0.4582, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 1.2473940236275192, |
|
"grad_norm": 0.2539880871772766, |
|
"learning_rate": 0.0001289795918367347, |
|
"loss": 0.3232, |
|
"step": 449 |
|
}, |
|
{ |
|
"epoch": 1.2501737317581654, |
|
"grad_norm": 0.24956463277339935, |
|
"learning_rate": 0.00012877551020408162, |
|
"loss": 0.3282, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 1.2529534398888118, |
|
"grad_norm": 0.23669935762882233, |
|
"learning_rate": 0.00012857142857142858, |
|
"loss": 0.3036, |
|
"step": 451 |
|
}, |
|
{ |
|
"epoch": 1.255733148019458, |
|
"grad_norm": 0.31620267033576965, |
|
"learning_rate": 0.00012836734693877553, |
|
"loss": 0.4203, |
|
"step": 452 |
|
}, |
|
{ |
|
"epoch": 1.2585128561501042, |
|
"grad_norm": 0.37676119804382324, |
|
"learning_rate": 0.00012816326530612246, |
|
"loss": 0.4163, |
|
"step": 453 |
|
}, |
|
{ |
|
"epoch": 1.2612925642807506, |
|
"grad_norm": 0.2864663004875183, |
|
"learning_rate": 0.0001279591836734694, |
|
"loss": 0.4024, |
|
"step": 454 |
|
}, |
|
{ |
|
"epoch": 1.2640722724113969, |
|
"grad_norm": 0.27227628231048584, |
|
"learning_rate": 0.00012775510204081632, |
|
"loss": 0.32, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 1.266851980542043, |
|
"grad_norm": 0.402497261762619, |
|
"learning_rate": 0.00012755102040816328, |
|
"loss": 0.4626, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 1.2696316886726895, |
|
"grad_norm": 0.3603473901748657, |
|
"learning_rate": 0.0001273469387755102, |
|
"loss": 0.3218, |
|
"step": 457 |
|
}, |
|
{ |
|
"epoch": 1.2724113968033357, |
|
"grad_norm": 0.2873530685901642, |
|
"learning_rate": 0.00012714285714285714, |
|
"loss": 0.4026, |
|
"step": 458 |
|
}, |
|
{ |
|
"epoch": 1.275191104933982, |
|
"grad_norm": 0.29491978883743286, |
|
"learning_rate": 0.00012693877551020406, |
|
"loss": 0.2958, |
|
"step": 459 |
|
}, |
|
{ |
|
"epoch": 1.2779708130646283, |
|
"grad_norm": 0.3286297917366028, |
|
"learning_rate": 0.00012673469387755105, |
|
"loss": 0.3922, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 1.2807505211952745, |
|
"grad_norm": 0.3373448848724365, |
|
"learning_rate": 0.00012653061224489798, |
|
"loss": 0.4216, |
|
"step": 461 |
|
}, |
|
{ |
|
"epoch": 1.2835302293259208, |
|
"grad_norm": 0.305073082447052, |
|
"learning_rate": 0.0001263265306122449, |
|
"loss": 0.3676, |
|
"step": 462 |
|
}, |
|
{ |
|
"epoch": 1.2863099374565672, |
|
"grad_norm": 0.3803780674934387, |
|
"learning_rate": 0.00012612244897959184, |
|
"loss": 0.3714, |
|
"step": 463 |
|
}, |
|
{ |
|
"epoch": 1.2890896455872134, |
|
"grad_norm": 0.38346725702285767, |
|
"learning_rate": 0.0001259183673469388, |
|
"loss": 0.4816, |
|
"step": 464 |
|
}, |
|
{ |
|
"epoch": 1.2918693537178596, |
|
"grad_norm": 0.3128606379032135, |
|
"learning_rate": 0.00012571428571428572, |
|
"loss": 0.4051, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 1.294649061848506, |
|
"grad_norm": 0.2748924493789673, |
|
"learning_rate": 0.00012551020408163265, |
|
"loss": 0.383, |
|
"step": 466 |
|
}, |
|
{ |
|
"epoch": 1.2974287699791522, |
|
"grad_norm": 0.2946728765964508, |
|
"learning_rate": 0.00012530612244897958, |
|
"loss": 0.3495, |
|
"step": 467 |
|
}, |
|
{ |
|
"epoch": 1.3002084781097984, |
|
"grad_norm": 0.4133062958717346, |
|
"learning_rate": 0.00012510204081632654, |
|
"loss": 0.4196, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 1.3029881862404449, |
|
"grad_norm": 0.2911101281642914, |
|
"learning_rate": 0.0001248979591836735, |
|
"loss": 0.3733, |
|
"step": 469 |
|
}, |
|
{ |
|
"epoch": 1.305767894371091, |
|
"grad_norm": 0.35561490058898926, |
|
"learning_rate": 0.00012469387755102042, |
|
"loss": 0.4773, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 1.3085476025017373, |
|
"grad_norm": 0.23788857460021973, |
|
"learning_rate": 0.00012448979591836735, |
|
"loss": 0.3111, |
|
"step": 471 |
|
}, |
|
{ |
|
"epoch": 1.3113273106323837, |
|
"grad_norm": 0.3194721043109894, |
|
"learning_rate": 0.00012428571428571428, |
|
"loss": 0.3641, |
|
"step": 472 |
|
}, |
|
{ |
|
"epoch": 1.31410701876303, |
|
"grad_norm": 0.30017781257629395, |
|
"learning_rate": 0.00012408163265306124, |
|
"loss": 0.3239, |
|
"step": 473 |
|
}, |
|
{ |
|
"epoch": 1.3168867268936761, |
|
"grad_norm": 0.2877587676048279, |
|
"learning_rate": 0.00012387755102040817, |
|
"loss": 0.3622, |
|
"step": 474 |
|
}, |
|
{ |
|
"epoch": 1.3196664350243226, |
|
"grad_norm": 0.3146721422672272, |
|
"learning_rate": 0.0001236734693877551, |
|
"loss": 0.3629, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 1.3224461431549688, |
|
"grad_norm": 0.28086069226264954, |
|
"learning_rate": 0.00012346938775510203, |
|
"loss": 0.3967, |
|
"step": 476 |
|
}, |
|
{ |
|
"epoch": 1.325225851285615, |
|
"grad_norm": 0.2777217626571655, |
|
"learning_rate": 0.00012326530612244898, |
|
"loss": 0.3531, |
|
"step": 477 |
|
}, |
|
{ |
|
"epoch": 1.3280055594162614, |
|
"grad_norm": 0.2867282032966614, |
|
"learning_rate": 0.00012306122448979594, |
|
"loss": 0.293, |
|
"step": 478 |
|
}, |
|
{ |
|
"epoch": 1.3307852675469076, |
|
"grad_norm": 0.3004007339477539, |
|
"learning_rate": 0.00012285714285714287, |
|
"loss": 0.3577, |
|
"step": 479 |
|
}, |
|
{ |
|
"epoch": 1.3335649756775538, |
|
"grad_norm": 0.32095468044281006, |
|
"learning_rate": 0.0001226530612244898, |
|
"loss": 0.4233, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 1.3363446838082003, |
|
"grad_norm": 0.2812075614929199, |
|
"learning_rate": 0.00012244897959183676, |
|
"loss": 0.3405, |
|
"step": 481 |
|
}, |
|
{ |
|
"epoch": 1.3391243919388465, |
|
"grad_norm": 0.38106903433799744, |
|
"learning_rate": 0.00012224489795918368, |
|
"loss": 0.3918, |
|
"step": 482 |
|
}, |
|
{ |
|
"epoch": 1.3419041000694927, |
|
"grad_norm": 0.3127501606941223, |
|
"learning_rate": 0.00012204081632653061, |
|
"loss": 0.3959, |
|
"step": 483 |
|
}, |
|
{ |
|
"epoch": 1.344683808200139, |
|
"grad_norm": 0.31547605991363525, |
|
"learning_rate": 0.00012183673469387756, |
|
"loss": 0.4783, |
|
"step": 484 |
|
}, |
|
{ |
|
"epoch": 1.3474635163307853, |
|
"grad_norm": 0.3430207669734955, |
|
"learning_rate": 0.00012163265306122449, |
|
"loss": 0.4181, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 1.3502432244614315, |
|
"grad_norm": 0.30673524737358093, |
|
"learning_rate": 0.00012142857142857143, |
|
"loss": 0.4545, |
|
"step": 486 |
|
}, |
|
{ |
|
"epoch": 1.353022932592078, |
|
"grad_norm": 0.2744535505771637, |
|
"learning_rate": 0.00012122448979591839, |
|
"loss": 0.3568, |
|
"step": 487 |
|
}, |
|
{ |
|
"epoch": 1.3558026407227242, |
|
"grad_norm": 0.28088897466659546, |
|
"learning_rate": 0.00012102040816326532, |
|
"loss": 0.2957, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 1.3585823488533704, |
|
"grad_norm": 0.2807769775390625, |
|
"learning_rate": 0.00012081632653061226, |
|
"loss": 0.316, |
|
"step": 489 |
|
}, |
|
{ |
|
"epoch": 1.3613620569840168, |
|
"grad_norm": 0.33393171429634094, |
|
"learning_rate": 0.00012061224489795919, |
|
"loss": 0.3952, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 1.364141765114663, |
|
"grad_norm": 0.270470529794693, |
|
"learning_rate": 0.00012040816326530613, |
|
"loss": 0.3263, |
|
"step": 491 |
|
}, |
|
{ |
|
"epoch": 1.3669214732453092, |
|
"grad_norm": 0.26981666684150696, |
|
"learning_rate": 0.00012020408163265306, |
|
"loss": 0.3758, |
|
"step": 492 |
|
}, |
|
{ |
|
"epoch": 1.3697011813759556, |
|
"grad_norm": 0.41900643706321716, |
|
"learning_rate": 0.00012, |
|
"loss": 0.4978, |
|
"step": 493 |
|
}, |
|
{ |
|
"epoch": 1.3724808895066019, |
|
"grad_norm": 0.30232396721839905, |
|
"learning_rate": 0.00011979591836734693, |
|
"loss": 0.3857, |
|
"step": 494 |
|
}, |
|
{ |
|
"epoch": 1.375260597637248, |
|
"grad_norm": 0.302409827709198, |
|
"learning_rate": 0.00011959183673469388, |
|
"loss": 0.4455, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 1.3780403057678945, |
|
"grad_norm": 0.30566728115081787, |
|
"learning_rate": 0.00011938775510204083, |
|
"loss": 0.3365, |
|
"step": 496 |
|
}, |
|
{ |
|
"epoch": 1.3808200138985407, |
|
"grad_norm": 0.2815055847167969, |
|
"learning_rate": 0.00011918367346938777, |
|
"loss": 0.395, |
|
"step": 497 |
|
}, |
|
{ |
|
"epoch": 1.383599722029187, |
|
"grad_norm": 0.27659547328948975, |
|
"learning_rate": 0.0001189795918367347, |
|
"loss": 0.3635, |
|
"step": 498 |
|
}, |
|
{ |
|
"epoch": 1.3863794301598333, |
|
"grad_norm": 0.2691234052181244, |
|
"learning_rate": 0.00011877551020408165, |
|
"loss": 0.354, |
|
"step": 499 |
|
}, |
|
{ |
|
"epoch": 1.3891591382904795, |
|
"grad_norm": 0.26110008358955383, |
|
"learning_rate": 0.00011857142857142858, |
|
"loss": 0.3056, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.3919388464211258, |
|
"grad_norm": 0.3188588619232178, |
|
"learning_rate": 0.00011836734693877552, |
|
"loss": 0.3483, |
|
"step": 501 |
|
}, |
|
{ |
|
"epoch": 1.3947185545517722, |
|
"grad_norm": 0.26784244179725647, |
|
"learning_rate": 0.00011816326530612245, |
|
"loss": 0.4165, |
|
"step": 502 |
|
}, |
|
{ |
|
"epoch": 1.3974982626824184, |
|
"grad_norm": 0.27212825417518616, |
|
"learning_rate": 0.00011795918367346939, |
|
"loss": 0.3358, |
|
"step": 503 |
|
}, |
|
{ |
|
"epoch": 1.4002779708130646, |
|
"grad_norm": 0.27269822359085083, |
|
"learning_rate": 0.00011775510204081632, |
|
"loss": 0.3894, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 1.403057678943711, |
|
"grad_norm": 0.33584582805633545, |
|
"learning_rate": 0.00011755102040816328, |
|
"loss": 0.3801, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 1.4058373870743572, |
|
"grad_norm": 0.34887197613716125, |
|
"learning_rate": 0.00011734693877551022, |
|
"loss": 0.4057, |
|
"step": 506 |
|
}, |
|
{ |
|
"epoch": 1.4086170952050034, |
|
"grad_norm": 0.2805575728416443, |
|
"learning_rate": 0.00011714285714285715, |
|
"loss": 0.3556, |
|
"step": 507 |
|
}, |
|
{ |
|
"epoch": 1.4113968033356499, |
|
"grad_norm": 0.3376087546348572, |
|
"learning_rate": 0.00011693877551020409, |
|
"loss": 0.3511, |
|
"step": 508 |
|
}, |
|
{ |
|
"epoch": 1.414176511466296, |
|
"grad_norm": 0.29646357893943787, |
|
"learning_rate": 0.00011673469387755102, |
|
"loss": 0.3396, |
|
"step": 509 |
|
}, |
|
{ |
|
"epoch": 1.4169562195969423, |
|
"grad_norm": 0.2987593710422516, |
|
"learning_rate": 0.00011653061224489797, |
|
"loss": 0.3152, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 1.4197359277275887, |
|
"grad_norm": 0.27871114015579224, |
|
"learning_rate": 0.0001163265306122449, |
|
"loss": 0.3039, |
|
"step": 511 |
|
}, |
|
{ |
|
"epoch": 1.422515635858235, |
|
"grad_norm": 0.3051932752132416, |
|
"learning_rate": 0.00011612244897959184, |
|
"loss": 0.3619, |
|
"step": 512 |
|
}, |
|
{ |
|
"epoch": 1.4252953439888811, |
|
"grad_norm": 0.31153935194015503, |
|
"learning_rate": 0.00011591836734693877, |
|
"loss": 0.4433, |
|
"step": 513 |
|
}, |
|
{ |
|
"epoch": 1.4280750521195276, |
|
"grad_norm": 0.30891695618629456, |
|
"learning_rate": 0.00011571428571428574, |
|
"loss": 0.4737, |
|
"step": 514 |
|
}, |
|
{ |
|
"epoch": 1.4308547602501738, |
|
"grad_norm": 0.2646159529685974, |
|
"learning_rate": 0.00011551020408163267, |
|
"loss": 0.3155, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 1.43363446838082, |
|
"grad_norm": 0.31023016571998596, |
|
"learning_rate": 0.00011530612244897961, |
|
"loss": 0.468, |
|
"step": 516 |
|
}, |
|
{ |
|
"epoch": 1.4364141765114664, |
|
"grad_norm": 0.32484954595565796, |
|
"learning_rate": 0.00011510204081632654, |
|
"loss": 0.4442, |
|
"step": 517 |
|
}, |
|
{ |
|
"epoch": 1.4391938846421126, |
|
"grad_norm": 0.30216851830482483, |
|
"learning_rate": 0.00011489795918367348, |
|
"loss": 0.3959, |
|
"step": 518 |
|
}, |
|
{ |
|
"epoch": 1.4419735927727588, |
|
"grad_norm": 0.3013352155685425, |
|
"learning_rate": 0.00011469387755102041, |
|
"loss": 0.3682, |
|
"step": 519 |
|
}, |
|
{ |
|
"epoch": 1.4447533009034053, |
|
"grad_norm": 0.3074597716331482, |
|
"learning_rate": 0.00011448979591836735, |
|
"loss": 0.3796, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 1.4475330090340515, |
|
"grad_norm": 0.29347458481788635, |
|
"learning_rate": 0.00011428571428571428, |
|
"loss": 0.387, |
|
"step": 521 |
|
}, |
|
{ |
|
"epoch": 1.4503127171646977, |
|
"grad_norm": 0.2919844686985016, |
|
"learning_rate": 0.00011408163265306123, |
|
"loss": 0.3413, |
|
"step": 522 |
|
}, |
|
{ |
|
"epoch": 1.453092425295344, |
|
"grad_norm": 0.274503618478775, |
|
"learning_rate": 0.00011387755102040818, |
|
"loss": 0.3384, |
|
"step": 523 |
|
}, |
|
{ |
|
"epoch": 1.4558721334259903, |
|
"grad_norm": 0.30001312494277954, |
|
"learning_rate": 0.00011367346938775511, |
|
"loss": 0.3218, |
|
"step": 524 |
|
}, |
|
{ |
|
"epoch": 1.4586518415566365, |
|
"grad_norm": 0.2913724482059479, |
|
"learning_rate": 0.00011346938775510206, |
|
"loss": 0.3212, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 1.461431549687283, |
|
"grad_norm": 0.2810291051864624, |
|
"learning_rate": 0.00011326530612244898, |
|
"loss": 0.4783, |
|
"step": 526 |
|
}, |
|
{ |
|
"epoch": 1.4642112578179292, |
|
"grad_norm": 0.28154751658439636, |
|
"learning_rate": 0.00011306122448979593, |
|
"loss": 0.384, |
|
"step": 527 |
|
}, |
|
{ |
|
"epoch": 1.4669909659485754, |
|
"grad_norm": 0.3322899639606476, |
|
"learning_rate": 0.00011285714285714286, |
|
"loss": 0.345, |
|
"step": 528 |
|
}, |
|
{ |
|
"epoch": 1.4697706740792218, |
|
"grad_norm": 0.2776385247707367, |
|
"learning_rate": 0.0001126530612244898, |
|
"loss": 0.3287, |
|
"step": 529 |
|
}, |
|
{ |
|
"epoch": 1.472550382209868, |
|
"grad_norm": 0.3325115144252777, |
|
"learning_rate": 0.00011244897959183673, |
|
"loss": 0.4717, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 1.4753300903405142, |
|
"grad_norm": 0.30507102608680725, |
|
"learning_rate": 0.00011224489795918367, |
|
"loss": 0.3773, |
|
"step": 531 |
|
}, |
|
{ |
|
"epoch": 1.4781097984711606, |
|
"grad_norm": 0.28410202264785767, |
|
"learning_rate": 0.0001120408163265306, |
|
"loss": 0.3844, |
|
"step": 532 |
|
}, |
|
{ |
|
"epoch": 1.4808895066018068, |
|
"grad_norm": 0.3436387777328491, |
|
"learning_rate": 0.00011183673469387757, |
|
"loss": 0.4171, |
|
"step": 533 |
|
}, |
|
{ |
|
"epoch": 1.483669214732453, |
|
"grad_norm": 0.2561332881450653, |
|
"learning_rate": 0.0001116326530612245, |
|
"loss": 0.3597, |
|
"step": 534 |
|
}, |
|
{ |
|
"epoch": 1.4864489228630995, |
|
"grad_norm": 0.31354856491088867, |
|
"learning_rate": 0.00011142857142857144, |
|
"loss": 0.3473, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 1.4892286309937457, |
|
"grad_norm": 0.26828843355178833, |
|
"learning_rate": 0.00011122448979591837, |
|
"loss": 0.2624, |
|
"step": 536 |
|
}, |
|
{ |
|
"epoch": 1.492008339124392, |
|
"grad_norm": 0.3968587815761566, |
|
"learning_rate": 0.00011102040816326532, |
|
"loss": 0.387, |
|
"step": 537 |
|
}, |
|
{ |
|
"epoch": 1.4947880472550383, |
|
"grad_norm": 0.28001683950424194, |
|
"learning_rate": 0.00011081632653061225, |
|
"loss": 0.3883, |
|
"step": 538 |
|
}, |
|
{ |
|
"epoch": 1.4975677553856845, |
|
"grad_norm": 0.2872998118400574, |
|
"learning_rate": 0.00011061224489795919, |
|
"loss": 0.3847, |
|
"step": 539 |
|
}, |
|
{ |
|
"epoch": 1.5003474635163307, |
|
"grad_norm": 0.27555397152900696, |
|
"learning_rate": 0.00011040816326530612, |
|
"loss": 0.2961, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 1.5031271716469772, |
|
"grad_norm": 0.27156969904899597, |
|
"learning_rate": 0.00011020408163265306, |
|
"loss": 0.3722, |
|
"step": 541 |
|
}, |
|
{ |
|
"epoch": 1.5059068797776234, |
|
"grad_norm": 0.3042210638523102, |
|
"learning_rate": 0.00011000000000000002, |
|
"loss": 0.4093, |
|
"step": 542 |
|
}, |
|
{ |
|
"epoch": 1.5086865879082696, |
|
"grad_norm": 0.2673455774784088, |
|
"learning_rate": 0.00010979591836734695, |
|
"loss": 0.3467, |
|
"step": 543 |
|
}, |
|
{ |
|
"epoch": 1.511466296038916, |
|
"grad_norm": 0.4710778594017029, |
|
"learning_rate": 0.00010959183673469389, |
|
"loss": 0.4335, |
|
"step": 544 |
|
}, |
|
{ |
|
"epoch": 1.5142460041695622, |
|
"grad_norm": 0.28414055705070496, |
|
"learning_rate": 0.00010938775510204082, |
|
"loss": 0.3329, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 1.5170257123002084, |
|
"grad_norm": 0.28005218505859375, |
|
"learning_rate": 0.00010918367346938776, |
|
"loss": 0.3585, |
|
"step": 546 |
|
}, |
|
{ |
|
"epoch": 1.5198054204308549, |
|
"grad_norm": 0.36730489134788513, |
|
"learning_rate": 0.00010897959183673469, |
|
"loss": 0.3679, |
|
"step": 547 |
|
}, |
|
{ |
|
"epoch": 1.522585128561501, |
|
"grad_norm": 0.2829911410808563, |
|
"learning_rate": 0.00010877551020408163, |
|
"loss": 0.364, |
|
"step": 548 |
|
}, |
|
{ |
|
"epoch": 1.5253648366921473, |
|
"grad_norm": 0.27309224009513855, |
|
"learning_rate": 0.00010857142857142856, |
|
"loss": 0.413, |
|
"step": 549 |
|
}, |
|
{ |
|
"epoch": 1.5281445448227937, |
|
"grad_norm": 0.25464004278182983, |
|
"learning_rate": 0.00010836734693877551, |
|
"loss": 0.3196, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 1.53092425295344, |
|
"grad_norm": 0.2633742094039917, |
|
"learning_rate": 0.00010816326530612246, |
|
"loss": 0.312, |
|
"step": 551 |
|
}, |
|
{ |
|
"epoch": 1.5337039610840861, |
|
"grad_norm": 0.34395769238471985, |
|
"learning_rate": 0.0001079591836734694, |
|
"loss": 0.3483, |
|
"step": 552 |
|
}, |
|
{ |
|
"epoch": 1.5364836692147326, |
|
"grad_norm": 0.27649009227752686, |
|
"learning_rate": 0.00010775510204081634, |
|
"loss": 0.4565, |
|
"step": 553 |
|
}, |
|
{ |
|
"epoch": 1.5392633773453788, |
|
"grad_norm": 0.4239721894264221, |
|
"learning_rate": 0.00010755102040816328, |
|
"loss": 0.4087, |
|
"step": 554 |
|
}, |
|
{ |
|
"epoch": 1.542043085476025, |
|
"grad_norm": 0.5541340708732605, |
|
"learning_rate": 0.00010734693877551021, |
|
"loss": 0.4309, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 1.5448227936066714, |
|
"grad_norm": 0.2893712818622589, |
|
"learning_rate": 0.00010714285714285715, |
|
"loss": 0.3447, |
|
"step": 556 |
|
}, |
|
{ |
|
"epoch": 1.5476025017373176, |
|
"grad_norm": 0.26238903403282166, |
|
"learning_rate": 0.00010693877551020408, |
|
"loss": 0.3256, |
|
"step": 557 |
|
}, |
|
{ |
|
"epoch": 1.5503822098679638, |
|
"grad_norm": 0.29899922013282776, |
|
"learning_rate": 0.00010673469387755102, |
|
"loss": 0.3142, |
|
"step": 558 |
|
}, |
|
{ |
|
"epoch": 1.5531619179986103, |
|
"grad_norm": 0.28399357199668884, |
|
"learning_rate": 0.00010653061224489795, |
|
"loss": 0.3314, |
|
"step": 559 |
|
}, |
|
{ |
|
"epoch": 1.5559416261292565, |
|
"grad_norm": 0.3450354337692261, |
|
"learning_rate": 0.00010632653061224491, |
|
"loss": 0.3733, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 1.5587213342599027, |
|
"grad_norm": 0.283597856760025, |
|
"learning_rate": 0.00010612244897959185, |
|
"loss": 0.3828, |
|
"step": 561 |
|
}, |
|
{ |
|
"epoch": 1.561501042390549, |
|
"grad_norm": 0.28581732511520386, |
|
"learning_rate": 0.00010591836734693878, |
|
"loss": 0.3131, |
|
"step": 562 |
|
}, |
|
{ |
|
"epoch": 1.5642807505211953, |
|
"grad_norm": 0.2778254747390747, |
|
"learning_rate": 0.00010571428571428572, |
|
"loss": 0.4744, |
|
"step": 563 |
|
}, |
|
{ |
|
"epoch": 1.5670604586518415, |
|
"grad_norm": 0.2736669182777405, |
|
"learning_rate": 0.00010551020408163265, |
|
"loss": 0.4084, |
|
"step": 564 |
|
}, |
|
{ |
|
"epoch": 1.569840166782488, |
|
"grad_norm": 0.31702694296836853, |
|
"learning_rate": 0.0001053061224489796, |
|
"loss": 0.3877, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 1.5726198749131342, |
|
"grad_norm": 0.4662436246871948, |
|
"learning_rate": 0.00010510204081632653, |
|
"loss": 0.3619, |
|
"step": 566 |
|
}, |
|
{ |
|
"epoch": 1.5753995830437804, |
|
"grad_norm": 0.26047173142433167, |
|
"learning_rate": 0.00010489795918367347, |
|
"loss": 0.3031, |
|
"step": 567 |
|
}, |
|
{ |
|
"epoch": 1.5781792911744268, |
|
"grad_norm": 0.4014153778553009, |
|
"learning_rate": 0.0001046938775510204, |
|
"loss": 0.416, |
|
"step": 568 |
|
}, |
|
{ |
|
"epoch": 1.580958999305073, |
|
"grad_norm": 0.2595798969268799, |
|
"learning_rate": 0.00010448979591836735, |
|
"loss": 0.3635, |
|
"step": 569 |
|
}, |
|
{ |
|
"epoch": 1.5837387074357192, |
|
"grad_norm": 0.3327747881412506, |
|
"learning_rate": 0.0001042857142857143, |
|
"loss": 0.5032, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 1.5865184155663656, |
|
"grad_norm": 0.28941619396209717, |
|
"learning_rate": 0.00010408163265306123, |
|
"loss": 0.4001, |
|
"step": 571 |
|
}, |
|
{ |
|
"epoch": 1.5892981236970118, |
|
"grad_norm": 0.29737424850463867, |
|
"learning_rate": 0.00010387755102040817, |
|
"loss": 0.4263, |
|
"step": 572 |
|
}, |
|
{ |
|
"epoch": 1.592077831827658, |
|
"grad_norm": 0.3156750202178955, |
|
"learning_rate": 0.00010367346938775511, |
|
"loss": 0.326, |
|
"step": 573 |
|
}, |
|
{ |
|
"epoch": 1.5948575399583045, |
|
"grad_norm": 0.33826690912246704, |
|
"learning_rate": 0.00010346938775510204, |
|
"loss": 0.4291, |
|
"step": 574 |
|
}, |
|
{ |
|
"epoch": 1.5976372480889507, |
|
"grad_norm": 0.2737540900707245, |
|
"learning_rate": 0.00010326530612244899, |
|
"loss": 0.4464, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 1.600416956219597, |
|
"grad_norm": 0.3012180030345917, |
|
"learning_rate": 0.00010306122448979591, |
|
"loss": 0.4044, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 1.6031966643502433, |
|
"grad_norm": 0.3628576397895813, |
|
"learning_rate": 0.00010285714285714286, |
|
"loss": 0.4376, |
|
"step": 577 |
|
}, |
|
{ |
|
"epoch": 1.6059763724808895, |
|
"grad_norm": 0.3489641845226288, |
|
"learning_rate": 0.00010265306122448981, |
|
"loss": 0.3054, |
|
"step": 578 |
|
}, |
|
{ |
|
"epoch": 1.6087560806115357, |
|
"grad_norm": 0.28231537342071533, |
|
"learning_rate": 0.00010244897959183674, |
|
"loss": 0.2566, |
|
"step": 579 |
|
}, |
|
{ |
|
"epoch": 1.6115357887421822, |
|
"grad_norm": 0.2675357758998871, |
|
"learning_rate": 0.00010224489795918369, |
|
"loss": 0.3917, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 1.6143154968728284, |
|
"grad_norm": 0.25992777943611145, |
|
"learning_rate": 0.00010204081632653062, |
|
"loss": 0.3511, |
|
"step": 581 |
|
}, |
|
{ |
|
"epoch": 1.6170952050034746, |
|
"grad_norm": 0.2999131977558136, |
|
"learning_rate": 0.00010183673469387756, |
|
"loss": 0.3135, |
|
"step": 582 |
|
}, |
|
{ |
|
"epoch": 1.619874913134121, |
|
"grad_norm": 0.2928028404712677, |
|
"learning_rate": 0.00010163265306122449, |
|
"loss": 0.34, |
|
"step": 583 |
|
}, |
|
{ |
|
"epoch": 1.6226546212647672, |
|
"grad_norm": 0.24898597598075867, |
|
"learning_rate": 0.00010142857142857143, |
|
"loss": 0.3334, |
|
"step": 584 |
|
}, |
|
{ |
|
"epoch": 1.6254343293954134, |
|
"grad_norm": 0.309908002614975, |
|
"learning_rate": 0.00010122448979591836, |
|
"loss": 0.4202, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 1.6282140375260599, |
|
"grad_norm": 0.2774292230606079, |
|
"learning_rate": 0.0001010204081632653, |
|
"loss": 0.3986, |
|
"step": 586 |
|
}, |
|
{ |
|
"epoch": 1.630993745656706, |
|
"grad_norm": 0.4288952648639679, |
|
"learning_rate": 0.00010081632653061226, |
|
"loss": 0.4282, |
|
"step": 587 |
|
}, |
|
{ |
|
"epoch": 1.6337734537873523, |
|
"grad_norm": 0.2605331540107727, |
|
"learning_rate": 0.00010061224489795919, |
|
"loss": 0.356, |
|
"step": 588 |
|
}, |
|
{ |
|
"epoch": 1.6365531619179987, |
|
"grad_norm": 0.2896113395690918, |
|
"learning_rate": 0.00010040816326530613, |
|
"loss": 0.3454, |
|
"step": 589 |
|
}, |
|
{ |
|
"epoch": 1.639332870048645, |
|
"grad_norm": 0.26861974596977234, |
|
"learning_rate": 0.00010020408163265306, |
|
"loss": 0.4541, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 1.6421125781792911, |
|
"grad_norm": 0.3458561599254608, |
|
"learning_rate": 0.0001, |
|
"loss": 0.3547, |
|
"step": 591 |
|
}, |
|
{ |
|
"epoch": 1.6448922863099376, |
|
"grad_norm": 0.287142276763916, |
|
"learning_rate": 9.979591836734695e-05, |
|
"loss": 0.3585, |
|
"step": 592 |
|
}, |
|
{ |
|
"epoch": 1.6476719944405838, |
|
"grad_norm": 0.2605501711368561, |
|
"learning_rate": 9.959183673469388e-05, |
|
"loss": 0.345, |
|
"step": 593 |
|
}, |
|
{ |
|
"epoch": 1.65045170257123, |
|
"grad_norm": 0.2797659635543823, |
|
"learning_rate": 9.938775510204082e-05, |
|
"loss": 0.3176, |
|
"step": 594 |
|
}, |
|
{ |
|
"epoch": 1.6532314107018764, |
|
"grad_norm": 0.26743918657302856, |
|
"learning_rate": 9.918367346938776e-05, |
|
"loss": 0.2928, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 1.6560111188325226, |
|
"grad_norm": 0.2472202330827713, |
|
"learning_rate": 9.897959183673469e-05, |
|
"loss": 0.2373, |
|
"step": 596 |
|
}, |
|
{ |
|
"epoch": 1.6587908269631688, |
|
"grad_norm": 0.26467829942703247, |
|
"learning_rate": 9.877551020408164e-05, |
|
"loss": 0.3824, |
|
"step": 597 |
|
}, |
|
{ |
|
"epoch": 1.6615705350938152, |
|
"grad_norm": 0.272390753030777, |
|
"learning_rate": 9.857142857142858e-05, |
|
"loss": 0.322, |
|
"step": 598 |
|
}, |
|
{ |
|
"epoch": 1.6643502432244615, |
|
"grad_norm": 0.2785828709602356, |
|
"learning_rate": 9.836734693877552e-05, |
|
"loss": 0.3201, |
|
"step": 599 |
|
}, |
|
{ |
|
"epoch": 1.6671299513551077, |
|
"grad_norm": 0.28154081106185913, |
|
"learning_rate": 9.816326530612245e-05, |
|
"loss": 0.3767, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.6671299513551077, |
|
"eval_loss": 0.4453812837600708, |
|
"eval_runtime": 212.689, |
|
"eval_samples_per_second": 1.693, |
|
"eval_steps_per_second": 1.693, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.669909659485754, |
|
"grad_norm": 0.26373493671417236, |
|
"learning_rate": 9.79591836734694e-05, |
|
"loss": 0.3756, |
|
"step": 601 |
|
}, |
|
{ |
|
"epoch": 1.6726893676164003, |
|
"grad_norm": 0.31205254793167114, |
|
"learning_rate": 9.775510204081632e-05, |
|
"loss": 0.2962, |
|
"step": 602 |
|
}, |
|
{ |
|
"epoch": 1.6754690757470465, |
|
"grad_norm": 0.2865038812160492, |
|
"learning_rate": 9.755102040816328e-05, |
|
"loss": 0.345, |
|
"step": 603 |
|
}, |
|
{ |
|
"epoch": 1.678248783877693, |
|
"grad_norm": 0.2950930893421173, |
|
"learning_rate": 9.734693877551021e-05, |
|
"loss": 0.3678, |
|
"step": 604 |
|
}, |
|
{ |
|
"epoch": 1.6810284920083391, |
|
"grad_norm": 0.35823482275009155, |
|
"learning_rate": 9.714285714285715e-05, |
|
"loss": 0.4096, |
|
"step": 605 |
|
}, |
|
{ |
|
"epoch": 1.6838082001389854, |
|
"grad_norm": 0.2647465169429779, |
|
"learning_rate": 9.693877551020408e-05, |
|
"loss": 0.4015, |
|
"step": 606 |
|
}, |
|
{ |
|
"epoch": 1.6865879082696318, |
|
"grad_norm": 0.30505606532096863, |
|
"learning_rate": 9.673469387755102e-05, |
|
"loss": 0.4116, |
|
"step": 607 |
|
}, |
|
{ |
|
"epoch": 1.689367616400278, |
|
"grad_norm": 0.3076132833957672, |
|
"learning_rate": 9.653061224489797e-05, |
|
"loss": 0.3867, |
|
"step": 608 |
|
}, |
|
{ |
|
"epoch": 1.6921473245309242, |
|
"grad_norm": 0.2777102589607239, |
|
"learning_rate": 9.63265306122449e-05, |
|
"loss": 0.3944, |
|
"step": 609 |
|
}, |
|
{ |
|
"epoch": 1.6949270326615706, |
|
"grad_norm": 0.3163359463214874, |
|
"learning_rate": 9.612244897959184e-05, |
|
"loss": 0.4266, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 1.6977067407922168, |
|
"grad_norm": 0.3227400779724121, |
|
"learning_rate": 9.591836734693878e-05, |
|
"loss": 0.397, |
|
"step": 611 |
|
}, |
|
{ |
|
"epoch": 1.700486448922863, |
|
"grad_norm": 0.27346518635749817, |
|
"learning_rate": 9.571428571428573e-05, |
|
"loss": 0.3463, |
|
"step": 612 |
|
}, |
|
{ |
|
"epoch": 1.7032661570535095, |
|
"grad_norm": 0.3154374957084656, |
|
"learning_rate": 9.551020408163265e-05, |
|
"loss": 0.3921, |
|
"step": 613 |
|
}, |
|
{ |
|
"epoch": 1.7060458651841557, |
|
"grad_norm": 0.2859618365764618, |
|
"learning_rate": 9.53061224489796e-05, |
|
"loss": 0.3736, |
|
"step": 614 |
|
}, |
|
{ |
|
"epoch": 1.708825573314802, |
|
"grad_norm": 0.28541162610054016, |
|
"learning_rate": 9.510204081632653e-05, |
|
"loss": 0.3689, |
|
"step": 615 |
|
}, |
|
{ |
|
"epoch": 1.7116052814454483, |
|
"grad_norm": 0.2870318293571472, |
|
"learning_rate": 9.489795918367348e-05, |
|
"loss": 0.2594, |
|
"step": 616 |
|
}, |
|
{ |
|
"epoch": 1.7143849895760945, |
|
"grad_norm": 0.2808590531349182, |
|
"learning_rate": 9.469387755102041e-05, |
|
"loss": 0.4471, |
|
"step": 617 |
|
}, |
|
{ |
|
"epoch": 1.7171646977067407, |
|
"grad_norm": 0.2777983248233795, |
|
"learning_rate": 9.448979591836736e-05, |
|
"loss": 0.304, |
|
"step": 618 |
|
}, |
|
{ |
|
"epoch": 1.7199444058373872, |
|
"grad_norm": 0.2791382372379303, |
|
"learning_rate": 9.428571428571429e-05, |
|
"loss": 0.3117, |
|
"step": 619 |
|
}, |
|
{ |
|
"epoch": 1.7227241139680334, |
|
"grad_norm": 0.3301670253276825, |
|
"learning_rate": 9.408163265306123e-05, |
|
"loss": 0.3445, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 1.7255038220986796, |
|
"grad_norm": 0.2752249240875244, |
|
"learning_rate": 9.387755102040817e-05, |
|
"loss": 0.3807, |
|
"step": 621 |
|
}, |
|
{ |
|
"epoch": 1.728283530229326, |
|
"grad_norm": 0.32973968982696533, |
|
"learning_rate": 9.367346938775511e-05, |
|
"loss": 0.4078, |
|
"step": 622 |
|
}, |
|
{ |
|
"epoch": 1.7310632383599722, |
|
"grad_norm": 0.30001533031463623, |
|
"learning_rate": 9.346938775510204e-05, |
|
"loss": 0.4355, |
|
"step": 623 |
|
}, |
|
{ |
|
"epoch": 1.7338429464906184, |
|
"grad_norm": 0.255593866109848, |
|
"learning_rate": 9.326530612244899e-05, |
|
"loss": 0.3447, |
|
"step": 624 |
|
}, |
|
{ |
|
"epoch": 1.7366226546212649, |
|
"grad_norm": 0.2974906861782074, |
|
"learning_rate": 9.306122448979592e-05, |
|
"loss": 0.4654, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 1.739402362751911, |
|
"grad_norm": 0.3247474133968353, |
|
"learning_rate": 9.285714285714286e-05, |
|
"loss": 0.3117, |
|
"step": 626 |
|
}, |
|
{ |
|
"epoch": 1.7421820708825573, |
|
"grad_norm": 0.27801772952079773, |
|
"learning_rate": 9.26530612244898e-05, |
|
"loss": 0.4573, |
|
"step": 627 |
|
}, |
|
{ |
|
"epoch": 1.7449617790132037, |
|
"grad_norm": 0.2884966731071472, |
|
"learning_rate": 9.244897959183673e-05, |
|
"loss": 0.3575, |
|
"step": 628 |
|
}, |
|
{ |
|
"epoch": 1.74774148714385, |
|
"grad_norm": 0.27776768803596497, |
|
"learning_rate": 9.224489795918367e-05, |
|
"loss": 0.4243, |
|
"step": 629 |
|
}, |
|
{ |
|
"epoch": 1.7505211952744961, |
|
"grad_norm": 0.2852678596973419, |
|
"learning_rate": 9.204081632653062e-05, |
|
"loss": 0.4214, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 1.7533009034051426, |
|
"grad_norm": 0.7854850888252258, |
|
"learning_rate": 9.183673469387756e-05, |
|
"loss": 0.4479, |
|
"step": 631 |
|
}, |
|
{ |
|
"epoch": 1.7560806115357888, |
|
"grad_norm": 0.3441762626171112, |
|
"learning_rate": 9.163265306122449e-05, |
|
"loss": 0.3926, |
|
"step": 632 |
|
}, |
|
{ |
|
"epoch": 1.758860319666435, |
|
"grad_norm": 0.27086740732192993, |
|
"learning_rate": 9.142857142857143e-05, |
|
"loss": 0.3389, |
|
"step": 633 |
|
}, |
|
{ |
|
"epoch": 1.7616400277970814, |
|
"grad_norm": 0.273234486579895, |
|
"learning_rate": 9.122448979591836e-05, |
|
"loss": 0.3376, |
|
"step": 634 |
|
}, |
|
{ |
|
"epoch": 1.7644197359277276, |
|
"grad_norm": 0.29030004143714905, |
|
"learning_rate": 9.102040816326532e-05, |
|
"loss": 0.3156, |
|
"step": 635 |
|
}, |
|
{ |
|
"epoch": 1.7671994440583738, |
|
"grad_norm": 0.2696784436702728, |
|
"learning_rate": 9.081632653061225e-05, |
|
"loss": 0.3006, |
|
"step": 636 |
|
}, |
|
{ |
|
"epoch": 1.7699791521890202, |
|
"grad_norm": 0.2931046485900879, |
|
"learning_rate": 9.061224489795919e-05, |
|
"loss": 0.3289, |
|
"step": 637 |
|
}, |
|
{ |
|
"epoch": 1.7727588603196665, |
|
"grad_norm": 0.2624610364437103, |
|
"learning_rate": 9.040816326530612e-05, |
|
"loss": 0.316, |
|
"step": 638 |
|
}, |
|
{ |
|
"epoch": 1.7755385684503127, |
|
"grad_norm": 0.2946118414402008, |
|
"learning_rate": 9.020408163265308e-05, |
|
"loss": 0.4373, |
|
"step": 639 |
|
}, |
|
{ |
|
"epoch": 1.778318276580959, |
|
"grad_norm": 0.32654690742492676, |
|
"learning_rate": 9e-05, |
|
"loss": 0.3699, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 1.7810979847116053, |
|
"grad_norm": 0.2824501693248749, |
|
"learning_rate": 8.979591836734695e-05, |
|
"loss": 0.3846, |
|
"step": 641 |
|
}, |
|
{ |
|
"epoch": 1.7838776928422515, |
|
"grad_norm": 0.3406214118003845, |
|
"learning_rate": 8.959183673469388e-05, |
|
"loss": 0.3932, |
|
"step": 642 |
|
}, |
|
{ |
|
"epoch": 1.786657400972898, |
|
"grad_norm": 0.26329147815704346, |
|
"learning_rate": 8.938775510204082e-05, |
|
"loss": 0.3152, |
|
"step": 643 |
|
}, |
|
{ |
|
"epoch": 1.7894371091035441, |
|
"grad_norm": 0.2712422311306, |
|
"learning_rate": 8.918367346938776e-05, |
|
"loss": 0.3595, |
|
"step": 644 |
|
}, |
|
{ |
|
"epoch": 1.7922168172341904, |
|
"grad_norm": 0.30618801712989807, |
|
"learning_rate": 8.89795918367347e-05, |
|
"loss": 0.3724, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 1.7949965253648368, |
|
"grad_norm": 0.2918386161327362, |
|
"learning_rate": 8.877551020408164e-05, |
|
"loss": 0.3537, |
|
"step": 646 |
|
}, |
|
{ |
|
"epoch": 1.797776233495483, |
|
"grad_norm": 0.27820420265197754, |
|
"learning_rate": 8.857142857142857e-05, |
|
"loss": 0.3404, |
|
"step": 647 |
|
}, |
|
{ |
|
"epoch": 1.8005559416261292, |
|
"grad_norm": 0.296987920999527, |
|
"learning_rate": 8.836734693877552e-05, |
|
"loss": 0.3644, |
|
"step": 648 |
|
}, |
|
{ |
|
"epoch": 1.8033356497567756, |
|
"grad_norm": 0.29092004895210266, |
|
"learning_rate": 8.816326530612245e-05, |
|
"loss": 0.428, |
|
"step": 649 |
|
}, |
|
{ |
|
"epoch": 1.8061153578874218, |
|
"grad_norm": 0.2843736410140991, |
|
"learning_rate": 8.79591836734694e-05, |
|
"loss": 0.3709, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 1.808895066018068, |
|
"grad_norm": 0.2677766978740692, |
|
"learning_rate": 8.775510204081632e-05, |
|
"loss": 0.2649, |
|
"step": 651 |
|
}, |
|
{ |
|
"epoch": 1.8116747741487145, |
|
"grad_norm": 0.2586863338947296, |
|
"learning_rate": 8.755102040816327e-05, |
|
"loss": 0.3241, |
|
"step": 652 |
|
}, |
|
{ |
|
"epoch": 1.8144544822793607, |
|
"grad_norm": 0.32978034019470215, |
|
"learning_rate": 8.734693877551021e-05, |
|
"loss": 0.3787, |
|
"step": 653 |
|
}, |
|
{ |
|
"epoch": 1.8172341904100069, |
|
"grad_norm": 0.27411404252052307, |
|
"learning_rate": 8.714285714285715e-05, |
|
"loss": 0.3447, |
|
"step": 654 |
|
}, |
|
{ |
|
"epoch": 1.8200138985406533, |
|
"grad_norm": 0.26756396889686584, |
|
"learning_rate": 8.693877551020408e-05, |
|
"loss": 0.3528, |
|
"step": 655 |
|
}, |
|
{ |
|
"epoch": 1.8227936066712995, |
|
"grad_norm": 0.30654609203338623, |
|
"learning_rate": 8.673469387755102e-05, |
|
"loss": 0.3805, |
|
"step": 656 |
|
}, |
|
{ |
|
"epoch": 1.8255733148019457, |
|
"grad_norm": 0.29328277707099915, |
|
"learning_rate": 8.653061224489797e-05, |
|
"loss": 0.3276, |
|
"step": 657 |
|
}, |
|
{ |
|
"epoch": 1.8283530229325922, |
|
"grad_norm": 0.2501872777938843, |
|
"learning_rate": 8.632653061224491e-05, |
|
"loss": 0.3368, |
|
"step": 658 |
|
}, |
|
{ |
|
"epoch": 1.8311327310632384, |
|
"grad_norm": 0.3712775707244873, |
|
"learning_rate": 8.612244897959184e-05, |
|
"loss": 0.3671, |
|
"step": 659 |
|
}, |
|
{ |
|
"epoch": 1.8339124391938846, |
|
"grad_norm": 0.24037517607212067, |
|
"learning_rate": 8.591836734693878e-05, |
|
"loss": 0.259, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 1.836692147324531, |
|
"grad_norm": 0.25154754519462585, |
|
"learning_rate": 8.571428571428571e-05, |
|
"loss": 0.2983, |
|
"step": 661 |
|
}, |
|
{ |
|
"epoch": 1.8394718554551772, |
|
"grad_norm": 0.2672985792160034, |
|
"learning_rate": 8.551020408163266e-05, |
|
"loss": 0.3657, |
|
"step": 662 |
|
}, |
|
{ |
|
"epoch": 1.8422515635858234, |
|
"grad_norm": 0.31283143162727356, |
|
"learning_rate": 8.53061224489796e-05, |
|
"loss": 0.3211, |
|
"step": 663 |
|
}, |
|
{ |
|
"epoch": 1.8450312717164699, |
|
"grad_norm": 0.26797670125961304, |
|
"learning_rate": 8.510204081632653e-05, |
|
"loss": 0.3665, |
|
"step": 664 |
|
}, |
|
{ |
|
"epoch": 1.847810979847116, |
|
"grad_norm": 0.2569994032382965, |
|
"learning_rate": 8.489795918367347e-05, |
|
"loss": 0.4167, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 1.8505906879777623, |
|
"grad_norm": 0.261764258146286, |
|
"learning_rate": 8.469387755102041e-05, |
|
"loss": 0.3976, |
|
"step": 666 |
|
}, |
|
{ |
|
"epoch": 1.8533703961084087, |
|
"grad_norm": 0.28137752413749695, |
|
"learning_rate": 8.448979591836736e-05, |
|
"loss": 0.2909, |
|
"step": 667 |
|
}, |
|
{ |
|
"epoch": 1.856150104239055, |
|
"grad_norm": 0.2962735891342163, |
|
"learning_rate": 8.428571428571429e-05, |
|
"loss": 0.3961, |
|
"step": 668 |
|
}, |
|
{ |
|
"epoch": 1.8589298123697011, |
|
"grad_norm": 0.2491084784269333, |
|
"learning_rate": 8.408163265306123e-05, |
|
"loss": 0.334, |
|
"step": 669 |
|
}, |
|
{ |
|
"epoch": 1.8617095205003475, |
|
"grad_norm": 0.30861160159111023, |
|
"learning_rate": 8.387755102040816e-05, |
|
"loss": 0.3896, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 1.8644892286309938, |
|
"grad_norm": 0.2767215371131897, |
|
"learning_rate": 8.367346938775511e-05, |
|
"loss": 0.457, |
|
"step": 671 |
|
}, |
|
{ |
|
"epoch": 1.86726893676164, |
|
"grad_norm": 0.2909168303012848, |
|
"learning_rate": 8.346938775510204e-05, |
|
"loss": 0.4469, |
|
"step": 672 |
|
}, |
|
{ |
|
"epoch": 1.8700486448922864, |
|
"grad_norm": 0.30907315015792847, |
|
"learning_rate": 8.326530612244899e-05, |
|
"loss": 0.4008, |
|
"step": 673 |
|
}, |
|
{ |
|
"epoch": 1.8728283530229326, |
|
"grad_norm": 0.26147618889808655, |
|
"learning_rate": 8.306122448979592e-05, |
|
"loss": 0.3456, |
|
"step": 674 |
|
}, |
|
{ |
|
"epoch": 1.8756080611535788, |
|
"grad_norm": 0.280230313539505, |
|
"learning_rate": 8.285714285714287e-05, |
|
"loss": 0.454, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 1.8783877692842252, |
|
"grad_norm": 0.2674858570098877, |
|
"learning_rate": 8.26530612244898e-05, |
|
"loss": 0.3247, |
|
"step": 676 |
|
}, |
|
{ |
|
"epoch": 1.8811674774148714, |
|
"grad_norm": 0.26258382201194763, |
|
"learning_rate": 8.244897959183675e-05, |
|
"loss": 0.3402, |
|
"step": 677 |
|
}, |
|
{ |
|
"epoch": 1.8839471855455177, |
|
"grad_norm": 0.2922073006629944, |
|
"learning_rate": 8.224489795918367e-05, |
|
"loss": 0.4958, |
|
"step": 678 |
|
}, |
|
{ |
|
"epoch": 1.886726893676164, |
|
"grad_norm": 0.2971295714378357, |
|
"learning_rate": 8.204081632653062e-05, |
|
"loss": 0.3341, |
|
"step": 679 |
|
}, |
|
{ |
|
"epoch": 1.8895066018068103, |
|
"grad_norm": 0.27569159865379333, |
|
"learning_rate": 8.183673469387756e-05, |
|
"loss": 0.3873, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 1.8922863099374565, |
|
"grad_norm": 0.30602556467056274, |
|
"learning_rate": 8.163265306122449e-05, |
|
"loss": 0.2462, |
|
"step": 681 |
|
}, |
|
{ |
|
"epoch": 1.895066018068103, |
|
"grad_norm": 0.298258900642395, |
|
"learning_rate": 8.142857142857143e-05, |
|
"loss": 0.3863, |
|
"step": 682 |
|
}, |
|
{ |
|
"epoch": 1.8978457261987491, |
|
"grad_norm": 0.2906138598918915, |
|
"learning_rate": 8.122448979591836e-05, |
|
"loss": 0.4014, |
|
"step": 683 |
|
}, |
|
{ |
|
"epoch": 1.9006254343293953, |
|
"grad_norm": 0.2641445994377136, |
|
"learning_rate": 8.10204081632653e-05, |
|
"loss": 0.367, |
|
"step": 684 |
|
}, |
|
{ |
|
"epoch": 1.9034051424600418, |
|
"grad_norm": 0.7982620596885681, |
|
"learning_rate": 8.081632653061225e-05, |
|
"loss": 0.6423, |
|
"step": 685 |
|
}, |
|
{ |
|
"epoch": 1.906184850590688, |
|
"grad_norm": 0.26612165570259094, |
|
"learning_rate": 8.061224489795919e-05, |
|
"loss": 0.3586, |
|
"step": 686 |
|
}, |
|
{ |
|
"epoch": 1.9089645587213342, |
|
"grad_norm": 0.27895480394363403, |
|
"learning_rate": 8.040816326530612e-05, |
|
"loss": 0.3828, |
|
"step": 687 |
|
}, |
|
{ |
|
"epoch": 1.9117442668519806, |
|
"grad_norm": 0.2650473713874817, |
|
"learning_rate": 8.020408163265306e-05, |
|
"loss": 0.403, |
|
"step": 688 |
|
}, |
|
{ |
|
"epoch": 1.9145239749826268, |
|
"grad_norm": 0.2670430541038513, |
|
"learning_rate": 8e-05, |
|
"loss": 0.2993, |
|
"step": 689 |
|
}, |
|
{ |
|
"epoch": 1.917303683113273, |
|
"grad_norm": 0.27033767104148865, |
|
"learning_rate": 7.979591836734695e-05, |
|
"loss": 0.3602, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 1.9200833912439195, |
|
"grad_norm": 0.279803603887558, |
|
"learning_rate": 7.959183673469388e-05, |
|
"loss": 0.4034, |
|
"step": 691 |
|
}, |
|
{ |
|
"epoch": 1.9228630993745657, |
|
"grad_norm": 0.2839685082435608, |
|
"learning_rate": 7.938775510204082e-05, |
|
"loss": 0.3458, |
|
"step": 692 |
|
}, |
|
{ |
|
"epoch": 1.9256428075052119, |
|
"grad_norm": 0.2718431353569031, |
|
"learning_rate": 7.918367346938775e-05, |
|
"loss": 0.3323, |
|
"step": 693 |
|
}, |
|
{ |
|
"epoch": 1.9284225156358583, |
|
"grad_norm": 0.2734295725822449, |
|
"learning_rate": 7.897959183673471e-05, |
|
"loss": 0.2751, |
|
"step": 694 |
|
}, |
|
{ |
|
"epoch": 1.9312022237665045, |
|
"grad_norm": 0.3010067641735077, |
|
"learning_rate": 7.877551020408164e-05, |
|
"loss": 0.3508, |
|
"step": 695 |
|
}, |
|
{ |
|
"epoch": 1.9339819318971507, |
|
"grad_norm": 0.3028511703014374, |
|
"learning_rate": 7.857142857142858e-05, |
|
"loss": 0.3794, |
|
"step": 696 |
|
}, |
|
{ |
|
"epoch": 1.9367616400277972, |
|
"grad_norm": 0.26215803623199463, |
|
"learning_rate": 7.836734693877551e-05, |
|
"loss": 0.3737, |
|
"step": 697 |
|
}, |
|
{ |
|
"epoch": 1.9395413481584434, |
|
"grad_norm": 0.3025459051132202, |
|
"learning_rate": 7.816326530612245e-05, |
|
"loss": 0.44, |
|
"step": 698 |
|
}, |
|
{ |
|
"epoch": 1.9423210562890896, |
|
"grad_norm": 0.29042086005210876, |
|
"learning_rate": 7.79591836734694e-05, |
|
"loss": 0.3401, |
|
"step": 699 |
|
}, |
|
{ |
|
"epoch": 1.945100764419736, |
|
"grad_norm": 0.31652623414993286, |
|
"learning_rate": 7.775510204081632e-05, |
|
"loss": 0.4041, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.9478804725503822, |
|
"grad_norm": 0.2801991403102875, |
|
"learning_rate": 7.755102040816327e-05, |
|
"loss": 0.3173, |
|
"step": 701 |
|
}, |
|
{ |
|
"epoch": 1.9506601806810284, |
|
"grad_norm": 0.30574724078178406, |
|
"learning_rate": 7.73469387755102e-05, |
|
"loss": 0.3477, |
|
"step": 702 |
|
}, |
|
{ |
|
"epoch": 1.9534398888116749, |
|
"grad_norm": 0.3101007640361786, |
|
"learning_rate": 7.714285714285715e-05, |
|
"loss": 0.3974, |
|
"step": 703 |
|
}, |
|
{ |
|
"epoch": 1.956219596942321, |
|
"grad_norm": 0.27257412672042847, |
|
"learning_rate": 7.693877551020408e-05, |
|
"loss": 0.3449, |
|
"step": 704 |
|
}, |
|
{ |
|
"epoch": 1.9589993050729673, |
|
"grad_norm": 0.2764023542404175, |
|
"learning_rate": 7.673469387755103e-05, |
|
"loss": 0.3517, |
|
"step": 705 |
|
}, |
|
{ |
|
"epoch": 1.9617790132036137, |
|
"grad_norm": 0.29003384709358215, |
|
"learning_rate": 7.653061224489796e-05, |
|
"loss": 0.3886, |
|
"step": 706 |
|
}, |
|
{ |
|
"epoch": 1.96455872133426, |
|
"grad_norm": 0.4509872794151306, |
|
"learning_rate": 7.632653061224491e-05, |
|
"loss": 0.4117, |
|
"step": 707 |
|
}, |
|
{ |
|
"epoch": 1.9673384294649061, |
|
"grad_norm": 0.28539761900901794, |
|
"learning_rate": 7.612244897959184e-05, |
|
"loss": 0.3671, |
|
"step": 708 |
|
}, |
|
{ |
|
"epoch": 1.9701181375955525, |
|
"grad_norm": 0.27598991990089417, |
|
"learning_rate": 7.591836734693878e-05, |
|
"loss": 0.3327, |
|
"step": 709 |
|
}, |
|
{ |
|
"epoch": 1.9728978457261988, |
|
"grad_norm": 0.36246633529663086, |
|
"learning_rate": 7.571428571428571e-05, |
|
"loss": 0.4338, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 1.975677553856845, |
|
"grad_norm": 0.29050305485725403, |
|
"learning_rate": 7.551020408163266e-05, |
|
"loss": 0.5175, |
|
"step": 711 |
|
}, |
|
{ |
|
"epoch": 1.9784572619874914, |
|
"grad_norm": 0.4746512174606323, |
|
"learning_rate": 7.53061224489796e-05, |
|
"loss": 0.4983, |
|
"step": 712 |
|
}, |
|
{ |
|
"epoch": 1.9812369701181376, |
|
"grad_norm": 0.27185899019241333, |
|
"learning_rate": 7.510204081632653e-05, |
|
"loss": 0.3781, |
|
"step": 713 |
|
}, |
|
{ |
|
"epoch": 1.9840166782487838, |
|
"grad_norm": 0.28276991844177246, |
|
"learning_rate": 7.489795918367347e-05, |
|
"loss": 0.3137, |
|
"step": 714 |
|
}, |
|
{ |
|
"epoch": 1.9867963863794302, |
|
"grad_norm": 0.27965956926345825, |
|
"learning_rate": 7.469387755102041e-05, |
|
"loss": 0.3681, |
|
"step": 715 |
|
}, |
|
{ |
|
"epoch": 1.9895760945100764, |
|
"grad_norm": 0.245852991938591, |
|
"learning_rate": 7.448979591836736e-05, |
|
"loss": 0.3879, |
|
"step": 716 |
|
}, |
|
{ |
|
"epoch": 1.9923558026407227, |
|
"grad_norm": 0.28121981024742126, |
|
"learning_rate": 7.428571428571429e-05, |
|
"loss": 0.4085, |
|
"step": 717 |
|
}, |
|
{ |
|
"epoch": 1.995135510771369, |
|
"grad_norm": 0.2986759543418884, |
|
"learning_rate": 7.408163265306123e-05, |
|
"loss": 0.2768, |
|
"step": 718 |
|
}, |
|
{ |
|
"epoch": 1.9979152189020153, |
|
"grad_norm": 0.26322054862976074, |
|
"learning_rate": 7.387755102040816e-05, |
|
"loss": 0.3901, |
|
"step": 719 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 0.3256114721298218, |
|
"learning_rate": 7.36734693877551e-05, |
|
"loss": 0.3634, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 2.0027797081306464, |
|
"grad_norm": 0.27563294768333435, |
|
"learning_rate": 7.346938775510205e-05, |
|
"loss": 0.3962, |
|
"step": 721 |
|
}, |
|
{ |
|
"epoch": 2.0055594162612924, |
|
"grad_norm": 0.36298710107803345, |
|
"learning_rate": 7.326530612244899e-05, |
|
"loss": 0.3589, |
|
"step": 722 |
|
}, |
|
{ |
|
"epoch": 2.008339124391939, |
|
"grad_norm": 0.28008803725242615, |
|
"learning_rate": 7.306122448979592e-05, |
|
"loss": 0.3883, |
|
"step": 723 |
|
}, |
|
{ |
|
"epoch": 2.0111188325225853, |
|
"grad_norm": 0.34973540902137756, |
|
"learning_rate": 7.285714285714286e-05, |
|
"loss": 0.4629, |
|
"step": 724 |
|
}, |
|
{ |
|
"epoch": 2.0138985406532313, |
|
"grad_norm": 0.38526445627212524, |
|
"learning_rate": 7.26530612244898e-05, |
|
"loss": 0.4442, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 2.0166782487838777, |
|
"grad_norm": 0.2718683183193207, |
|
"learning_rate": 7.244897959183675e-05, |
|
"loss": 0.3513, |
|
"step": 726 |
|
}, |
|
{ |
|
"epoch": 2.019457956914524, |
|
"grad_norm": 0.26923590898513794, |
|
"learning_rate": 7.224489795918368e-05, |
|
"loss": 0.326, |
|
"step": 727 |
|
}, |
|
{ |
|
"epoch": 2.02223766504517, |
|
"grad_norm": 0.2851525545120239, |
|
"learning_rate": 7.204081632653062e-05, |
|
"loss": 0.2804, |
|
"step": 728 |
|
}, |
|
{ |
|
"epoch": 2.0250173731758165, |
|
"grad_norm": 0.3298304080963135, |
|
"learning_rate": 7.183673469387755e-05, |
|
"loss": 0.4877, |
|
"step": 729 |
|
}, |
|
{ |
|
"epoch": 2.027797081306463, |
|
"grad_norm": 0.304770290851593, |
|
"learning_rate": 7.163265306122449e-05, |
|
"loss": 0.3777, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 2.030576789437109, |
|
"grad_norm": 0.2693670094013214, |
|
"learning_rate": 7.142857142857143e-05, |
|
"loss": 0.3315, |
|
"step": 731 |
|
}, |
|
{ |
|
"epoch": 2.0333564975677554, |
|
"grad_norm": 0.2542417049407959, |
|
"learning_rate": 7.122448979591836e-05, |
|
"loss": 0.3493, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 2.036136205698402, |
|
"grad_norm": 0.2779878079891205, |
|
"learning_rate": 7.10204081632653e-05, |
|
"loss": 0.3686, |
|
"step": 733 |
|
}, |
|
{ |
|
"epoch": 2.038915913829048, |
|
"grad_norm": 0.3229525685310364, |
|
"learning_rate": 7.081632653061225e-05, |
|
"loss": 0.4557, |
|
"step": 734 |
|
}, |
|
{ |
|
"epoch": 2.0416956219596942, |
|
"grad_norm": 0.30455026030540466, |
|
"learning_rate": 7.061224489795919e-05, |
|
"loss": 0.3291, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 2.0444753300903407, |
|
"grad_norm": 0.26810574531555176, |
|
"learning_rate": 7.040816326530612e-05, |
|
"loss": 0.3269, |
|
"step": 736 |
|
}, |
|
{ |
|
"epoch": 2.0472550382209866, |
|
"grad_norm": 0.26769372820854187, |
|
"learning_rate": 7.020408163265306e-05, |
|
"loss": 0.3385, |
|
"step": 737 |
|
}, |
|
{ |
|
"epoch": 2.050034746351633, |
|
"grad_norm": 0.3141888380050659, |
|
"learning_rate": 7e-05, |
|
"loss": 0.3584, |
|
"step": 738 |
|
}, |
|
{ |
|
"epoch": 2.0528144544822795, |
|
"grad_norm": 0.31162917613983154, |
|
"learning_rate": 6.979591836734695e-05, |
|
"loss": 0.3418, |
|
"step": 739 |
|
}, |
|
{ |
|
"epoch": 2.0555941626129255, |
|
"grad_norm": 0.28655368089675903, |
|
"learning_rate": 6.959183673469388e-05, |
|
"loss": 0.3356, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 2.058373870743572, |
|
"grad_norm": 0.3098087012767792, |
|
"learning_rate": 6.938775510204082e-05, |
|
"loss": 0.4558, |
|
"step": 741 |
|
}, |
|
{ |
|
"epoch": 2.0611535788742184, |
|
"grad_norm": 0.28569385409355164, |
|
"learning_rate": 6.918367346938775e-05, |
|
"loss": 0.2827, |
|
"step": 742 |
|
}, |
|
{ |
|
"epoch": 2.0639332870048643, |
|
"grad_norm": 0.2704540491104126, |
|
"learning_rate": 6.897959183673471e-05, |
|
"loss": 0.3118, |
|
"step": 743 |
|
}, |
|
{ |
|
"epoch": 2.0667129951355108, |
|
"grad_norm": 0.2655661702156067, |
|
"learning_rate": 6.877551020408164e-05, |
|
"loss": 0.3569, |
|
"step": 744 |
|
}, |
|
{ |
|
"epoch": 2.069492703266157, |
|
"grad_norm": 0.292889267206192, |
|
"learning_rate": 6.857142857142858e-05, |
|
"loss": 0.4, |
|
"step": 745 |
|
}, |
|
{ |
|
"epoch": 2.072272411396803, |
|
"grad_norm": 0.30672675371170044, |
|
"learning_rate": 6.836734693877551e-05, |
|
"loss": 0.4197, |
|
"step": 746 |
|
}, |
|
{ |
|
"epoch": 2.0750521195274496, |
|
"grad_norm": 0.2781777083873749, |
|
"learning_rate": 6.816326530612245e-05, |
|
"loss": 0.3949, |
|
"step": 747 |
|
}, |
|
{ |
|
"epoch": 2.077831827658096, |
|
"grad_norm": 0.2681523561477661, |
|
"learning_rate": 6.79591836734694e-05, |
|
"loss": 0.3073, |
|
"step": 748 |
|
}, |
|
{ |
|
"epoch": 2.080611535788742, |
|
"grad_norm": 0.2789252996444702, |
|
"learning_rate": 6.775510204081633e-05, |
|
"loss": 0.308, |
|
"step": 749 |
|
}, |
|
{ |
|
"epoch": 2.0833912439193885, |
|
"grad_norm": 0.3158692717552185, |
|
"learning_rate": 6.755102040816327e-05, |
|
"loss": 0.4182, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 2.086170952050035, |
|
"grad_norm": 0.2763406038284302, |
|
"learning_rate": 6.73469387755102e-05, |
|
"loss": 0.3707, |
|
"step": 751 |
|
}, |
|
{ |
|
"epoch": 2.088950660180681, |
|
"grad_norm": 0.2762833833694458, |
|
"learning_rate": 6.714285714285714e-05, |
|
"loss": 0.3287, |
|
"step": 752 |
|
}, |
|
{ |
|
"epoch": 2.0917303683113273, |
|
"grad_norm": 0.26660582423210144, |
|
"learning_rate": 6.693877551020408e-05, |
|
"loss": 0.2388, |
|
"step": 753 |
|
}, |
|
{ |
|
"epoch": 2.0945100764419737, |
|
"grad_norm": 0.28191235661506653, |
|
"learning_rate": 6.673469387755103e-05, |
|
"loss": 0.2989, |
|
"step": 754 |
|
}, |
|
{ |
|
"epoch": 2.0972897845726197, |
|
"grad_norm": 0.29017800092697144, |
|
"learning_rate": 6.653061224489796e-05, |
|
"loss": 0.3203, |
|
"step": 755 |
|
}, |
|
{ |
|
"epoch": 2.100069492703266, |
|
"grad_norm": 0.2984425127506256, |
|
"learning_rate": 6.63265306122449e-05, |
|
"loss": 0.3803, |
|
"step": 756 |
|
}, |
|
{ |
|
"epoch": 2.1028492008339126, |
|
"grad_norm": 0.30566540360450745, |
|
"learning_rate": 6.612244897959184e-05, |
|
"loss": 0.3248, |
|
"step": 757 |
|
}, |
|
{ |
|
"epoch": 2.1056289089645586, |
|
"grad_norm": 0.2737206220626831, |
|
"learning_rate": 6.591836734693878e-05, |
|
"loss": 0.3448, |
|
"step": 758 |
|
}, |
|
{ |
|
"epoch": 2.108408617095205, |
|
"grad_norm": 0.3234175145626068, |
|
"learning_rate": 6.571428571428571e-05, |
|
"loss": 0.3773, |
|
"step": 759 |
|
}, |
|
{ |
|
"epoch": 2.1111883252258514, |
|
"grad_norm": 0.3321143090724945, |
|
"learning_rate": 6.551020408163266e-05, |
|
"loss": 0.4053, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 2.1139680333564974, |
|
"grad_norm": 0.3120156228542328, |
|
"learning_rate": 6.530612244897959e-05, |
|
"loss": 0.3614, |
|
"step": 761 |
|
}, |
|
{ |
|
"epoch": 2.116747741487144, |
|
"grad_norm": 0.3069126307964325, |
|
"learning_rate": 6.510204081632654e-05, |
|
"loss": 0.3392, |
|
"step": 762 |
|
}, |
|
{ |
|
"epoch": 2.1195274496177903, |
|
"grad_norm": 0.2996152937412262, |
|
"learning_rate": 6.489795918367347e-05, |
|
"loss": 0.4007, |
|
"step": 763 |
|
}, |
|
{ |
|
"epoch": 2.1223071577484363, |
|
"grad_norm": 0.28609102964401245, |
|
"learning_rate": 6.469387755102042e-05, |
|
"loss": 0.3619, |
|
"step": 764 |
|
}, |
|
{ |
|
"epoch": 2.1250868658790827, |
|
"grad_norm": 0.28463420271873474, |
|
"learning_rate": 6.448979591836734e-05, |
|
"loss": 0.3684, |
|
"step": 765 |
|
}, |
|
{ |
|
"epoch": 2.127866574009729, |
|
"grad_norm": 0.2721307873725891, |
|
"learning_rate": 6.428571428571429e-05, |
|
"loss": 0.3153, |
|
"step": 766 |
|
}, |
|
{ |
|
"epoch": 2.130646282140375, |
|
"grad_norm": 0.29114431142807007, |
|
"learning_rate": 6.408163265306123e-05, |
|
"loss": 0.3558, |
|
"step": 767 |
|
}, |
|
{ |
|
"epoch": 2.1334259902710215, |
|
"grad_norm": 0.2995449900627136, |
|
"learning_rate": 6.387755102040816e-05, |
|
"loss": 0.3632, |
|
"step": 768 |
|
}, |
|
{ |
|
"epoch": 2.136205698401668, |
|
"grad_norm": 0.28987494111061096, |
|
"learning_rate": 6.36734693877551e-05, |
|
"loss": 0.3928, |
|
"step": 769 |
|
}, |
|
{ |
|
"epoch": 2.138985406532314, |
|
"grad_norm": 0.2857901155948639, |
|
"learning_rate": 6.346938775510203e-05, |
|
"loss": 0.4101, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 2.1417651146629604, |
|
"grad_norm": 0.2712436020374298, |
|
"learning_rate": 6.326530612244899e-05, |
|
"loss": 0.379, |
|
"step": 771 |
|
}, |
|
{ |
|
"epoch": 2.144544822793607, |
|
"grad_norm": 0.2805217504501343, |
|
"learning_rate": 6.306122448979592e-05, |
|
"loss": 0.3018, |
|
"step": 772 |
|
}, |
|
{ |
|
"epoch": 2.147324530924253, |
|
"grad_norm": 0.29896092414855957, |
|
"learning_rate": 6.285714285714286e-05, |
|
"loss": 0.3065, |
|
"step": 773 |
|
}, |
|
{ |
|
"epoch": 2.1501042390548992, |
|
"grad_norm": 0.27301499247550964, |
|
"learning_rate": 6.265306122448979e-05, |
|
"loss": 0.3213, |
|
"step": 774 |
|
}, |
|
{ |
|
"epoch": 2.1528839471855457, |
|
"grad_norm": 0.2966340482234955, |
|
"learning_rate": 6.244897959183675e-05, |
|
"loss": 0.5333, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 2.1556636553161916, |
|
"grad_norm": 0.31995972990989685, |
|
"learning_rate": 6.224489795918368e-05, |
|
"loss": 0.3398, |
|
"step": 776 |
|
}, |
|
{ |
|
"epoch": 2.158443363446838, |
|
"grad_norm": 0.2765233516693115, |
|
"learning_rate": 6.204081632653062e-05, |
|
"loss": 0.3369, |
|
"step": 777 |
|
}, |
|
{ |
|
"epoch": 2.1612230715774845, |
|
"grad_norm": 0.2692531645298004, |
|
"learning_rate": 6.183673469387755e-05, |
|
"loss": 0.3347, |
|
"step": 778 |
|
}, |
|
{ |
|
"epoch": 2.1640027797081305, |
|
"grad_norm": 0.2936429977416992, |
|
"learning_rate": 6.163265306122449e-05, |
|
"loss": 0.3794, |
|
"step": 779 |
|
}, |
|
{ |
|
"epoch": 2.166782487838777, |
|
"grad_norm": 0.28063851594924927, |
|
"learning_rate": 6.142857142857143e-05, |
|
"loss": 0.295, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 2.1695621959694233, |
|
"grad_norm": 0.29556742310523987, |
|
"learning_rate": 6.122448979591838e-05, |
|
"loss": 0.3784, |
|
"step": 781 |
|
}, |
|
{ |
|
"epoch": 2.1723419041000693, |
|
"grad_norm": 0.2919243276119232, |
|
"learning_rate": 6.102040816326531e-05, |
|
"loss": 0.2643, |
|
"step": 782 |
|
}, |
|
{ |
|
"epoch": 2.1751216122307158, |
|
"grad_norm": 0.28946730494499207, |
|
"learning_rate": 6.081632653061224e-05, |
|
"loss": 0.3054, |
|
"step": 783 |
|
}, |
|
{ |
|
"epoch": 2.177901320361362, |
|
"grad_norm": 0.3075507581233978, |
|
"learning_rate": 6.061224489795919e-05, |
|
"loss": 0.3919, |
|
"step": 784 |
|
}, |
|
{ |
|
"epoch": 2.180681028492008, |
|
"grad_norm": 0.27501824498176575, |
|
"learning_rate": 6.040816326530613e-05, |
|
"loss": 0.3273, |
|
"step": 785 |
|
}, |
|
{ |
|
"epoch": 2.1834607366226546, |
|
"grad_norm": 0.30170246958732605, |
|
"learning_rate": 6.0204081632653065e-05, |
|
"loss": 0.3575, |
|
"step": 786 |
|
}, |
|
{ |
|
"epoch": 2.186240444753301, |
|
"grad_norm": 0.3145295977592468, |
|
"learning_rate": 6e-05, |
|
"loss": 0.3849, |
|
"step": 787 |
|
}, |
|
{ |
|
"epoch": 2.189020152883947, |
|
"grad_norm": 0.2984488308429718, |
|
"learning_rate": 5.979591836734694e-05, |
|
"loss": 0.3593, |
|
"step": 788 |
|
}, |
|
{ |
|
"epoch": 2.1917998610145935, |
|
"grad_norm": 0.27264416217803955, |
|
"learning_rate": 5.959183673469389e-05, |
|
"loss": 0.2832, |
|
"step": 789 |
|
}, |
|
{ |
|
"epoch": 2.19457956914524, |
|
"grad_norm": 0.27420729398727417, |
|
"learning_rate": 5.9387755102040824e-05, |
|
"loss": 0.362, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 2.197359277275886, |
|
"grad_norm": 0.31026434898376465, |
|
"learning_rate": 5.918367346938776e-05, |
|
"loss": 0.3357, |
|
"step": 791 |
|
}, |
|
{ |
|
"epoch": 2.2001389854065323, |
|
"grad_norm": 0.31472471356391907, |
|
"learning_rate": 5.8979591836734696e-05, |
|
"loss": 0.3327, |
|
"step": 792 |
|
}, |
|
{ |
|
"epoch": 2.2029186935371787, |
|
"grad_norm": 0.2863774597644806, |
|
"learning_rate": 5.877551020408164e-05, |
|
"loss": 0.3819, |
|
"step": 793 |
|
}, |
|
{ |
|
"epoch": 2.2056984016678247, |
|
"grad_norm": 0.35021165013313293, |
|
"learning_rate": 5.8571428571428575e-05, |
|
"loss": 0.3515, |
|
"step": 794 |
|
}, |
|
{ |
|
"epoch": 2.208478109798471, |
|
"grad_norm": 0.2918795347213745, |
|
"learning_rate": 5.836734693877551e-05, |
|
"loss": 0.4171, |
|
"step": 795 |
|
}, |
|
{ |
|
"epoch": 2.2112578179291176, |
|
"grad_norm": 0.2973978817462921, |
|
"learning_rate": 5.816326530612245e-05, |
|
"loss": 0.3353, |
|
"step": 796 |
|
}, |
|
{ |
|
"epoch": 2.2140375260597636, |
|
"grad_norm": 0.3020112216472626, |
|
"learning_rate": 5.7959183673469384e-05, |
|
"loss": 0.3738, |
|
"step": 797 |
|
}, |
|
{ |
|
"epoch": 2.21681723419041, |
|
"grad_norm": 0.2840433716773987, |
|
"learning_rate": 5.775510204081633e-05, |
|
"loss": 0.3097, |
|
"step": 798 |
|
}, |
|
{ |
|
"epoch": 2.2195969423210564, |
|
"grad_norm": 0.29329997301101685, |
|
"learning_rate": 5.755102040816327e-05, |
|
"loss": 0.3839, |
|
"step": 799 |
|
}, |
|
{ |
|
"epoch": 2.2223766504517024, |
|
"grad_norm": 0.29126960039138794, |
|
"learning_rate": 5.7346938775510206e-05, |
|
"loss": 0.3542, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 2.225156358582349, |
|
"grad_norm": 0.30318763852119446, |
|
"learning_rate": 5.714285714285714e-05, |
|
"loss": 0.3761, |
|
"step": 801 |
|
}, |
|
{ |
|
"epoch": 2.2279360667129953, |
|
"grad_norm": 0.4189499616622925, |
|
"learning_rate": 5.693877551020409e-05, |
|
"loss": 0.405, |
|
"step": 802 |
|
}, |
|
{ |
|
"epoch": 2.2307157748436413, |
|
"grad_norm": 0.2965310513973236, |
|
"learning_rate": 5.673469387755103e-05, |
|
"loss": 0.3449, |
|
"step": 803 |
|
}, |
|
{ |
|
"epoch": 2.2334954829742877, |
|
"grad_norm": 0.2907022535800934, |
|
"learning_rate": 5.6530612244897964e-05, |
|
"loss": 0.3174, |
|
"step": 804 |
|
}, |
|
{ |
|
"epoch": 2.236275191104934, |
|
"grad_norm": 0.32027149200439453, |
|
"learning_rate": 5.63265306122449e-05, |
|
"loss": 0.3595, |
|
"step": 805 |
|
}, |
|
{ |
|
"epoch": 2.23905489923558, |
|
"grad_norm": 0.33651795983314514, |
|
"learning_rate": 5.6122448979591836e-05, |
|
"loss": 0.4608, |
|
"step": 806 |
|
}, |
|
{ |
|
"epoch": 2.2418346073662265, |
|
"grad_norm": 0.3164324462413788, |
|
"learning_rate": 5.5918367346938786e-05, |
|
"loss": 0.3974, |
|
"step": 807 |
|
}, |
|
{ |
|
"epoch": 2.244614315496873, |
|
"grad_norm": 0.28678157925605774, |
|
"learning_rate": 5.571428571428572e-05, |
|
"loss": 0.3312, |
|
"step": 808 |
|
}, |
|
{ |
|
"epoch": 2.247394023627519, |
|
"grad_norm": 0.27763816714286804, |
|
"learning_rate": 5.551020408163266e-05, |
|
"loss": 0.2562, |
|
"step": 809 |
|
}, |
|
{ |
|
"epoch": 2.2501737317581654, |
|
"grad_norm": 0.28213679790496826, |
|
"learning_rate": 5.5306122448979594e-05, |
|
"loss": 0.3686, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 2.252953439888812, |
|
"grad_norm": 0.28878074884414673, |
|
"learning_rate": 5.510204081632653e-05, |
|
"loss": 0.3424, |
|
"step": 811 |
|
}, |
|
{ |
|
"epoch": 2.255733148019458, |
|
"grad_norm": 0.3575231432914734, |
|
"learning_rate": 5.4897959183673473e-05, |
|
"loss": 0.3903, |
|
"step": 812 |
|
}, |
|
{ |
|
"epoch": 2.258512856150104, |
|
"grad_norm": 0.3188578486442566, |
|
"learning_rate": 5.469387755102041e-05, |
|
"loss": 0.2794, |
|
"step": 813 |
|
}, |
|
{ |
|
"epoch": 2.2612925642807506, |
|
"grad_norm": 0.2995283603668213, |
|
"learning_rate": 5.4489795918367346e-05, |
|
"loss": 0.3653, |
|
"step": 814 |
|
}, |
|
{ |
|
"epoch": 2.2640722724113966, |
|
"grad_norm": 0.288529634475708, |
|
"learning_rate": 5.428571428571428e-05, |
|
"loss": 0.2245, |
|
"step": 815 |
|
}, |
|
{ |
|
"epoch": 2.266851980542043, |
|
"grad_norm": 0.29823359847068787, |
|
"learning_rate": 5.408163265306123e-05, |
|
"loss": 0.4187, |
|
"step": 816 |
|
}, |
|
{ |
|
"epoch": 2.2696316886726895, |
|
"grad_norm": 0.2879655063152313, |
|
"learning_rate": 5.387755102040817e-05, |
|
"loss": 0.2789, |
|
"step": 817 |
|
}, |
|
{ |
|
"epoch": 2.2724113968033355, |
|
"grad_norm": 0.3046426773071289, |
|
"learning_rate": 5.3673469387755104e-05, |
|
"loss": 0.3757, |
|
"step": 818 |
|
}, |
|
{ |
|
"epoch": 2.275191104933982, |
|
"grad_norm": 0.28883394598960876, |
|
"learning_rate": 5.346938775510204e-05, |
|
"loss": 0.3508, |
|
"step": 819 |
|
}, |
|
{ |
|
"epoch": 2.2779708130646283, |
|
"grad_norm": 0.2815608084201813, |
|
"learning_rate": 5.3265306122448976e-05, |
|
"loss": 0.2869, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 2.2807505211952743, |
|
"grad_norm": 0.27973008155822754, |
|
"learning_rate": 5.3061224489795926e-05, |
|
"loss": 0.3415, |
|
"step": 821 |
|
}, |
|
{ |
|
"epoch": 2.2835302293259208, |
|
"grad_norm": 0.3324487805366516, |
|
"learning_rate": 5.285714285714286e-05, |
|
"loss": 0.3302, |
|
"step": 822 |
|
}, |
|
{ |
|
"epoch": 2.286309937456567, |
|
"grad_norm": 0.30007997155189514, |
|
"learning_rate": 5.26530612244898e-05, |
|
"loss": 0.3831, |
|
"step": 823 |
|
}, |
|
{ |
|
"epoch": 2.289089645587213, |
|
"grad_norm": 0.3399452269077301, |
|
"learning_rate": 5.2448979591836735e-05, |
|
"loss": 0.3788, |
|
"step": 824 |
|
}, |
|
{ |
|
"epoch": 2.2918693537178596, |
|
"grad_norm": 0.28804537653923035, |
|
"learning_rate": 5.224489795918368e-05, |
|
"loss": 0.2934, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 2.294649061848506, |
|
"grad_norm": 0.34307217597961426, |
|
"learning_rate": 5.2040816326530614e-05, |
|
"loss": 0.3915, |
|
"step": 826 |
|
}, |
|
{ |
|
"epoch": 2.297428769979152, |
|
"grad_norm": 0.26888802647590637, |
|
"learning_rate": 5.1836734693877557e-05, |
|
"loss": 0.2455, |
|
"step": 827 |
|
}, |
|
{ |
|
"epoch": 2.3002084781097984, |
|
"grad_norm": 0.2897135615348816, |
|
"learning_rate": 5.163265306122449e-05, |
|
"loss": 0.2991, |
|
"step": 828 |
|
}, |
|
{ |
|
"epoch": 2.302988186240445, |
|
"grad_norm": 0.2886922359466553, |
|
"learning_rate": 5.142857142857143e-05, |
|
"loss": 0.3311, |
|
"step": 829 |
|
}, |
|
{ |
|
"epoch": 2.305767894371091, |
|
"grad_norm": 0.28240638971328735, |
|
"learning_rate": 5.122448979591837e-05, |
|
"loss": 0.2945, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 2.3085476025017373, |
|
"grad_norm": 0.29429686069488525, |
|
"learning_rate": 5.102040816326531e-05, |
|
"loss": 0.3996, |
|
"step": 831 |
|
}, |
|
{ |
|
"epoch": 2.3113273106323837, |
|
"grad_norm": 0.30782943964004517, |
|
"learning_rate": 5.0816326530612244e-05, |
|
"loss": 0.3983, |
|
"step": 832 |
|
}, |
|
{ |
|
"epoch": 2.3141070187630297, |
|
"grad_norm": 0.2880532741546631, |
|
"learning_rate": 5.061224489795918e-05, |
|
"loss": 0.4122, |
|
"step": 833 |
|
}, |
|
{ |
|
"epoch": 2.316886726893676, |
|
"grad_norm": 0.29007789492607117, |
|
"learning_rate": 5.040816326530613e-05, |
|
"loss": 0.2972, |
|
"step": 834 |
|
}, |
|
{ |
|
"epoch": 2.3196664350243226, |
|
"grad_norm": 0.31411439180374146, |
|
"learning_rate": 5.0204081632653066e-05, |
|
"loss": 0.3198, |
|
"step": 835 |
|
}, |
|
{ |
|
"epoch": 2.3224461431549686, |
|
"grad_norm": 0.2791215479373932, |
|
"learning_rate": 5e-05, |
|
"loss": 0.2701, |
|
"step": 836 |
|
}, |
|
{ |
|
"epoch": 2.325225851285615, |
|
"grad_norm": 0.3492055833339691, |
|
"learning_rate": 4.979591836734694e-05, |
|
"loss": 0.2776, |
|
"step": 837 |
|
}, |
|
{ |
|
"epoch": 2.3280055594162614, |
|
"grad_norm": 0.2989236116409302, |
|
"learning_rate": 4.959183673469388e-05, |
|
"loss": 0.3567, |
|
"step": 838 |
|
}, |
|
{ |
|
"epoch": 2.3307852675469074, |
|
"grad_norm": 0.2757430970668793, |
|
"learning_rate": 4.938775510204082e-05, |
|
"loss": 0.311, |
|
"step": 839 |
|
}, |
|
{ |
|
"epoch": 2.333564975677554, |
|
"grad_norm": 0.27763327956199646, |
|
"learning_rate": 4.918367346938776e-05, |
|
"loss": 0.3006, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 2.3363446838082003, |
|
"grad_norm": 0.2845268249511719, |
|
"learning_rate": 4.89795918367347e-05, |
|
"loss": 0.3055, |
|
"step": 841 |
|
}, |
|
{ |
|
"epoch": 2.3391243919388462, |
|
"grad_norm": 0.29126036167144775, |
|
"learning_rate": 4.877551020408164e-05, |
|
"loss": 0.3033, |
|
"step": 842 |
|
}, |
|
{ |
|
"epoch": 2.3419041000694927, |
|
"grad_norm": 0.29777050018310547, |
|
"learning_rate": 4.8571428571428576e-05, |
|
"loss": 0.2959, |
|
"step": 843 |
|
}, |
|
{ |
|
"epoch": 2.344683808200139, |
|
"grad_norm": 0.27072009444236755, |
|
"learning_rate": 4.836734693877551e-05, |
|
"loss": 0.3122, |
|
"step": 844 |
|
}, |
|
{ |
|
"epoch": 2.347463516330785, |
|
"grad_norm": 0.31056344509124756, |
|
"learning_rate": 4.816326530612245e-05, |
|
"loss": 0.3475, |
|
"step": 845 |
|
}, |
|
{ |
|
"epoch": 2.3502432244614315, |
|
"grad_norm": 0.2831839323043823, |
|
"learning_rate": 4.795918367346939e-05, |
|
"loss": 0.3048, |
|
"step": 846 |
|
}, |
|
{ |
|
"epoch": 2.353022932592078, |
|
"grad_norm": 0.2710871398448944, |
|
"learning_rate": 4.775510204081633e-05, |
|
"loss": 0.3236, |
|
"step": 847 |
|
}, |
|
{ |
|
"epoch": 2.355802640722724, |
|
"grad_norm": 0.2876143753528595, |
|
"learning_rate": 4.7551020408163263e-05, |
|
"loss": 0.3534, |
|
"step": 848 |
|
}, |
|
{ |
|
"epoch": 2.3585823488533704, |
|
"grad_norm": 0.2987620532512665, |
|
"learning_rate": 4.7346938775510206e-05, |
|
"loss": 0.3324, |
|
"step": 849 |
|
}, |
|
{ |
|
"epoch": 2.361362056984017, |
|
"grad_norm": 0.2859969735145569, |
|
"learning_rate": 4.714285714285714e-05, |
|
"loss": 0.3564, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 2.364141765114663, |
|
"grad_norm": 0.3041338324546814, |
|
"learning_rate": 4.6938775510204086e-05, |
|
"loss": 0.4652, |
|
"step": 851 |
|
}, |
|
{ |
|
"epoch": 2.366921473245309, |
|
"grad_norm": 0.28285476565361023, |
|
"learning_rate": 4.673469387755102e-05, |
|
"loss": 0.3897, |
|
"step": 852 |
|
}, |
|
{ |
|
"epoch": 2.3697011813759556, |
|
"grad_norm": 0.307158499956131, |
|
"learning_rate": 4.653061224489796e-05, |
|
"loss": 0.3194, |
|
"step": 853 |
|
}, |
|
{ |
|
"epoch": 2.3724808895066016, |
|
"grad_norm": 0.31216737627983093, |
|
"learning_rate": 4.63265306122449e-05, |
|
"loss": 0.3554, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 2.375260597637248, |
|
"grad_norm": 0.3206304907798767, |
|
"learning_rate": 4.612244897959184e-05, |
|
"loss": 0.4214, |
|
"step": 855 |
|
}, |
|
{ |
|
"epoch": 2.3780403057678945, |
|
"grad_norm": 0.29540154337882996, |
|
"learning_rate": 4.591836734693878e-05, |
|
"loss": 0.3425, |
|
"step": 856 |
|
}, |
|
{ |
|
"epoch": 2.3808200138985405, |
|
"grad_norm": 0.2654211223125458, |
|
"learning_rate": 4.5714285714285716e-05, |
|
"loss": 0.2876, |
|
"step": 857 |
|
}, |
|
{ |
|
"epoch": 2.383599722029187, |
|
"grad_norm": 0.2998722195625305, |
|
"learning_rate": 4.551020408163266e-05, |
|
"loss": 0.3234, |
|
"step": 858 |
|
}, |
|
{ |
|
"epoch": 2.3863794301598333, |
|
"grad_norm": 0.305711030960083, |
|
"learning_rate": 4.5306122448979595e-05, |
|
"loss": 0.3229, |
|
"step": 859 |
|
}, |
|
{ |
|
"epoch": 2.3891591382904793, |
|
"grad_norm": 0.29556146264076233, |
|
"learning_rate": 4.510204081632654e-05, |
|
"loss": 0.3013, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 2.3919388464211258, |
|
"grad_norm": 0.2814411222934723, |
|
"learning_rate": 4.4897959183673474e-05, |
|
"loss": 0.2751, |
|
"step": 861 |
|
}, |
|
{ |
|
"epoch": 2.394718554551772, |
|
"grad_norm": 0.29568272829055786, |
|
"learning_rate": 4.469387755102041e-05, |
|
"loss": 0.3107, |
|
"step": 862 |
|
}, |
|
{ |
|
"epoch": 2.397498262682418, |
|
"grad_norm": 0.28841209411621094, |
|
"learning_rate": 4.448979591836735e-05, |
|
"loss": 0.3088, |
|
"step": 863 |
|
}, |
|
{ |
|
"epoch": 2.4002779708130646, |
|
"grad_norm": 0.3105250298976898, |
|
"learning_rate": 4.428571428571428e-05, |
|
"loss": 0.3483, |
|
"step": 864 |
|
}, |
|
{ |
|
"epoch": 2.403057678943711, |
|
"grad_norm": 0.287725567817688, |
|
"learning_rate": 4.4081632653061226e-05, |
|
"loss": 0.3556, |
|
"step": 865 |
|
}, |
|
{ |
|
"epoch": 2.405837387074357, |
|
"grad_norm": 0.33084672689437866, |
|
"learning_rate": 4.387755102040816e-05, |
|
"loss": 0.4709, |
|
"step": 866 |
|
}, |
|
{ |
|
"epoch": 2.4086170952050034, |
|
"grad_norm": 0.3187430799007416, |
|
"learning_rate": 4.3673469387755105e-05, |
|
"loss": 0.3438, |
|
"step": 867 |
|
}, |
|
{ |
|
"epoch": 2.41139680333565, |
|
"grad_norm": 0.29251527786254883, |
|
"learning_rate": 4.346938775510204e-05, |
|
"loss": 0.4519, |
|
"step": 868 |
|
}, |
|
{ |
|
"epoch": 2.414176511466296, |
|
"grad_norm": 0.27169179916381836, |
|
"learning_rate": 4.3265306122448984e-05, |
|
"loss": 0.3039, |
|
"step": 869 |
|
}, |
|
{ |
|
"epoch": 2.4169562195969423, |
|
"grad_norm": 0.2867499589920044, |
|
"learning_rate": 4.306122448979592e-05, |
|
"loss": 0.3444, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 2.4197359277275887, |
|
"grad_norm": 0.33954930305480957, |
|
"learning_rate": 4.2857142857142856e-05, |
|
"loss": 0.4085, |
|
"step": 871 |
|
}, |
|
{ |
|
"epoch": 2.4225156358582347, |
|
"grad_norm": 0.31500715017318726, |
|
"learning_rate": 4.26530612244898e-05, |
|
"loss": 0.3574, |
|
"step": 872 |
|
}, |
|
{ |
|
"epoch": 2.425295343988881, |
|
"grad_norm": 0.31862321496009827, |
|
"learning_rate": 4.2448979591836735e-05, |
|
"loss": 0.361, |
|
"step": 873 |
|
}, |
|
{ |
|
"epoch": 2.4280750521195276, |
|
"grad_norm": 0.28617167472839355, |
|
"learning_rate": 4.224489795918368e-05, |
|
"loss": 0.362, |
|
"step": 874 |
|
}, |
|
{ |
|
"epoch": 2.4308547602501736, |
|
"grad_norm": 0.3150579333305359, |
|
"learning_rate": 4.2040816326530615e-05, |
|
"loss": 0.281, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 2.43363446838082, |
|
"grad_norm": 0.28917694091796875, |
|
"learning_rate": 4.183673469387756e-05, |
|
"loss": 0.3215, |
|
"step": 876 |
|
}, |
|
{ |
|
"epoch": 2.4364141765114664, |
|
"grad_norm": 0.29830271005630493, |
|
"learning_rate": 4.1632653061224494e-05, |
|
"loss": 0.2891, |
|
"step": 877 |
|
}, |
|
{ |
|
"epoch": 2.4391938846421124, |
|
"grad_norm": 0.30335041880607605, |
|
"learning_rate": 4.1428571428571437e-05, |
|
"loss": 0.3507, |
|
"step": 878 |
|
}, |
|
{ |
|
"epoch": 2.441973592772759, |
|
"grad_norm": 0.2745303213596344, |
|
"learning_rate": 4.122448979591837e-05, |
|
"loss": 0.366, |
|
"step": 879 |
|
}, |
|
{ |
|
"epoch": 2.4447533009034053, |
|
"grad_norm": 0.26936864852905273, |
|
"learning_rate": 4.102040816326531e-05, |
|
"loss": 0.3247, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 2.4475330090340512, |
|
"grad_norm": 0.3225851058959961, |
|
"learning_rate": 4.0816326530612245e-05, |
|
"loss": 0.3757, |
|
"step": 881 |
|
}, |
|
{ |
|
"epoch": 2.4503127171646977, |
|
"grad_norm": 0.28156349062919617, |
|
"learning_rate": 4.061224489795918e-05, |
|
"loss": 0.219, |
|
"step": 882 |
|
}, |
|
{ |
|
"epoch": 2.453092425295344, |
|
"grad_norm": 0.46358248591423035, |
|
"learning_rate": 4.0408163265306124e-05, |
|
"loss": 0.3567, |
|
"step": 883 |
|
}, |
|
{ |
|
"epoch": 2.45587213342599, |
|
"grad_norm": 0.2787911295890808, |
|
"learning_rate": 4.020408163265306e-05, |
|
"loss": 0.275, |
|
"step": 884 |
|
}, |
|
{ |
|
"epoch": 2.4586518415566365, |
|
"grad_norm": 0.3119618892669678, |
|
"learning_rate": 4e-05, |
|
"loss": 0.3376, |
|
"step": 885 |
|
}, |
|
{ |
|
"epoch": 2.461431549687283, |
|
"grad_norm": 0.2867750823497772, |
|
"learning_rate": 3.979591836734694e-05, |
|
"loss": 0.2792, |
|
"step": 886 |
|
}, |
|
{ |
|
"epoch": 2.464211257817929, |
|
"grad_norm": 0.29150545597076416, |
|
"learning_rate": 3.9591836734693876e-05, |
|
"loss": 0.3543, |
|
"step": 887 |
|
}, |
|
{ |
|
"epoch": 2.4669909659485754, |
|
"grad_norm": 0.28618574142456055, |
|
"learning_rate": 3.938775510204082e-05, |
|
"loss": 0.3556, |
|
"step": 888 |
|
}, |
|
{ |
|
"epoch": 2.469770674079222, |
|
"grad_norm": 0.2973514199256897, |
|
"learning_rate": 3.9183673469387755e-05, |
|
"loss": 0.3071, |
|
"step": 889 |
|
}, |
|
{ |
|
"epoch": 2.472550382209868, |
|
"grad_norm": 0.36355265974998474, |
|
"learning_rate": 3.89795918367347e-05, |
|
"loss": 0.37, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 2.475330090340514, |
|
"grad_norm": 0.28134381771087646, |
|
"learning_rate": 3.8775510204081634e-05, |
|
"loss": 0.3189, |
|
"step": 891 |
|
}, |
|
{ |
|
"epoch": 2.4781097984711606, |
|
"grad_norm": 0.29104146361351013, |
|
"learning_rate": 3.857142857142858e-05, |
|
"loss": 0.3467, |
|
"step": 892 |
|
}, |
|
{ |
|
"epoch": 2.4808895066018066, |
|
"grad_norm": 0.3080955743789673, |
|
"learning_rate": 3.836734693877551e-05, |
|
"loss": 0.439, |
|
"step": 893 |
|
}, |
|
{ |
|
"epoch": 2.483669214732453, |
|
"grad_norm": 0.3061097264289856, |
|
"learning_rate": 3.8163265306122456e-05, |
|
"loss": 0.2978, |
|
"step": 894 |
|
}, |
|
{ |
|
"epoch": 2.4864489228630995, |
|
"grad_norm": 0.30751508474349976, |
|
"learning_rate": 3.795918367346939e-05, |
|
"loss": 0.3819, |
|
"step": 895 |
|
}, |
|
{ |
|
"epoch": 2.4892286309937455, |
|
"grad_norm": 0.2971366047859192, |
|
"learning_rate": 3.775510204081633e-05, |
|
"loss": 0.2825, |
|
"step": 896 |
|
}, |
|
{ |
|
"epoch": 2.492008339124392, |
|
"grad_norm": 0.29852065443992615, |
|
"learning_rate": 3.7551020408163264e-05, |
|
"loss": 0.341, |
|
"step": 897 |
|
}, |
|
{ |
|
"epoch": 2.4947880472550383, |
|
"grad_norm": 0.28704988956451416, |
|
"learning_rate": 3.734693877551021e-05, |
|
"loss": 0.3001, |
|
"step": 898 |
|
}, |
|
{ |
|
"epoch": 2.4975677553856843, |
|
"grad_norm": 0.3021228015422821, |
|
"learning_rate": 3.7142857142857143e-05, |
|
"loss": 0.3204, |
|
"step": 899 |
|
}, |
|
{ |
|
"epoch": 2.5003474635163307, |
|
"grad_norm": 0.30266740918159485, |
|
"learning_rate": 3.693877551020408e-05, |
|
"loss": 0.299, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 2.5003474635163307, |
|
"eval_loss": 0.4358259439468384, |
|
"eval_runtime": 212.5847, |
|
"eval_samples_per_second": 1.693, |
|
"eval_steps_per_second": 1.693, |
|
"step": 900 |
|
} |
|
  ],
  "logging_steps": 1,
  "max_steps": 1080,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 300,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 2.8347978086418432e+17,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}