|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.4995098039215686, |
|
"eval_steps": 500, |
|
"global_step": 5099, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.004901960784313725, |
|
"grad_norm": 4.5476274490356445, |
|
"learning_rate": 7.84313725490196e-06, |
|
"loss": 0.6369, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.00980392156862745, |
|
"grad_norm": 2.52302885055542, |
|
"learning_rate": 1.568627450980392e-05, |
|
"loss": 0.484, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.014705882352941176, |
|
"grad_norm": 3.1443543434143066, |
|
"learning_rate": 2.3529411764705884e-05, |
|
"loss": 0.3252, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.0196078431372549, |
|
"grad_norm": 2.1440389156341553, |
|
"learning_rate": 3.137254901960784e-05, |
|
"loss": 0.2779, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.024509803921568627, |
|
"grad_norm": 1.7569645643234253, |
|
"learning_rate": 3.9215686274509805e-05, |
|
"loss": 0.2387, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.029411764705882353, |
|
"grad_norm": 1.7137173414230347, |
|
"learning_rate": 4.705882352941177e-05, |
|
"loss": 0.2146, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.03431372549019608, |
|
"grad_norm": 1.0686582326889038, |
|
"learning_rate": 5.490196078431373e-05, |
|
"loss": 0.1638, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.0392156862745098, |
|
"grad_norm": 1.834192156791687, |
|
"learning_rate": 6.274509803921569e-05, |
|
"loss": 0.1594, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.04411764705882353, |
|
"grad_norm": 2.2320666313171387, |
|
"learning_rate": 7.058823529411765e-05, |
|
"loss": 0.156, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.049019607843137254, |
|
"grad_norm": 1.2987866401672363, |
|
"learning_rate": 7.843137254901961e-05, |
|
"loss": 0.1447, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.05392156862745098, |
|
"grad_norm": 1.5711545944213867, |
|
"learning_rate": 8.627450980392158e-05, |
|
"loss": 0.1449, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.058823529411764705, |
|
"grad_norm": 0.8892576098442078, |
|
"learning_rate": 9.411764705882353e-05, |
|
"loss": 0.1513, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.06372549019607843, |
|
"grad_norm": 1.1401337385177612, |
|
"learning_rate": 0.00010196078431372549, |
|
"loss": 0.1128, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.06862745098039216, |
|
"grad_norm": 1.375543475151062, |
|
"learning_rate": 0.00010980392156862746, |
|
"loss": 0.1186, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.07352941176470588, |
|
"grad_norm": 1.6076676845550537, |
|
"learning_rate": 0.00011764705882352942, |
|
"loss": 0.1396, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.0784313725490196, |
|
"grad_norm": 1.2637161016464233, |
|
"learning_rate": 0.00012549019607843137, |
|
"loss": 0.1187, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.08333333333333333, |
|
"grad_norm": 0.5812987685203552, |
|
"learning_rate": 0.00013333333333333334, |
|
"loss": 0.1155, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.08823529411764706, |
|
"grad_norm": 0.7302483916282654, |
|
"learning_rate": 0.0001411764705882353, |
|
"loss": 0.1068, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.09313725490196079, |
|
"grad_norm": 0.6003187894821167, |
|
"learning_rate": 0.00014901960784313728, |
|
"loss": 0.1084, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.09803921568627451, |
|
"grad_norm": 1.3157514333724976, |
|
"learning_rate": 0.00015686274509803922, |
|
"loss": 0.1127, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.10294117647058823, |
|
"grad_norm": 0.8480639457702637, |
|
"learning_rate": 0.0001647058823529412, |
|
"loss": 0.1025, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.10784313725490197, |
|
"grad_norm": 1.0640238523483276, |
|
"learning_rate": 0.00017254901960784316, |
|
"loss": 0.1214, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.11274509803921569, |
|
"grad_norm": 0.7853420972824097, |
|
"learning_rate": 0.0001803921568627451, |
|
"loss": 0.1103, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.11764705882352941, |
|
"grad_norm": 0.760675847530365, |
|
"learning_rate": 0.00018823529411764707, |
|
"loss": 0.1071, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.12254901960784313, |
|
"grad_norm": 1.1404098272323608, |
|
"learning_rate": 0.000196078431372549, |
|
"loss": 0.1004, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.12745098039215685, |
|
"grad_norm": 0.6359620690345764, |
|
"learning_rate": 0.0001999994742235753, |
|
"loss": 0.1065, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.1323529411764706, |
|
"grad_norm": 0.7933241724967957, |
|
"learning_rate": 0.00019999526804535039, |
|
"loss": 0.0943, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.13725490196078433, |
|
"grad_norm": 1.599077582359314, |
|
"learning_rate": 0.00019998685586582082, |
|
"loss": 0.1304, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.14215686274509803, |
|
"grad_norm": 0.8844221234321594, |
|
"learning_rate": 0.00019997423803881975, |
|
"loss": 0.0917, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.14705882352941177, |
|
"grad_norm": 1.2456647157669067, |
|
"learning_rate": 0.00019995741509507825, |
|
"loss": 0.111, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.15196078431372548, |
|
"grad_norm": 0.6590626239776611, |
|
"learning_rate": 0.00019993638774220307, |
|
"loss": 0.1022, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.1568627450980392, |
|
"grad_norm": 0.7061448693275452, |
|
"learning_rate": 0.00019991115686464675, |
|
"loss": 0.0938, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.16176470588235295, |
|
"grad_norm": 1.0512727499008179, |
|
"learning_rate": 0.00019988172352367056, |
|
"loss": 0.1059, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.16666666666666666, |
|
"grad_norm": 0.6884363889694214, |
|
"learning_rate": 0.00019984808895729978, |
|
"loss": 0.0801, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.1715686274509804, |
|
"grad_norm": 0.8961064219474792, |
|
"learning_rate": 0.00019981025458027169, |
|
"loss": 0.0872, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.17647058823529413, |
|
"grad_norm": 0.7410668730735779, |
|
"learning_rate": 0.00019976822198397595, |
|
"loss": 0.0935, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.18137254901960784, |
|
"grad_norm": 0.8089532256126404, |
|
"learning_rate": 0.00019972199293638777, |
|
"loss": 0.0806, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.18627450980392157, |
|
"grad_norm": 0.6644020676612854, |
|
"learning_rate": 0.00019967156938199355, |
|
"loss": 0.0885, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.19117647058823528, |
|
"grad_norm": 0.8422799110412598, |
|
"learning_rate": 0.00019961695344170895, |
|
"loss": 0.0952, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.19607843137254902, |
|
"grad_norm": 0.8162615299224854, |
|
"learning_rate": 0.00019955814741278986, |
|
"loss": 0.0802, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.20098039215686275, |
|
"grad_norm": 0.7302709221839905, |
|
"learning_rate": 0.0001994951537687357, |
|
"loss": 0.0884, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.20588235294117646, |
|
"grad_norm": 0.7032344937324524, |
|
"learning_rate": 0.00019942797515918527, |
|
"loss": 0.0896, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.2107843137254902, |
|
"grad_norm": 0.8042428493499756, |
|
"learning_rate": 0.00019935661440980554, |
|
"loss": 0.0811, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.21568627450980393, |
|
"grad_norm": 0.6656658053398132, |
|
"learning_rate": 0.00019928107452217255, |
|
"loss": 0.0856, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.22058823529411764, |
|
"grad_norm": 1.2202825546264648, |
|
"learning_rate": 0.00019920135867364534, |
|
"loss": 0.0895, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.22549019607843138, |
|
"grad_norm": 0.8210168480873108, |
|
"learning_rate": 0.00019911747021723216, |
|
"loss": 0.0807, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.23039215686274508, |
|
"grad_norm": 0.7217456102371216, |
|
"learning_rate": 0.0001990294126814496, |
|
"loss": 0.0814, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.23529411764705882, |
|
"grad_norm": 0.743126392364502, |
|
"learning_rate": 0.00019893718977017402, |
|
"loss": 0.0887, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.24019607843137256, |
|
"grad_norm": 0.7190248370170593, |
|
"learning_rate": 0.00019884080536248578, |
|
"loss": 0.0859, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.24509803921568626, |
|
"grad_norm": 0.5253967046737671, |
|
"learning_rate": 0.00019874026351250623, |
|
"loss": 0.0678, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 0.8271141052246094, |
|
"learning_rate": 0.00019863556844922696, |
|
"loss": 0.0762, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.2549019607843137, |
|
"grad_norm": 0.7656545639038086, |
|
"learning_rate": 0.0001985267245763321, |
|
"loss": 0.0724, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.25980392156862747, |
|
"grad_norm": 0.6673869490623474, |
|
"learning_rate": 0.00019841373647201297, |
|
"loss": 0.0817, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.2647058823529412, |
|
"grad_norm": 0.880395770072937, |
|
"learning_rate": 0.00019829660888877565, |
|
"loss": 0.0897, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.2696078431372549, |
|
"grad_norm": 0.7278539538383484, |
|
"learning_rate": 0.00019817534675324093, |
|
"loss": 0.0808, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.27450980392156865, |
|
"grad_norm": 0.5380986928939819, |
|
"learning_rate": 0.00019804995516593712, |
|
"loss": 0.077, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.27941176470588236, |
|
"grad_norm": 0.9306485652923584, |
|
"learning_rate": 0.00019792043940108564, |
|
"loss": 0.0883, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.28431372549019607, |
|
"grad_norm": 0.9304268956184387, |
|
"learning_rate": 0.00019778680490637902, |
|
"loss": 0.0899, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.28921568627450983, |
|
"grad_norm": 0.7331838607788086, |
|
"learning_rate": 0.00019764905730275184, |
|
"loss": 0.0709, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.29411764705882354, |
|
"grad_norm": 0.47580260038375854, |
|
"learning_rate": 0.00019750720238414425, |
|
"loss": 0.0857, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.29901960784313725, |
|
"grad_norm": 0.5752102732658386, |
|
"learning_rate": 0.0001973612461172583, |
|
"loss": 0.0838, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.30392156862745096, |
|
"grad_norm": 0.4644894599914551, |
|
"learning_rate": 0.00019721119464130707, |
|
"loss": 0.0851, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.3088235294117647, |
|
"grad_norm": 0.7036497592926025, |
|
"learning_rate": 0.00019705705426775616, |
|
"loss": 0.0741, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.3137254901960784, |
|
"grad_norm": 0.499897301197052, |
|
"learning_rate": 0.0001968988314800585, |
|
"loss": 0.0718, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.31862745098039214, |
|
"grad_norm": 0.7211794853210449, |
|
"learning_rate": 0.0001967365329333816, |
|
"loss": 0.0798, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.3235294117647059, |
|
"grad_norm": 0.6176502108573914, |
|
"learning_rate": 0.0001965701654543274, |
|
"loss": 0.0695, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.3284313725490196, |
|
"grad_norm": 0.7395395636558533, |
|
"learning_rate": 0.0001963997360406454, |
|
"loss": 0.0581, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.3333333333333333, |
|
"grad_norm": 0.5304160714149475, |
|
"learning_rate": 0.00019622525186093818, |
|
"loss": 0.0826, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.3382352941176471, |
|
"grad_norm": 0.46235349774360657, |
|
"learning_rate": 0.0001960467202543599, |
|
"loss": 0.056, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.3431372549019608, |
|
"grad_norm": 0.5242049098014832, |
|
"learning_rate": 0.00019586414873030758, |
|
"loss": 0.0728, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.3480392156862745, |
|
"grad_norm": 0.552486777305603, |
|
"learning_rate": 0.00019567754496810534, |
|
"loss": 0.0806, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.35294117647058826, |
|
"grad_norm": 0.5002785325050354, |
|
"learning_rate": 0.0001954869168166812, |
|
"loss": 0.0643, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.35784313725490197, |
|
"grad_norm": 0.47353097796440125, |
|
"learning_rate": 0.00019529227229423717, |
|
"loss": 0.0838, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.3627450980392157, |
|
"grad_norm": 0.4200286865234375, |
|
"learning_rate": 0.00019509361958791174, |
|
"loss": 0.0776, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.36764705882352944, |
|
"grad_norm": 0.6603316068649292, |
|
"learning_rate": 0.00019489096705343578, |
|
"loss": 0.0705, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.37254901960784315, |
|
"grad_norm": 0.37562692165374756, |
|
"learning_rate": 0.0001946843232147809, |
|
"loss": 0.072, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.37745098039215685, |
|
"grad_norm": 0.6199838519096375, |
|
"learning_rate": 0.0001944736967638009, |
|
"loss": 0.0649, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.38235294117647056, |
|
"grad_norm": 0.7614375948905945, |
|
"learning_rate": 0.0001942590965598663, |
|
"loss": 0.0735, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.3872549019607843, |
|
"grad_norm": 0.671489953994751, |
|
"learning_rate": 0.00019404053162949155, |
|
"loss": 0.065, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.39215686274509803, |
|
"grad_norm": 0.5170246362686157, |
|
"learning_rate": 0.0001938180111659556, |
|
"loss": 0.078, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.39705882352941174, |
|
"grad_norm": 0.5392031073570251, |
|
"learning_rate": 0.00019359154452891483, |
|
"loss": 0.063, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.4019607843137255, |
|
"grad_norm": 0.6858069896697998, |
|
"learning_rate": 0.00019336114124400978, |
|
"loss": 0.0783, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.4068627450980392, |
|
"grad_norm": 0.7257099151611328, |
|
"learning_rate": 0.0001931268110024642, |
|
"loss": 0.0798, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.4117647058823529, |
|
"grad_norm": 0.7296270132064819, |
|
"learning_rate": 0.00019288856366067746, |
|
"loss": 0.0619, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.4166666666666667, |
|
"grad_norm": 0.6048017740249634, |
|
"learning_rate": 0.0001926464092398101, |
|
"loss": 0.0634, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.4215686274509804, |
|
"grad_norm": 0.3223126232624054, |
|
"learning_rate": 0.00019240035792536216, |
|
"loss": 0.0755, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.4264705882352941, |
|
"grad_norm": 0.45046138763427734, |
|
"learning_rate": 0.0001921504200667449, |
|
"loss": 0.0661, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.43137254901960786, |
|
"grad_norm": 0.609027624130249, |
|
"learning_rate": 0.00019189660617684537, |
|
"loss": 0.0711, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.4362745098039216, |
|
"grad_norm": 0.4166688323020935, |
|
"learning_rate": 0.00019163892693158425, |
|
"loss": 0.0644, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.4411764705882353, |
|
"grad_norm": 0.6250641345977783, |
|
"learning_rate": 0.00019137739316946685, |
|
"loss": 0.0674, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.44607843137254904, |
|
"grad_norm": 0.6781248450279236, |
|
"learning_rate": 0.00019111201589112718, |
|
"loss": 0.0657, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.45098039215686275, |
|
"grad_norm": 0.9098891615867615, |
|
"learning_rate": 0.00019084280625886516, |
|
"loss": 0.0765, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.45588235294117646, |
|
"grad_norm": 0.5926252603530884, |
|
"learning_rate": 0.00019056977559617731, |
|
"loss": 0.0896, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.46078431372549017, |
|
"grad_norm": 0.6467915773391724, |
|
"learning_rate": 0.0001902929353872803, |
|
"loss": 0.0595, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.46568627450980393, |
|
"grad_norm": 0.4950433671474457, |
|
"learning_rate": 0.0001900122972766279, |
|
"loss": 0.0651, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.47058823529411764, |
|
"grad_norm": 0.6317784190177917, |
|
"learning_rate": 0.0001897278730684213, |
|
"loss": 0.08, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.47549019607843135, |
|
"grad_norm": 0.47558578848838806, |
|
"learning_rate": 0.0001894396747261125, |
|
"loss": 0.0622, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.4803921568627451, |
|
"grad_norm": 0.5610472559928894, |
|
"learning_rate": 0.0001891477143719012, |
|
"loss": 0.0667, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.4852941176470588, |
|
"grad_norm": 0.7227151989936829, |
|
"learning_rate": 0.00018885200428622474, |
|
"loss": 0.0648, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.49019607843137253, |
|
"grad_norm": 0.49453797936439514, |
|
"learning_rate": 0.0001885525569072418, |
|
"loss": 0.0663, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.4950980392156863, |
|
"grad_norm": 0.4297734200954437, |
|
"learning_rate": 0.000188249384830309, |
|
"loss": 0.0779, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 0.39416739344596863, |
|
"learning_rate": 0.00018794250080745136, |
|
"loss": 0.0577, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.5049019607843137, |
|
"grad_norm": 0.6955050230026245, |
|
"learning_rate": 0.0001876319177468256, |
|
"loss": 0.0579, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.5098039215686274, |
|
"grad_norm": 0.5533928871154785, |
|
"learning_rate": 0.00018731764871217753, |
|
"loss": 0.0583, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.5147058823529411, |
|
"grad_norm": 0.4718644618988037, |
|
"learning_rate": 0.00018699970692229233, |
|
"loss": 0.0609, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.5196078431372549, |
|
"grad_norm": 0.39921796321868896, |
|
"learning_rate": 0.00018667810575043864, |
|
"loss": 0.0612, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.5245098039215687, |
|
"grad_norm": 0.34913963079452515, |
|
"learning_rate": 0.0001863528587238061, |
|
"loss": 0.0522, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.5294117647058824, |
|
"grad_norm": 0.5829554796218872, |
|
"learning_rate": 0.00018602397952293618, |
|
"loss": 0.0651, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.5343137254901961, |
|
"grad_norm": 0.7142338156700134, |
|
"learning_rate": 0.00018569148198114695, |
|
"loss": 0.0643, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.5392156862745098, |
|
"grad_norm": 0.24581728875637054, |
|
"learning_rate": 0.00018535538008395124, |
|
"loss": 0.0537, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.5441176470588235, |
|
"grad_norm": 0.41139382123947144, |
|
"learning_rate": 0.0001850156879684681, |
|
"loss": 0.0631, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.5490196078431373, |
|
"grad_norm": 0.4532317519187927, |
|
"learning_rate": 0.00018467241992282843, |
|
"loss": 0.0573, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.553921568627451, |
|
"grad_norm": 0.45865532755851746, |
|
"learning_rate": 0.00018432559038557397, |
|
"loss": 0.053, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.5588235294117647, |
|
"grad_norm": 0.3976840078830719, |
|
"learning_rate": 0.00018397521394504995, |
|
"loss": 0.0529, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.5637254901960784, |
|
"grad_norm": 0.47105035185813904, |
|
"learning_rate": 0.00018362130533879133, |
|
"loss": 0.0671, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.5686274509803921, |
|
"grad_norm": 0.5433268547058105, |
|
"learning_rate": 0.00018326387945290313, |
|
"loss": 0.0529, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.5735294117647058, |
|
"grad_norm": 0.6220820546150208, |
|
"learning_rate": 0.00018290295132143415, |
|
"loss": 0.0697, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.5784313725490197, |
|
"grad_norm": 0.38451075553894043, |
|
"learning_rate": 0.00018253853612574473, |
|
"loss": 0.0621, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.5833333333333334, |
|
"grad_norm": 0.49342110753059387, |
|
"learning_rate": 0.00018217064919386807, |
|
"loss": 0.0603, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.5882352941176471, |
|
"grad_norm": 0.5240128636360168, |
|
"learning_rate": 0.00018179930599986554, |
|
"loss": 0.0614, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.5931372549019608, |
|
"grad_norm": 0.5015797019004822, |
|
"learning_rate": 0.0001814245221631758, |
|
"loss": 0.0651, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.5980392156862745, |
|
"grad_norm": 0.7029892802238464, |
|
"learning_rate": 0.0001810463134479579, |
|
"loss": 0.0598, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.6029411764705882, |
|
"grad_norm": 0.3569225072860718, |
|
"learning_rate": 0.00018066469576242806, |
|
"loss": 0.0479, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.6078431372549019, |
|
"grad_norm": 0.4940333068370819, |
|
"learning_rate": 0.00018027968515819072, |
|
"loss": 0.055, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.6127450980392157, |
|
"grad_norm": 0.5233299732208252, |
|
"learning_rate": 0.00017989129782956323, |
|
"loss": 0.0555, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.6176470588235294, |
|
"grad_norm": 0.35107848048210144, |
|
"learning_rate": 0.00017949955011289465, |
|
"loss": 0.0472, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.6225490196078431, |
|
"grad_norm": 0.42003870010375977, |
|
"learning_rate": 0.00017910445848587885, |
|
"loss": 0.0454, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.6274509803921569, |
|
"grad_norm": 0.24233393371105194, |
|
"learning_rate": 0.00017870603956686117, |
|
"loss": 0.0631, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.6323529411764706, |
|
"grad_norm": 0.5557372570037842, |
|
"learning_rate": 0.0001783043101141395, |
|
"loss": 0.0628, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.6372549019607843, |
|
"grad_norm": 0.33980950713157654, |
|
"learning_rate": 0.00017789928702525952, |
|
"loss": 0.0591, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.6421568627450981, |
|
"grad_norm": 0.2716699242591858, |
|
"learning_rate": 0.00017749098733630368, |
|
"loss": 0.0584, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.6470588235294118, |
|
"grad_norm": 0.42181700468063354, |
|
"learning_rate": 0.00017707942822117495, |
|
"loss": 0.0572, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.6519607843137255, |
|
"grad_norm": 0.46250826120376587, |
|
"learning_rate": 0.00017666462699087422, |
|
"loss": 0.0614, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.6568627450980392, |
|
"grad_norm": 0.7147281169891357, |
|
"learning_rate": 0.00017624660109277223, |
|
"loss": 0.0666, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.6617647058823529, |
|
"grad_norm": 0.6105577945709229, |
|
"learning_rate": 0.00017582536810987576, |
|
"loss": 0.0508, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.6666666666666666, |
|
"grad_norm": 0.5564696788787842, |
|
"learning_rate": 0.00017540094576008796, |
|
"loss": 0.0581, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.6715686274509803, |
|
"grad_norm": 0.4955359399318695, |
|
"learning_rate": 0.00017497335189546308, |
|
"loss": 0.0569, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.6764705882352942, |
|
"grad_norm": 0.40812528133392334, |
|
"learning_rate": 0.0001745426045014558, |
|
"loss": 0.065, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.6813725490196079, |
|
"grad_norm": 0.31670013070106506, |
|
"learning_rate": 0.00017410872169616447, |
|
"loss": 0.0632, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.6862745098039216, |
|
"grad_norm": 0.5076479911804199, |
|
"learning_rate": 0.00017367172172956906, |
|
"loss": 0.0558, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.6911764705882353, |
|
"grad_norm": 0.5511890053749084, |
|
"learning_rate": 0.0001732316229827637, |
|
"loss": 0.0669, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.696078431372549, |
|
"grad_norm": 0.32073989510536194, |
|
"learning_rate": 0.00017278844396718336, |
|
"loss": 0.0543, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.7009803921568627, |
|
"grad_norm": 0.5955519080162048, |
|
"learning_rate": 0.00017234220332382528, |
|
"loss": 0.0594, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.7058823529411765, |
|
"grad_norm": 0.5735410451889038, |
|
"learning_rate": 0.00017189291982246493, |
|
"loss": 0.0498, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.7107843137254902, |
|
"grad_norm": 0.4336249530315399, |
|
"learning_rate": 0.0001714406123608665, |
|
"loss": 0.0577, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.7156862745098039, |
|
"grad_norm": 0.2760525047779083, |
|
"learning_rate": 0.00017098529996398796, |
|
"loss": 0.05, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.7205882352941176, |
|
"grad_norm": 0.466795951128006, |
|
"learning_rate": 0.00017052700178318088, |
|
"loss": 0.0435, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.7254901960784313, |
|
"grad_norm": 0.2780659794807434, |
|
"learning_rate": 0.00017006573709538492, |
|
"loss": 0.0516, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.7303921568627451, |
|
"grad_norm": 0.31002551317214966, |
|
"learning_rate": 0.00016960152530231696, |
|
"loss": 0.0494, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.7352941176470589, |
|
"grad_norm": 0.4070112407207489, |
|
"learning_rate": 0.00016913438592965497, |
|
"loss": 0.0594, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.7401960784313726, |
|
"grad_norm": 0.5858240127563477, |
|
"learning_rate": 0.00016866433862621692, |
|
"loss": 0.0421, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.7450980392156863, |
|
"grad_norm": 0.2468300759792328, |
|
"learning_rate": 0.00016819140316313397, |
|
"loss": 0.0499, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 0.4881417155265808, |
|
"learning_rate": 0.00016771559943301926, |
|
"loss": 0.0557, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.7549019607843137, |
|
"grad_norm": 0.428586483001709, |
|
"learning_rate": 0.00016723694744913087, |
|
"loss": 0.0547, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.7598039215686274, |
|
"grad_norm": 0.4626677334308624, |
|
"learning_rate": 0.0001667554673445302, |
|
"loss": 0.0577, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.7647058823529411, |
|
"grad_norm": 0.44555196166038513, |
|
"learning_rate": 0.000166271179371235, |
|
"loss": 0.0498, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.7696078431372549, |
|
"grad_norm": 0.4426194131374359, |
|
"learning_rate": 0.0001657841038993677, |
|
"loss": 0.0491, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.7745098039215687, |
|
"grad_norm": 0.3198953866958618, |
|
"learning_rate": 0.00016529426141629843, |
|
"loss": 0.0472, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.7794117647058824, |
|
"grad_norm": 0.2759292423725128, |
|
"learning_rate": 0.0001648016725257834, |
|
"loss": 0.0508, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.7843137254901961, |
|
"grad_norm": 0.33216890692710876, |
|
"learning_rate": 0.00016430635794709817, |
|
"loss": 0.0516, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.7892156862745098, |
|
"grad_norm": 0.40735071897506714, |
|
"learning_rate": 0.0001638083385141662, |
|
"loss": 0.0549, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.7941176470588235, |
|
"grad_norm": 0.3739156424999237, |
|
"learning_rate": 0.0001633076351746827, |
|
"loss": 0.0543, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.7990196078431373, |
|
"grad_norm": 0.2865970730781555, |
|
"learning_rate": 0.0001628042689892331, |
|
"loss": 0.0557, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.803921568627451, |
|
"grad_norm": 0.6009504795074463, |
|
"learning_rate": 0.00016229826113040767, |
|
"loss": 0.0481, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.8088235294117647, |
|
"grad_norm": 0.2973253130912781, |
|
"learning_rate": 0.00016178963288191072, |
|
"loss": 0.0465, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.8137254901960784, |
|
"grad_norm": 0.36205539107322693, |
|
"learning_rate": 0.00016127840563766527, |
|
"loss": 0.0676, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.8186274509803921, |
|
"grad_norm": 0.41221827268600464, |
|
"learning_rate": 0.0001607646009009135, |
|
"loss": 0.0544, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.8235294117647058, |
|
"grad_norm": 0.7448700666427612, |
|
"learning_rate": 0.00016024824028331195, |
|
"loss": 0.0544, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.8284313725490197, |
|
"grad_norm": 0.5625671744346619, |
|
"learning_rate": 0.0001597293455040227, |
|
"loss": 0.0659, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.8333333333333334, |
|
"grad_norm": 0.5039217472076416, |
|
"learning_rate": 0.00015920793838879966, |
|
"loss": 0.0522, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.8382352941176471, |
|
"grad_norm": 0.6583822965621948, |
|
"learning_rate": 0.00015868404086907077, |
|
"loss": 0.0473, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.8431372549019608, |
|
"grad_norm": 0.4799802899360657, |
|
"learning_rate": 0.00015815767498101522, |
|
"loss": 0.0502, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.8480392156862745, |
|
"grad_norm": 0.38270649313926697, |
|
"learning_rate": 0.00015762886286463683, |
|
"loss": 0.0488, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.8529411764705882, |
|
"grad_norm": 0.5844431519508362, |
|
"learning_rate": 0.0001570976267628326, |
|
"loss": 0.0506, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.8578431372549019, |
|
"grad_norm": 0.3698306977748871, |
|
"learning_rate": 0.00015656398902045727, |
|
"loss": 0.0474, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.8627450980392157, |
|
"grad_norm": 0.4788702428340912, |
|
"learning_rate": 0.00015602797208338337, |
|
"loss": 0.0452, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.8676470588235294, |
|
"grad_norm": 0.47195127606391907, |
|
"learning_rate": 0.00015548959849755715, |
|
"loss": 0.0497, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.8725490196078431, |
|
"grad_norm": 0.3890342712402344, |
|
"learning_rate": 0.00015494889090805018, |
|
"loss": 0.0466, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.8774509803921569, |
|
"grad_norm": 0.24695785343647003, |
|
"learning_rate": 0.00015440587205810692, |
|
"loss": 0.0525, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.8823529411764706, |
|
"grad_norm": 0.41040509939193726, |
|
"learning_rate": 0.00015386056478818814, |
|
"loss": 0.0556, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.8872549019607843, |
|
"grad_norm": 0.5128910541534424, |
|
"learning_rate": 0.00015331299203501, |
|
"loss": 0.0485, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.8921568627450981, |
|
"grad_norm": 0.3270062804222107, |
|
"learning_rate": 0.0001527631768305796, |
|
"loss": 0.0423, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.8970588235294118, |
|
"grad_norm": 0.43017369508743286, |
|
"learning_rate": 0.00015221114230122584, |
|
"loss": 0.0461, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.9019607843137255, |
|
"grad_norm": 0.501888632774353, |
|
"learning_rate": 0.00015165691166662705, |
|
"loss": 0.0472, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.9068627450980392, |
|
"grad_norm": 0.286864310503006, |
|
"learning_rate": 0.00015110050823883406, |
|
"loss": 0.0418, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.9117647058823529, |
|
"grad_norm": 0.5109400153160095, |
|
"learning_rate": 0.00015054195542128968, |
|
"loss": 0.0426, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.9166666666666666, |
|
"grad_norm": 0.32924002408981323, |
|
"learning_rate": 0.00014998127670784448, |
|
"loss": 0.0389, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.9215686274509803, |
|
"grad_norm": 0.21681684255599976, |
|
"learning_rate": 0.0001494184956817684, |
|
"loss": 0.0487, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.9264705882352942, |
|
"grad_norm": 0.20006486773490906, |
|
"learning_rate": 0.00014885363601475888, |
|
"loss": 0.0521, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.9313725490196079, |
|
"grad_norm": 0.6207095980644226, |
|
"learning_rate": 0.00014828672146594511, |
|
"loss": 0.0542, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.9362745098039216, |
|
"grad_norm": 0.5559191703796387, |
|
"learning_rate": 0.00014771777588088884, |
|
"loss": 0.0446, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 0.9411764705882353, |
|
"grad_norm": 0.3139231503009796, |
|
"learning_rate": 0.00014714682319058112, |
|
"loss": 0.0403, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.946078431372549, |
|
"grad_norm": 0.3132689893245697, |
|
"learning_rate": 0.00014657388741043606, |
|
"loss": 0.0398, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 0.9509803921568627, |
|
"grad_norm": 0.2745281457901001, |
|
"learning_rate": 0.00014599899263928028, |
|
"loss": 0.0358, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 0.9558823529411765, |
|
"grad_norm": 0.2483411729335785, |
|
"learning_rate": 0.00014542216305833968, |
|
"loss": 0.0506, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.9607843137254902, |
|
"grad_norm": 0.3583033084869385, |
|
"learning_rate": 0.000144843422930222, |
|
"loss": 0.0442, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.9656862745098039, |
|
"grad_norm": 0.2526845932006836, |
|
"learning_rate": 0.00014426279659789651, |
|
"loss": 0.0458, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 0.9705882352941176, |
|
"grad_norm": 0.3186612129211426, |
|
"learning_rate": 0.00014368030848367, |
|
"loss": 0.052, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 0.9754901960784313, |
|
"grad_norm": 0.3088064193725586, |
|
"learning_rate": 0.00014309598308815945, |
|
"loss": 0.0453, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 0.9803921568627451, |
|
"grad_norm": 0.44008663296699524, |
|
"learning_rate": 0.00014250984498926167, |
|
"loss": 0.0449, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.9852941176470589, |
|
"grad_norm": 0.5169370174407959, |
|
"learning_rate": 0.0001419219188411194, |
|
"loss": 0.0411, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 0.9901960784313726, |
|
"grad_norm": 0.32191041111946106, |
|
"learning_rate": 0.0001413322293730842, |
|
"loss": 0.0406, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 0.9950980392156863, |
|
"grad_norm": 0.300047904253006, |
|
"learning_rate": 0.00014074080138867654, |
|
"loss": 0.0432, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 0.6571453809738159, |
|
"learning_rate": 0.00014014765976454231, |
|
"loss": 0.0421, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 1.0049019607843137, |
|
"grad_norm": 0.43792489171028137, |
|
"learning_rate": 0.00013955282944940652, |
|
"loss": 0.0389, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 1.0098039215686274, |
|
"grad_norm": 0.40551769733428955, |
|
"learning_rate": 0.0001389563354630239, |
|
"loss": 0.0418, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 1.0147058823529411, |
|
"grad_norm": 0.3299430012702942, |
|
"learning_rate": 0.0001383582028951265, |
|
"loss": 0.0465, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 1.0196078431372548, |
|
"grad_norm": 0.2800670266151428, |
|
"learning_rate": 0.00013775845690436848, |
|
"loss": 0.0443, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 1.0245098039215685, |
|
"grad_norm": 0.49185529351234436, |
|
"learning_rate": 0.00013715712271726772, |
|
"loss": 0.0415, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 1.0294117647058822, |
|
"grad_norm": 0.6726065278053284, |
|
"learning_rate": 0.0001365542256271448, |
|
"loss": 0.038, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 1.0343137254901962, |
|
"grad_norm": 0.5443005561828613, |
|
"learning_rate": 0.00013594979099305928, |
|
"loss": 0.0407, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 1.0392156862745099, |
|
"grad_norm": 0.3882359564304352, |
|
"learning_rate": 0.00013534384423874272, |
|
"loss": 0.0479, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 1.0441176470588236, |
|
"grad_norm": 0.3460078239440918, |
|
"learning_rate": 0.00013473641085152957, |
|
"loss": 0.0472, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 1.0490196078431373, |
|
"grad_norm": 0.3264780342578888, |
|
"learning_rate": 0.00013412751638128503, |
|
"loss": 0.0374, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 1.053921568627451, |
|
"grad_norm": 0.28663188219070435, |
|
"learning_rate": 0.0001335171864393304, |
|
"loss": 0.0386, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 1.0588235294117647, |
|
"grad_norm": 0.22994215786457062, |
|
"learning_rate": 0.00013290544669736576, |
|
"loss": 0.0492, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 1.0637254901960784, |
|
"grad_norm": 0.2018628716468811, |
|
"learning_rate": 0.0001322923228863902, |
|
"loss": 0.0367, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 1.0686274509803921, |
|
"grad_norm": 0.20426690578460693, |
|
"learning_rate": 0.0001316778407956196, |
|
"loss": 0.0322, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 1.0735294117647058, |
|
"grad_norm": 0.32902026176452637, |
|
"learning_rate": 0.00013106202627140163, |
|
"loss": 0.0321, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 1.0784313725490196, |
|
"grad_norm": 0.3210899233818054, |
|
"learning_rate": 0.00013044490521612904, |
|
"loss": 0.0405, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 1.0833333333333333, |
|
"grad_norm": 0.23476989567279816, |
|
"learning_rate": 0.00012982650358714967, |
|
"loss": 0.0416, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 1.088235294117647, |
|
"grad_norm": 0.3046343922615051, |
|
"learning_rate": 0.000129206847395675, |
|
"loss": 0.0394, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 1.093137254901961, |
|
"grad_norm": 0.27315208315849304, |
|
"learning_rate": 0.0001285859627056858, |
|
"loss": 0.0439, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 1.0980392156862746, |
|
"grad_norm": 0.34966346621513367, |
|
"learning_rate": 0.00012796387563283605, |
|
"loss": 0.0387, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 1.1029411764705883, |
|
"grad_norm": 0.24096551537513733, |
|
"learning_rate": 0.00012734061234335434, |
|
"loss": 0.0412, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 1.107843137254902, |
|
"grad_norm": 0.26461055874824524, |
|
"learning_rate": 0.00012671619905294326, |
|
"loss": 0.0494, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 1.1127450980392157, |
|
"grad_norm": 0.24669981002807617, |
|
"learning_rate": 0.0001260906620256767, |
|
"loss": 0.0396, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 1.1176470588235294, |
|
"grad_norm": 0.37667712569236755, |
|
"learning_rate": 0.00012546402757289532, |
|
"loss": 0.0426, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 1.1225490196078431, |
|
"grad_norm": 0.3720461130142212, |
|
"learning_rate": 0.00012483632205209953, |
|
"loss": 0.042, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 1.1274509803921569, |
|
"grad_norm": 0.3975540101528168, |
|
"learning_rate": 0.0001242075718658411, |
|
"loss": 0.0464, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 1.1323529411764706, |
|
"grad_norm": 0.34910279512405396, |
|
"learning_rate": 0.00012357780346061256, |
|
"loss": 0.0412, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 1.1372549019607843, |
|
"grad_norm": 0.34874585270881653, |
|
"learning_rate": 0.00012294704332573462, |
|
"loss": 0.0458, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 1.142156862745098, |
|
"grad_norm": 0.24936801195144653, |
|
"learning_rate": 0.0001223153179922423, |
|
"loss": 0.0437, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 1.1470588235294117, |
|
"grad_norm": 0.39299267530441284, |
|
"learning_rate": 0.00012168265403176864, |
|
"loss": 0.0419, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 1.1519607843137254, |
|
"grad_norm": 0.44494786858558655, |
|
"learning_rate": 0.0001210490780554274, |
|
"loss": 0.0391, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 1.156862745098039, |
|
"grad_norm": 0.26526331901550293, |
|
"learning_rate": 0.00012041461671269337, |
|
"loss": 0.0338, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 1.161764705882353, |
|
"grad_norm": 0.4189550280570984, |
|
"learning_rate": 0.00011977929669028174, |
|
"loss": 0.0441, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 1.1666666666666667, |
|
"grad_norm": 0.2872006595134735, |
|
"learning_rate": 0.00011914314471102545, |
|
"loss": 0.0427, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 1.1715686274509804, |
|
"grad_norm": 0.2823413908481598, |
|
"learning_rate": 0.0001185061875327512, |
|
"loss": 0.0443, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 1.1764705882352942, |
|
"grad_norm": 0.26944294571876526, |
|
"learning_rate": 0.00011786845194715403, |
|
"loss": 0.0387, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 1.1813725490196079, |
|
"grad_norm": 0.3672547936439514, |
|
"learning_rate": 0.00011722996477867026, |
|
"loss": 0.0397, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 1.1862745098039216, |
|
"grad_norm": 0.2237931340932846, |
|
"learning_rate": 0.00011659075288334938, |
|
"loss": 0.0444, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 1.1911764705882353, |
|
"grad_norm": 0.2790263295173645, |
|
"learning_rate": 0.00011595084314772429, |
|
"loss": 0.0358, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 1.196078431372549, |
|
"grad_norm": 0.2825845777988434, |
|
"learning_rate": 0.00011531026248768048, |
|
"loss": 0.0368, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 1.2009803921568627, |
|
"grad_norm": 0.27560582756996155, |
|
"learning_rate": 0.00011466903784732381, |
|
"loss": 0.0474, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 1.2058823529411764, |
|
"grad_norm": 0.4237360656261444, |
|
"learning_rate": 0.00011402719619784734, |
|
"loss": 0.0375, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 1.2107843137254901, |
|
"grad_norm": 0.3769036531448364, |
|
"learning_rate": 0.00011338476453639666, |
|
"loss": 0.0308, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 1.215686274509804, |
|
"grad_norm": 0.32824084162712097, |
|
"learning_rate": 0.00011274176988493454, |
|
"loss": 0.0386, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 1.2205882352941178, |
|
"grad_norm": 0.30048561096191406, |
|
"learning_rate": 0.0001120982392891042, |
|
"loss": 0.0391, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 1.2254901960784315, |
|
"grad_norm": 0.30690962076187134, |
|
"learning_rate": 0.00011145419981709169, |
|
"loss": 0.0443, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.2303921568627452, |
|
"grad_norm": 0.4496728479862213, |
|
"learning_rate": 0.00011080967855848755, |
|
"loss": 0.0447, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 1.2352941176470589, |
|
"grad_norm": 0.25722137093544006, |
|
"learning_rate": 0.00011016470262314707, |
|
"loss": 0.0333, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 1.2401960784313726, |
|
"grad_norm": 0.32415884733200073, |
|
"learning_rate": 0.00010951929914005033, |
|
"loss": 0.0375, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 1.2450980392156863, |
|
"grad_norm": 0.33738696575164795, |
|
"learning_rate": 0.00010887349525616075, |
|
"loss": 0.0408, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"grad_norm": 0.3490372896194458, |
|
"learning_rate": 0.00010822731813528354, |
|
"loss": 0.0337, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 1.2549019607843137, |
|
"grad_norm": 0.42863723635673523, |
|
"learning_rate": 0.00010758079495692294, |
|
"loss": 0.0442, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 1.2598039215686274, |
|
"grad_norm": 0.29293495416641235, |
|
"learning_rate": 0.00010693395291513908, |
|
"loss": 0.0408, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 1.2647058823529411, |
|
"grad_norm": 0.27024197578430176, |
|
"learning_rate": 0.00010628681921740414, |
|
"loss": 0.0377, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 1.2696078431372548, |
|
"grad_norm": 0.35880324244499207, |
|
"learning_rate": 0.00010563942108345785, |
|
"loss": 0.0364, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 1.2745098039215685, |
|
"grad_norm": 0.3253026306629181, |
|
"learning_rate": 0.0001049917857441628, |
|
"loss": 0.0374, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 1.2794117647058822, |
|
"grad_norm": 0.33250367641448975, |
|
"learning_rate": 0.00010434394044035878, |
|
"loss": 0.0384, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 1.284313725490196, |
|
"grad_norm": 0.34700486063957214, |
|
"learning_rate": 0.00010369591242171719, |
|
"loss": 0.0369, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 1.2892156862745099, |
|
"grad_norm": 0.3287598788738251, |
|
"learning_rate": 0.00010304772894559475, |
|
"loss": 0.0419, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 1.2941176470588236, |
|
"grad_norm": 0.3363092243671417, |
|
"learning_rate": 0.00010239941727588707, |
|
"loss": 0.0419, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 1.2990196078431373, |
|
"grad_norm": 0.32619622349739075, |
|
"learning_rate": 0.0001017510046818817, |
|
"loss": 0.0353, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 1.303921568627451, |
|
"grad_norm": 0.28630563616752625, |
|
"learning_rate": 0.00010110251843711149, |
|
"loss": 0.0317, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 1.3088235294117647, |
|
"grad_norm": 0.470198392868042, |
|
"learning_rate": 0.00010045398581820702, |
|
"loss": 0.0397, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 1.3137254901960784, |
|
"grad_norm": 0.34590962529182434, |
|
"learning_rate": 9.98054341037495e-05, |
|
"loss": 0.0336, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 1.3186274509803921, |
|
"grad_norm": 0.18714579939842224, |
|
"learning_rate": 9.91568905731234e-05, |
|
"loss": 0.0268, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 1.3235294117647058, |
|
"grad_norm": 0.3055776357650757, |
|
"learning_rate": 9.850838250536885e-05, |
|
"loss": 0.0384, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 1.3284313725490196, |
|
"grad_norm": 0.33092889189720154, |
|
"learning_rate": 9.785993717803445e-05, |
|
"loss": 0.0323, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 1.3333333333333333, |
|
"grad_norm": 0.26138371229171753, |
|
"learning_rate": 9.721158186602979e-05, |
|
"loss": 0.0391, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 1.3382352941176472, |
|
"grad_norm": 0.2703782320022583, |
|
"learning_rate": 9.656334384047812e-05, |
|
"loss": 0.0268, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 1.343137254901961, |
|
"grad_norm": 0.24206914007663727, |
|
"learning_rate": 9.591525036756952e-05, |
|
"loss": 0.032, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 1.3480392156862746, |
|
"grad_norm": 0.3793281018733978, |
|
"learning_rate": 9.526732870741386e-05, |
|
"loss": 0.0399, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 1.3529411764705883, |
|
"grad_norm": 0.3143618404865265, |
|
"learning_rate": 9.46196061128942e-05, |
|
"loss": 0.0365, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 1.357843137254902, |
|
"grad_norm": 0.23540951311588287, |
|
"learning_rate": 9.397210982852053e-05, |
|
"loss": 0.0328, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 1.3627450980392157, |
|
"grad_norm": 0.2023368775844574, |
|
"learning_rate": 9.332486708928373e-05, |
|
"loss": 0.0316, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 1.3676470588235294, |
|
"grad_norm": 0.26689231395721436, |
|
"learning_rate": 9.267790511951015e-05, |
|
"loss": 0.0326, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 1.3725490196078431, |
|
"grad_norm": 0.2792396545410156, |
|
"learning_rate": 9.203125113171631e-05, |
|
"loss": 0.0336, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 1.3774509803921569, |
|
"grad_norm": 0.21045692265033722, |
|
"learning_rate": 9.13849323254645e-05, |
|
"loss": 0.0296, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 1.3823529411764706, |
|
"grad_norm": 0.26224854588508606, |
|
"learning_rate": 9.073897588621853e-05, |
|
"loss": 0.0311, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 1.3872549019607843, |
|
"grad_norm": 0.30219170451164246, |
|
"learning_rate": 9.009340898420029e-05, |
|
"loss": 0.0379, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 1.392156862745098, |
|
"grad_norm": 0.19660678505897522, |
|
"learning_rate": 8.944825877324708e-05, |
|
"loss": 0.035, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 1.3970588235294117, |
|
"grad_norm": 0.2348472774028778, |
|
"learning_rate": 8.880355238966923e-05, |
|
"loss": 0.0366, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 1.4019607843137254, |
|
"grad_norm": 0.44347622990608215, |
|
"learning_rate": 8.815931695110885e-05, |
|
"loss": 0.0333, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 1.406862745098039, |
|
"grad_norm": 0.34309887886047363, |
|
"learning_rate": 8.751557955539915e-05, |
|
"loss": 0.0394, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 1.4117647058823528, |
|
"grad_norm": 0.3023855984210968, |
|
"learning_rate": 8.687236727942465e-05, |
|
"loss": 0.0308, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 1.4166666666666667, |
|
"grad_norm": 0.25127673149108887, |
|
"learning_rate": 8.622970717798227e-05, |
|
"loss": 0.0384, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 1.4215686274509804, |
|
"grad_norm": 0.17014305293560028, |
|
"learning_rate": 8.558762628264345e-05, |
|
"loss": 0.0331, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 1.4264705882352942, |
|
"grad_norm": 0.32725852727890015, |
|
"learning_rate": 8.494615160061694e-05, |
|
"loss": 0.0326, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 1.4313725490196079, |
|
"grad_norm": 0.2895604968070984, |
|
"learning_rate": 8.430531011361298e-05, |
|
"loss": 0.0319, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 1.4362745098039216, |
|
"grad_norm": 0.3882890045642853, |
|
"learning_rate": 8.366512877670842e-05, |
|
"loss": 0.0331, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 1.4411764705882353, |
|
"grad_norm": 0.27492624521255493, |
|
"learning_rate": 8.302563451721282e-05, |
|
"loss": 0.0384, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 1.446078431372549, |
|
"grad_norm": 0.23316094279289246, |
|
"learning_rate": 8.238685423353588e-05, |
|
"loss": 0.0426, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 1.4509803921568627, |
|
"grad_norm": 0.395353227853775, |
|
"learning_rate": 8.174881479405607e-05, |
|
"loss": 0.0347, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 1.4558823529411764, |
|
"grad_norm": 0.5179559588432312, |
|
"learning_rate": 8.111154303599049e-05, |
|
"loss": 0.0371, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 1.4607843137254901, |
|
"grad_norm": 0.22947251796722412, |
|
"learning_rate": 8.047506576426596e-05, |
|
"loss": 0.03, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 1.465686274509804, |
|
"grad_norm": 0.3831785023212433, |
|
"learning_rate": 7.983940975039166e-05, |
|
"loss": 0.0346, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 1.4705882352941178, |
|
"grad_norm": 0.34753093123435974, |
|
"learning_rate": 7.920460173133304e-05, |
|
"loss": 0.0464, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.4754901960784315, |
|
"grad_norm": 0.24026577174663544, |
|
"learning_rate": 7.85706684083871e-05, |
|
"loss": 0.0334, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 1.4803921568627452, |
|
"grad_norm": 0.32264992594718933, |
|
"learning_rate": 7.793763644605947e-05, |
|
"loss": 0.0329, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 1.4852941176470589, |
|
"grad_norm": 0.25292444229125977, |
|
"learning_rate": 7.730553247094266e-05, |
|
"loss": 0.0306, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 1.4901960784313726, |
|
"grad_norm": 0.21395047008991241, |
|
"learning_rate": 7.667438307059627e-05, |
|
"loss": 0.0346, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 1.4950980392156863, |
|
"grad_norm": 0.29967001080513, |
|
"learning_rate": 7.604421479242846e-05, |
|
"loss": 0.0403, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"grad_norm": 0.2828430235385895, |
|
"learning_rate": 7.541505414257959e-05, |
|
"loss": 0.0327, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 1.5049019607843137, |
|
"grad_norm": 0.43027809262275696, |
|
"learning_rate": 7.478692758480698e-05, |
|
"loss": 0.0331, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 1.5098039215686274, |
|
"grad_norm": 0.34473538398742676, |
|
"learning_rate": 7.415986153937202e-05, |
|
"loss": 0.0444, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 1.5147058823529411, |
|
"grad_norm": 0.3717981278896332, |
|
"learning_rate": 7.353388238192892e-05, |
|
"loss": 0.042, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 1.5196078431372548, |
|
"grad_norm": 0.15127846598625183, |
|
"learning_rate": 7.29090164424151e-05, |
|
"loss": 0.0296, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 1.5245098039215685, |
|
"grad_norm": 0.33397403359413147, |
|
"learning_rate": 7.22852900039438e-05, |
|
"loss": 0.0314, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 1.5294117647058822, |
|
"grad_norm": 0.25943371653556824, |
|
"learning_rate": 7.166272930169861e-05, |
|
"loss": 0.0342, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 1.534313725490196, |
|
"grad_norm": 0.3120077848434448, |
|
"learning_rate": 7.104136052182992e-05, |
|
"loss": 0.0317, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 1.5392156862745097, |
|
"grad_norm": 0.24377594888210297, |
|
"learning_rate": 7.042120980035346e-05, |
|
"loss": 0.0284, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 1.5441176470588234, |
|
"grad_norm": 0.19070957601070404, |
|
"learning_rate": 6.980230322205099e-05, |
|
"loss": 0.0343, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 1.5490196078431373, |
|
"grad_norm": 0.22296807169914246, |
|
"learning_rate": 6.918466681937308e-05, |
|
"loss": 0.0299, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 1.553921568627451, |
|
"grad_norm": 0.2279416173696518, |
|
"learning_rate": 6.856832657134424e-05, |
|
"loss": 0.0333, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 1.5588235294117647, |
|
"grad_norm": 0.2242182493209839, |
|
"learning_rate": 6.795330840247006e-05, |
|
"loss": 0.0331, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 1.5637254901960784, |
|
"grad_norm": 0.2774062752723694, |
|
"learning_rate": 6.733963818164686e-05, |
|
"loss": 0.0266, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 1.5686274509803921, |
|
"grad_norm": 0.37312522530555725, |
|
"learning_rate": 6.672734172107354e-05, |
|
"loss": 0.0376, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 1.5735294117647058, |
|
"grad_norm": 0.23322941362857819, |
|
"learning_rate": 6.611644477516595e-05, |
|
"loss": 0.0282, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 1.5784313725490198, |
|
"grad_norm": 0.32735109329223633, |
|
"learning_rate": 6.550697303947345e-05, |
|
"loss": 0.0294, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 1.5833333333333335, |
|
"grad_norm": 0.21853038668632507, |
|
"learning_rate": 6.489895214959828e-05, |
|
"loss": 0.0259, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 1.5882352941176472, |
|
"grad_norm": 0.3016158640384674, |
|
"learning_rate": 6.429240768011719e-05, |
|
"loss": 0.028, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 1.593137254901961, |
|
"grad_norm": 0.20449745655059814, |
|
"learning_rate": 6.368736514350568e-05, |
|
"loss": 0.0303, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 1.5980392156862746, |
|
"grad_norm": 0.2439008206129074, |
|
"learning_rate": 6.308384998906506e-05, |
|
"loss": 0.027, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 1.6029411764705883, |
|
"grad_norm": 0.28377825021743774, |
|
"learning_rate": 6.248188760185173e-05, |
|
"loss": 0.0302, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 1.607843137254902, |
|
"grad_norm": 0.32138168811798096, |
|
"learning_rate": 6.188150330160971e-05, |
|
"loss": 0.0255, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 1.6127450980392157, |
|
"grad_norm": 0.23931661248207092, |
|
"learning_rate": 6.128272234170547e-05, |
|
"loss": 0.0284, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 1.6176470588235294, |
|
"grad_norm": 0.26550066471099854, |
|
"learning_rate": 6.068556990806579e-05, |
|
"loss": 0.039, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 1.6225490196078431, |
|
"grad_norm": 0.20831480622291565, |
|
"learning_rate": 6.0090071118118355e-05, |
|
"loss": 0.0248, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 1.6274509803921569, |
|
"grad_norm": 0.26879703998565674, |
|
"learning_rate": 5.949625101973527e-05, |
|
"loss": 0.0303, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 1.6323529411764706, |
|
"grad_norm": 0.34190261363983154, |
|
"learning_rate": 5.890413459017958e-05, |
|
"loss": 0.0296, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 1.6372549019607843, |
|
"grad_norm": 0.3451369106769562, |
|
"learning_rate": 5.8313746735054544e-05, |
|
"loss": 0.0274, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 1.642156862745098, |
|
"grad_norm": 0.2692447304725647, |
|
"learning_rate": 5.77251122872561e-05, |
|
"loss": 0.0303, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 1.6470588235294117, |
|
"grad_norm": 0.28057631850242615, |
|
"learning_rate": 5.713825600592841e-05, |
|
"loss": 0.0335, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 1.6519607843137254, |
|
"grad_norm": 0.20118731260299683, |
|
"learning_rate": 5.6553202575422385e-05, |
|
"loss": 0.0339, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 1.656862745098039, |
|
"grad_norm": 0.27384528517723083, |
|
"learning_rate": 5.596997660425746e-05, |
|
"loss": 0.0296, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 1.6617647058823528, |
|
"grad_norm": 0.27839264273643494, |
|
"learning_rate": 5.538860262408632e-05, |
|
"loss": 0.0306, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 1.6666666666666665, |
|
"grad_norm": 0.26187360286712646, |
|
"learning_rate": 5.480910508866333e-05, |
|
"loss": 0.0327, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 1.6715686274509802, |
|
"grad_norm": 0.16635389626026154, |
|
"learning_rate": 5.423150837281585e-05, |
|
"loss": 0.0268, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 1.6764705882352942, |
|
"grad_norm": 0.3123128414154053, |
|
"learning_rate": 5.365583677141883e-05, |
|
"loss": 0.0345, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 1.6813725490196079, |
|
"grad_norm": 0.30305176973342896, |
|
"learning_rate": 5.308211449837315e-05, |
|
"loss": 0.0264, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 1.6862745098039216, |
|
"grad_norm": 0.19436487555503845, |
|
"learning_rate": 5.2510365685587026e-05, |
|
"loss": 0.0318, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 1.6911764705882353, |
|
"grad_norm": 0.2048874795436859, |
|
"learning_rate": 5.1940614381961004e-05, |
|
"loss": 0.0296, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 1.696078431372549, |
|
"grad_norm": 0.32843217253685, |
|
"learning_rate": 5.137288455237627e-05, |
|
"loss": 0.0288, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 1.7009803921568627, |
|
"grad_norm": 0.26734060049057007, |
|
"learning_rate": 5.080720007668689e-05, |
|
"loss": 0.0309, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 1.7058823529411766, |
|
"grad_norm": 0.2050999402999878, |
|
"learning_rate": 5.0243584748715235e-05, |
|
"loss": 0.0283, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 1.7107843137254903, |
|
"grad_norm": 0.32800912857055664, |
|
"learning_rate": 4.968206227525111e-05, |
|
"loss": 0.0356, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 1.715686274509804, |
|
"grad_norm": 0.2212320864200592, |
|
"learning_rate": 4.912265627505468e-05, |
|
"loss": 0.0278, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 1.7205882352941178, |
|
"grad_norm": 0.22088485956192017, |
|
"learning_rate": 4.856539027786305e-05, |
|
"loss": 0.0315, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 1.7254901960784315, |
|
"grad_norm": 0.2626785635948181, |
|
"learning_rate": 4.8010287723400494e-05, |
|
"loss": 0.0395, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 1.7303921568627452, |
|
"grad_norm": 0.343022882938385, |
|
"learning_rate": 4.745737196039259e-05, |
|
"loss": 0.0235, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 1.7352941176470589, |
|
"grad_norm": 0.4321844279766083, |
|
"learning_rate": 4.6906666245583965e-05, |
|
"loss": 0.0393, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 1.7401960784313726, |
|
"grad_norm": 0.34605053067207336, |
|
"learning_rate": 4.6358193742760305e-05, |
|
"loss": 0.0214, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 1.7450980392156863, |
|
"grad_norm": 0.2320283055305481, |
|
"learning_rate": 4.5811977521773906e-05, |
|
"loss": 0.0331, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"grad_norm": 0.21285474300384521, |
|
"learning_rate": 4.526804055757328e-05, |
|
"loss": 0.0333, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 1.7549019607843137, |
|
"grad_norm": 0.185288667678833, |
|
"learning_rate": 4.472640572923687e-05, |
|
"loss": 0.0269, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 1.7598039215686274, |
|
"grad_norm": 0.2527756989002228, |
|
"learning_rate": 4.4187095819010674e-05, |
|
"loss": 0.0296, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 1.7647058823529411, |
|
"grad_norm": 0.1697828769683838, |
|
"learning_rate": 4.365013351135001e-05, |
|
"loss": 0.0272, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 1.7696078431372548, |
|
"grad_norm": 0.215946227312088, |
|
"learning_rate": 4.311554139196522e-05, |
|
"loss": 0.0262, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 1.7745098039215685, |
|
"grad_norm": 0.41971561312675476, |
|
"learning_rate": 4.258334194687188e-05, |
|
"loss": 0.0282, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 1.7794117647058822, |
|
"grad_norm": 0.18381306529045105, |
|
"learning_rate": 4.205355756144489e-05, |
|
"loss": 0.0293, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 1.784313725490196, |
|
"grad_norm": 0.24697428941726685, |
|
"learning_rate": 4.152621051947682e-05, |
|
"loss": 0.0205, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 1.7892156862745097, |
|
"grad_norm": 0.23640835285186768, |
|
"learning_rate": 4.1001323002240754e-05, |
|
"loss": 0.0304, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 1.7941176470588234, |
|
"grad_norm": 0.21955536305904388, |
|
"learning_rate": 4.047891708755724e-05, |
|
"loss": 0.0281, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 1.7990196078431373, |
|
"grad_norm": 0.34594038128852844, |
|
"learning_rate": 3.995901474886568e-05, |
|
"loss": 0.0344, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 1.803921568627451, |
|
"grad_norm": 0.4010615646839142, |
|
"learning_rate": 3.944163785429992e-05, |
|
"loss": 0.0241, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 1.8088235294117647, |
|
"grad_norm": 0.24570715427398682, |
|
"learning_rate": 3.8926808165768715e-05, |
|
"loss": 0.0269, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 1.8137254901960784, |
|
"grad_norm": 0.2714114785194397, |
|
"learning_rate": 3.841454733804016e-05, |
|
"loss": 0.0287, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 1.8186274509803921, |
|
"grad_norm": 0.29776889085769653, |
|
"learning_rate": 3.790487691783099e-05, |
|
"loss": 0.0325, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 1.8235294117647058, |
|
"grad_norm": 0.20955297350883484, |
|
"learning_rate": 3.739781834290006e-05, |
|
"loss": 0.0319, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 1.8284313725490198, |
|
"grad_norm": 0.2985910475254059, |
|
"learning_rate": 3.689339294114692e-05, |
|
"loss": 0.0244, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 1.8333333333333335, |
|
"grad_norm": 0.33746784925460815, |
|
"learning_rate": 3.639162192971457e-05, |
|
"loss": 0.0272, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 1.8382352941176472, |
|
"grad_norm": 0.2537771761417389, |
|
"learning_rate": 3.5892526414096925e-05, |
|
"loss": 0.0317, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 1.843137254901961, |
|
"grad_norm": 0.30214208364486694, |
|
"learning_rate": 3.53961273872513e-05, |
|
"loss": 0.0207, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 1.8480392156862746, |
|
"grad_norm": 0.3083361089229584, |
|
"learning_rate": 3.490244572871524e-05, |
|
"loss": 0.0216, |
|
"step": 3770 |
|
}, |
|
{ |
|
"epoch": 1.8529411764705883, |
|
"grad_norm": 0.24969086050987244, |
|
"learning_rate": 3.44115022037284e-05, |
|
"loss": 0.0254, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 1.857843137254902, |
|
"grad_norm": 0.2084352970123291, |
|
"learning_rate": 3.3923317462358905e-05, |
|
"loss": 0.0305, |
|
"step": 3790 |
|
}, |
|
{ |
|
"epoch": 1.8627450980392157, |
|
"grad_norm": 0.21671414375305176, |
|
"learning_rate": 3.3437912038635056e-05, |
|
"loss": 0.0303, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 1.8676470588235294, |
|
"grad_norm": 0.2956879436969757, |
|
"learning_rate": 3.295530634968147e-05, |
|
"loss": 0.0298, |
|
"step": 3810 |
|
}, |
|
{ |
|
"epoch": 1.8725490196078431, |
|
"grad_norm": 0.29368528723716736, |
|
"learning_rate": 3.24755206948602e-05, |
|
"loss": 0.0261, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 1.8774509803921569, |
|
"grad_norm": 0.13201627135276794, |
|
"learning_rate": 3.199857525491714e-05, |
|
"loss": 0.0217, |
|
"step": 3830 |
|
}, |
|
{ |
|
"epoch": 1.8823529411764706, |
|
"grad_norm": 0.21656860411167145, |
|
"learning_rate": 3.1524490091133e-05, |
|
"loss": 0.0288, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 1.8872549019607843, |
|
"grad_norm": 0.20571519434452057, |
|
"learning_rate": 3.105328514447957e-05, |
|
"loss": 0.0254, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 1.892156862745098, |
|
"grad_norm": 0.2538784444332123, |
|
"learning_rate": 3.0584980234780916e-05, |
|
"loss": 0.0325, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 1.8970588235294117, |
|
"grad_norm": 0.282520592212677, |
|
"learning_rate": 3.0119595059879678e-05, |
|
"loss": 0.0292, |
|
"step": 3870 |
|
}, |
|
{ |
|
"epoch": 1.9019607843137254, |
|
"grad_norm": 0.2727642357349396, |
|
"learning_rate": 2.965714919480872e-05, |
|
"loss": 0.0264, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 1.906862745098039, |
|
"grad_norm": 0.1925729215145111, |
|
"learning_rate": 2.9197662090967625e-05, |
|
"loss": 0.0282, |
|
"step": 3890 |
|
}, |
|
{ |
|
"epoch": 1.9117647058823528, |
|
"grad_norm": 0.23374304175376892, |
|
"learning_rate": 2.8741153075304438e-05, |
|
"loss": 0.0266, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 1.9166666666666665, |
|
"grad_norm": 0.24565072357654572, |
|
"learning_rate": 2.828764134950297e-05, |
|
"loss": 0.0198, |
|
"step": 3910 |
|
}, |
|
{ |
|
"epoch": 1.9215686274509802, |
|
"grad_norm": 0.1271701604127884, |
|
"learning_rate": 2.7837145989174974e-05, |
|
"loss": 0.0206, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 1.9264705882352942, |
|
"grad_norm": 0.25117677450180054, |
|
"learning_rate": 2.7389685943057852e-05, |
|
"loss": 0.0249, |
|
"step": 3930 |
|
}, |
|
{ |
|
"epoch": 1.9313725490196079, |
|
"grad_norm": 0.33383405208587646, |
|
"learning_rate": 2.6945280032217535e-05, |
|
"loss": 0.0298, |
|
"step": 3940 |
|
}, |
|
{ |
|
"epoch": 1.9362745098039216, |
|
"grad_norm": 0.22882990539073944, |
|
"learning_rate": 2.6503946949256974e-05, |
|
"loss": 0.0273, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 1.9411764705882353, |
|
"grad_norm": 0.2781018316745758, |
|
"learning_rate": 2.6065705257529848e-05, |
|
"loss": 0.0345, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 1.946078431372549, |
|
"grad_norm": 0.23163281381130219, |
|
"learning_rate": 2.5630573390359624e-05, |
|
"loss": 0.024, |
|
"step": 3970 |
|
}, |
|
{ |
|
"epoch": 1.9509803921568627, |
|
"grad_norm": 0.30135810375213623, |
|
"learning_rate": 2.5198569650264403e-05, |
|
"loss": 0.0245, |
|
"step": 3980 |
|
}, |
|
{ |
|
"epoch": 1.9558823529411766, |
|
"grad_norm": 0.2169611006975174, |
|
"learning_rate": 2.4769712208186967e-05, |
|
"loss": 0.0217, |
|
"step": 3990 |
|
}, |
|
{ |
|
"epoch": 1.9607843137254903, |
|
"grad_norm": 0.2509106397628784, |
|
"learning_rate": 2.4344019102730542e-05, |
|
"loss": 0.0225, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.965686274509804, |
|
"grad_norm": 0.3170604407787323, |
|
"learning_rate": 2.3921508239399913e-05, |
|
"loss": 0.027, |
|
"step": 4010 |
|
}, |
|
{ |
|
"epoch": 1.9705882352941178, |
|
"grad_norm": 0.1460844874382019, |
|
"learning_rate": 2.350219738984849e-05, |
|
"loss": 0.021, |
|
"step": 4020 |
|
}, |
|
{ |
|
"epoch": 1.9754901960784315, |
|
"grad_norm": 0.27143651247024536, |
|
"learning_rate": 2.3086104191130643e-05, |
|
"loss": 0.0262, |
|
"step": 4030 |
|
}, |
|
{ |
|
"epoch": 1.9803921568627452, |
|
"grad_norm": 0.2494950294494629, |
|
"learning_rate": 2.2673246144959935e-05, |
|
"loss": 0.0249, |
|
"step": 4040 |
|
}, |
|
{ |
|
"epoch": 1.9852941176470589, |
|
"grad_norm": 0.26637178659439087, |
|
"learning_rate": 2.226364061697287e-05, |
|
"loss": 0.0325, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 1.9901960784313726, |
|
"grad_norm": 0.2871919274330139, |
|
"learning_rate": 2.185730483599856e-05, |
|
"loss": 0.0286, |
|
"step": 4060 |
|
}, |
|
{ |
|
"epoch": 1.9950980392156863, |
|
"grad_norm": 0.2098357230424881, |
|
"learning_rate": 2.1454255893334064e-05, |
|
"loss": 0.0332, |
|
"step": 4070 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 0.2278250902891159, |
|
"learning_rate": 2.10545107420253e-05, |
|
"loss": 0.0234, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 2.0049019607843137, |
|
"grad_norm": 0.266368567943573, |
|
"learning_rate": 2.0658086196154236e-05, |
|
"loss": 0.0226, |
|
"step": 4090 |
|
}, |
|
{ |
|
"epoch": 2.0098039215686274, |
|
"grad_norm": 0.10192721337080002, |
|
"learning_rate": 2.026499893013144e-05, |
|
"loss": 0.0217, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 2.014705882352941, |
|
"grad_norm": 0.2021161913871765, |
|
"learning_rate": 1.9875265477994875e-05, |
|
"loss": 0.0219, |
|
"step": 4110 |
|
}, |
|
{ |
|
"epoch": 2.019607843137255, |
|
"grad_norm": 0.18436592817306519, |
|
"learning_rate": 1.9488902232714267e-05, |
|
"loss": 0.0216, |
|
"step": 4120 |
|
}, |
|
{ |
|
"epoch": 2.0245098039215685, |
|
"grad_norm": 0.18808206915855408, |
|
"learning_rate": 1.9105925445501794e-05, |
|
"loss": 0.0184, |
|
"step": 4130 |
|
}, |
|
{ |
|
"epoch": 2.0294117647058822, |
|
"grad_norm": 0.2091018408536911, |
|
"learning_rate": 1.87263512251284e-05, |
|
"loss": 0.0237, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 2.034313725490196, |
|
"grad_norm": 0.29868388175964355, |
|
"learning_rate": 1.8350195537246184e-05, |
|
"loss": 0.0251, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 2.0392156862745097, |
|
"grad_norm": 0.15926848351955414, |
|
"learning_rate": 1.797747420371699e-05, |
|
"loss": 0.0214, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 2.0441176470588234, |
|
"grad_norm": 0.24016976356506348, |
|
"learning_rate": 1.7608202901946826e-05, |
|
"loss": 0.0206, |
|
"step": 4170 |
|
}, |
|
{ |
|
"epoch": 2.049019607843137, |
|
"grad_norm": 0.18072175979614258, |
|
"learning_rate": 1.7242397164226452e-05, |
|
"loss": 0.0192, |
|
"step": 4180 |
|
}, |
|
{ |
|
"epoch": 2.053921568627451, |
|
"grad_norm": 0.21760503947734833, |
|
"learning_rate": 1.6880072377078026e-05, |
|
"loss": 0.0237, |
|
"step": 4190 |
|
}, |
|
{ |
|
"epoch": 2.0588235294117645, |
|
"grad_norm": 0.28834259510040283, |
|
"learning_rate": 1.6521243780607974e-05, |
|
"loss": 0.0185, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 2.063725490196078, |
|
"grad_norm": 0.14096632599830627, |
|
"learning_rate": 1.616592646786599e-05, |
|
"loss": 0.0184, |
|
"step": 4210 |
|
}, |
|
{ |
|
"epoch": 2.0686274509803924, |
|
"grad_norm": 0.15555402636528015, |
|
"learning_rate": 1.5814135384210026e-05, |
|
"loss": 0.02, |
|
"step": 4220 |
|
}, |
|
{ |
|
"epoch": 2.073529411764706, |
|
"grad_norm": 0.09052202850580215, |
|
"learning_rate": 1.5465885326677897e-05, |
|
"loss": 0.019, |
|
"step": 4230 |
|
}, |
|
{ |
|
"epoch": 2.0784313725490198, |
|
"grad_norm": 0.1283917874097824, |
|
"learning_rate": 1.512119094336466e-05, |
|
"loss": 0.0194, |
|
"step": 4240 |
|
}, |
|
{ |
|
"epoch": 2.0833333333333335, |
|
"grad_norm": 0.268288791179657, |
|
"learning_rate": 1.4780066732806663e-05, |
|
"loss": 0.024, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 2.088235294117647, |
|
"grad_norm": 0.1498635858297348, |
|
"learning_rate": 1.4442527043371622e-05, |
|
"loss": 0.0226, |
|
"step": 4260 |
|
}, |
|
{ |
|
"epoch": 2.093137254901961, |
|
"grad_norm": 0.31013911962509155, |
|
"learning_rate": 1.4108586072655062e-05, |
|
"loss": 0.0198, |
|
"step": 4270 |
|
}, |
|
{ |
|
"epoch": 2.0980392156862746, |
|
"grad_norm": 0.19611553847789764, |
|
"learning_rate": 1.377825786688326e-05, |
|
"loss": 0.029, |
|
"step": 4280 |
|
}, |
|
{ |
|
"epoch": 2.1029411764705883, |
|
"grad_norm": 0.15280510485172272, |
|
"learning_rate": 1.3451556320322344e-05, |
|
"loss": 0.0305, |
|
"step": 4290 |
|
}, |
|
{ |
|
"epoch": 2.107843137254902, |
|
"grad_norm": 0.1233508512377739, |
|
"learning_rate": 1.3128495174693833e-05, |
|
"loss": 0.0214, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 2.1127450980392157, |
|
"grad_norm": 0.23667648434638977, |
|
"learning_rate": 1.280908801859676e-05, |
|
"loss": 0.017, |
|
"step": 4310 |
|
}, |
|
{ |
|
"epoch": 2.1176470588235294, |
|
"grad_norm": 0.2285485714673996, |
|
"learning_rate": 1.2493348286936013e-05, |
|
"loss": 0.019, |
|
"step": 4320 |
|
}, |
|
{ |
|
"epoch": 2.122549019607843, |
|
"grad_norm": 0.17249974608421326, |
|
"learning_rate": 1.2181289260357265e-05, |
|
"loss": 0.0233, |
|
"step": 4330 |
|
}, |
|
{ |
|
"epoch": 2.127450980392157, |
|
"grad_norm": 0.18264269828796387, |
|
"learning_rate": 1.1872924064688328e-05, |
|
"loss": 0.0176, |
|
"step": 4340 |
|
}, |
|
{ |
|
"epoch": 2.1323529411764706, |
|
"grad_norm": 0.23498280346393585, |
|
"learning_rate": 1.1568265670387125e-05, |
|
"loss": 0.0216, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 2.1372549019607843, |
|
"grad_norm": 0.20697841048240662, |
|
"learning_rate": 1.12673268919961e-05, |
|
"loss": 0.0221, |
|
"step": 4360 |
|
}, |
|
{ |
|
"epoch": 2.142156862745098, |
|
"grad_norm": 0.20883601903915405, |
|
"learning_rate": 1.0970120387603122e-05, |
|
"loss": 0.0211, |
|
"step": 4370 |
|
}, |
|
{ |
|
"epoch": 2.1470588235294117, |
|
"grad_norm": 0.2525753676891327, |
|
"learning_rate": 1.0676658658309225e-05, |
|
"loss": 0.0182, |
|
"step": 4380 |
|
}, |
|
{ |
|
"epoch": 2.1519607843137254, |
|
"grad_norm": 0.20986422896385193, |
|
"learning_rate": 1.0386954047702646e-05, |
|
"loss": 0.0222, |
|
"step": 4390 |
|
}, |
|
{ |
|
"epoch": 2.156862745098039, |
|
"grad_norm": 0.18922965228557587, |
|
"learning_rate": 1.010101874133973e-05, |
|
"loss": 0.0215, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 2.161764705882353, |
|
"grad_norm": 0.23508426547050476, |
|
"learning_rate": 9.81886476623226e-06, |
|
"loss": 0.0158, |
|
"step": 4410 |
|
}, |
|
{ |
|
"epoch": 2.1666666666666665, |
|
"grad_norm": 0.17150916159152985, |
|
"learning_rate": 9.540503990341743e-06, |
|
"loss": 0.0204, |
|
"step": 4420 |
|
}, |
|
{ |
|
"epoch": 2.1715686274509802, |
|
"grad_norm": 0.12821370363235474, |
|
"learning_rate": 9.265948122080048e-06, |
|
"loss": 0.0162, |
|
"step": 4430 |
|
}, |
|
{ |
|
"epoch": 2.176470588235294, |
|
"grad_norm": 0.14045512676239014, |
|
"learning_rate": 8.995208709817071e-06, |
|
"loss": 0.0231, |
|
"step": 4440 |
|
}, |
|
{ |
|
"epoch": 2.1813725490196076, |
|
"grad_norm": 0.24774880707263947, |
|
"learning_rate": 8.728297141394858e-06, |
|
"loss": 0.0205, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 2.186274509803922, |
|
"grad_norm": 0.20708513259887695, |
|
"learning_rate": 8.465224643648728e-06, |
|
"loss": 0.0217, |
|
"step": 4460 |
|
}, |
|
{ |
|
"epoch": 2.1911764705882355, |
|
"grad_norm": 0.1988290697336197, |
|
"learning_rate": 8.206002281934977e-06, |
|
"loss": 0.0181, |
|
"step": 4470 |
|
}, |
|
{ |
|
"epoch": 2.196078431372549, |
|
"grad_norm": 0.24248534440994263, |
|
"learning_rate": 7.950640959665457e-06, |
|
"loss": 0.014, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 2.200980392156863, |
|
"grad_norm": 0.199833944439888, |
|
"learning_rate": 7.69915141784896e-06, |
|
"loss": 0.0177, |
|
"step": 4490 |
|
}, |
|
{ |
|
"epoch": 2.2058823529411766, |
|
"grad_norm": 0.18880146741867065, |
|
"learning_rate": 7.451544234639473e-06, |
|
"loss": 0.0309, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 2.2107843137254903, |
|
"grad_norm": 0.19817057251930237, |
|
"learning_rate": 7.207829824891199e-06, |
|
"loss": 0.0135, |
|
"step": 4510 |
|
}, |
|
{ |
|
"epoch": 2.215686274509804, |
|
"grad_norm": 0.22702986001968384, |
|
"learning_rate": 6.968018439720414e-06, |
|
"loss": 0.0242, |
|
"step": 4520 |
|
}, |
|
{ |
|
"epoch": 2.2205882352941178, |
|
"grad_norm": 0.23108519613742828, |
|
"learning_rate": 6.732120166074441e-06, |
|
"loss": 0.0266, |
|
"step": 4530 |
|
}, |
|
{ |
|
"epoch": 2.2254901960784315, |
|
"grad_norm": 0.2683473229408264, |
|
"learning_rate": 6.500144926307295e-06, |
|
"loss": 0.0138, |
|
"step": 4540 |
|
}, |
|
{ |
|
"epoch": 2.230392156862745, |
|
"grad_norm": 0.3906085193157196, |
|
"learning_rate": 6.272102477762254e-06, |
|
"loss": 0.0257, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 2.235294117647059, |
|
"grad_norm": 0.16685569286346436, |
|
"learning_rate": 6.048002412361598e-06, |
|
"loss": 0.0197, |
|
"step": 4560 |
|
}, |
|
{ |
|
"epoch": 2.2401960784313726, |
|
"grad_norm": 0.19442564249038696, |
|
"learning_rate": 5.827854156203017e-06, |
|
"loss": 0.0183, |
|
"step": 4570 |
|
}, |
|
{ |
|
"epoch": 2.2450980392156863, |
|
"grad_norm": 0.22825686633586884, |
|
"learning_rate": 5.611666969163243e-06, |
|
"loss": 0.0192, |
|
"step": 4580 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"grad_norm": 0.13165044784545898, |
|
"learning_rate": 5.399449944508439e-06, |
|
"loss": 0.0181, |
|
"step": 4590 |
|
}, |
|
{ |
|
"epoch": 2.2549019607843137, |
|
"grad_norm": 0.32770487666130066, |
|
"learning_rate": 5.1912120085118365e-06, |
|
"loss": 0.0194, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 2.2598039215686274, |
|
"grad_norm": 0.20789536833763123, |
|
"learning_rate": 4.986961920078204e-06, |
|
"loss": 0.0274, |
|
"step": 4610 |
|
}, |
|
{ |
|
"epoch": 2.264705882352941, |
|
"grad_norm": 0.3107619881629944, |
|
"learning_rate": 4.786708270375462e-06, |
|
"loss": 0.0244, |
|
"step": 4620 |
|
}, |
|
{ |
|
"epoch": 2.269607843137255, |
|
"grad_norm": 0.11234085261821747, |
|
"learning_rate": 4.590459482473286e-06, |
|
"loss": 0.0125, |
|
"step": 4630 |
|
}, |
|
{ |
|
"epoch": 2.2745098039215685, |
|
"grad_norm": 0.2838551700115204, |
|
"learning_rate": 4.398223810988866e-06, |
|
"loss": 0.0209, |
|
"step": 4640 |
|
}, |
|
{ |
|
"epoch": 2.2794117647058822, |
|
"grad_norm": 0.2697765529155731, |
|
"learning_rate": 4.2100093417396845e-06, |
|
"loss": 0.0245, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 2.284313725490196, |
|
"grad_norm": 0.1846655011177063, |
|
"learning_rate": 4.0258239914033765e-06, |
|
"loss": 0.0271, |
|
"step": 4660 |
|
}, |
|
{ |
|
"epoch": 2.2892156862745097, |
|
"grad_norm": 0.20711906254291534, |
|
"learning_rate": 3.8456755071847765e-06, |
|
"loss": 0.0262, |
|
"step": 4670 |
|
}, |
|
{ |
|
"epoch": 2.2941176470588234, |
|
"grad_norm": 0.23577173054218292, |
|
"learning_rate": 3.6695714664900293e-06, |
|
"loss": 0.0147, |
|
"step": 4680 |
|
}, |
|
{ |
|
"epoch": 2.299019607843137, |
|
"grad_norm": 0.21056267619132996, |
|
"learning_rate": 3.49751927660793e-06, |
|
"loss": 0.0242, |
|
"step": 4690 |
|
}, |
|
{ |
|
"epoch": 2.303921568627451, |
|
"grad_norm": 0.1903896927833557, |
|
"learning_rate": 3.329526174398223e-06, |
|
"loss": 0.0199, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 2.3088235294117645, |
|
"grad_norm": 0.1740642637014389, |
|
"learning_rate": 3.165599225987381e-06, |
|
"loss": 0.0241, |
|
"step": 4710 |
|
}, |
|
{ |
|
"epoch": 2.313725490196078, |
|
"grad_norm": 0.23523923754692078, |
|
"learning_rate": 3.005745326471254e-06, |
|
"loss": 0.0196, |
|
"step": 4720 |
|
}, |
|
{ |
|
"epoch": 2.318627450980392, |
|
"grad_norm": 0.17898762226104736, |
|
"learning_rate": 2.849971199625112e-06, |
|
"loss": 0.0178, |
|
"step": 4730 |
|
}, |
|
{ |
|
"epoch": 2.323529411764706, |
|
"grad_norm": 0.18050484359264374, |
|
"learning_rate": 2.6982833976208043e-06, |
|
"loss": 0.0264, |
|
"step": 4740 |
|
}, |
|
{ |
|
"epoch": 2.3284313725490198, |
|
"grad_norm": 0.14259324967861176, |
|
"learning_rate": 2.5506883007511695e-06, |
|
"loss": 0.0163, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 2.3333333333333335, |
|
"grad_norm": 0.25935426354408264, |
|
"learning_rate": 2.407192117161683e-06, |
|
"loss": 0.0215, |
|
"step": 4760 |
|
}, |
|
{ |
|
"epoch": 2.338235294117647, |
|
"grad_norm": 0.24692723155021667, |
|
"learning_rate": 2.2678008825893106e-06, |
|
"loss": 0.0185, |
|
"step": 4770 |
|
}, |
|
{ |
|
"epoch": 2.343137254901961, |
|
"grad_norm": 0.22854046523571014, |
|
"learning_rate": 2.1325204601086222e-06, |
|
"loss": 0.0235, |
|
"step": 4780 |
|
}, |
|
{ |
|
"epoch": 2.3480392156862746, |
|
"grad_norm": 0.31760168075561523, |
|
"learning_rate": 2.001356539885213e-06, |
|
"loss": 0.0279, |
|
"step": 4790 |
|
}, |
|
{ |
|
"epoch": 2.3529411764705883, |
|
"grad_norm": 0.19233286380767822, |
|
"learning_rate": 1.8743146389363474e-06, |
|
"loss": 0.0174, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 2.357843137254902, |
|
"grad_norm": 0.28652098774909973, |
|
"learning_rate": 1.7514001008988923e-06, |
|
"loss": 0.0193, |
|
"step": 4810 |
|
}, |
|
{ |
|
"epoch": 2.3627450980392157, |
|
"grad_norm": 0.16845420002937317, |
|
"learning_rate": 1.6326180958045502e-06, |
|
"loss": 0.0177, |
|
"step": 4820 |
|
}, |
|
{ |
|
"epoch": 2.3676470588235294, |
|
"grad_norm": 0.12303224951028824, |
|
"learning_rate": 1.517973619862445e-06, |
|
"loss": 0.0205, |
|
"step": 4830 |
|
}, |
|
{ |
|
"epoch": 2.372549019607843, |
|
"grad_norm": 0.1442280411720276, |
|
"learning_rate": 1.4074714952489132e-06, |
|
"loss": 0.0215, |
|
"step": 4840 |
|
}, |
|
{ |
|
"epoch": 2.377450980392157, |
|
"grad_norm": 0.18173760175704956, |
|
"learning_rate": 1.3011163699046758e-06, |
|
"loss": 0.0166, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 2.3823529411764706, |
|
"grad_norm": 0.2543584406375885, |
|
"learning_rate": 1.1989127173393955e-06, |
|
"loss": 0.0195, |
|
"step": 4860 |
|
}, |
|
{ |
|
"epoch": 2.3872549019607843, |
|
"grad_norm": 0.14100182056427002, |
|
"learning_rate": 1.1008648364434493e-06, |
|
"loss": 0.0252, |
|
"step": 4870 |
|
}, |
|
{ |
|
"epoch": 2.392156862745098, |
|
"grad_norm": 0.22381238639354706, |
|
"learning_rate": 1.0069768513071287e-06, |
|
"loss": 0.0184, |
|
"step": 4880 |
|
}, |
|
{ |
|
"epoch": 2.3970588235294117, |
|
"grad_norm": 0.1522032916545868, |
|
"learning_rate": 9.172527110472007e-07, |
|
"loss": 0.0226, |
|
"step": 4890 |
|
}, |
|
{ |
|
"epoch": 2.4019607843137254, |
|
"grad_norm": 0.151298388838768, |
|
"learning_rate": 8.316961896407293e-07, |
|
"loss": 0.0161, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 2.406862745098039, |
|
"grad_norm": 0.10205589234828949, |
|
"learning_rate": 7.503108857664476e-07, |
|
"loss": 0.0221, |
|
"step": 4910 |
|
}, |
|
{ |
|
"epoch": 2.411764705882353, |
|
"grad_norm": 0.29145070910453796, |
|
"learning_rate": 6.731002226532557e-07, |
|
"loss": 0.0256, |
|
"step": 4920 |
|
}, |
|
{ |
|
"epoch": 2.4166666666666665, |
|
"grad_norm": 0.11820173263549805, |
|
"learning_rate": 6.000674479363366e-07, |
|
"loss": 0.0192, |
|
"step": 4930 |
|
}, |
|
{ |
|
"epoch": 2.4215686274509802, |
|
"grad_norm": 0.34024494886398315, |
|
"learning_rate": 5.312156335205098e-07, |
|
"loss": 0.0184, |
|
"step": 4940 |
|
}, |
|
{ |
|
"epoch": 2.426470588235294, |
|
"grad_norm": 0.2437254935503006, |
|
"learning_rate": 4.665476754510234e-07, |
|
"loss": 0.0172, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 2.431372549019608, |
|
"grad_norm": 0.22836612164974213, |
|
"learning_rate": 4.0606629379175143e-07, |
|
"loss": 0.0236, |
|
"step": 4960 |
|
}, |
|
{ |
|
"epoch": 2.436274509803922, |
|
"grad_norm": 0.30892133712768555, |
|
"learning_rate": 3.497740325107746e-07, |
|
"loss": 0.021, |
|
"step": 4970 |
|
}, |
|
{ |
|
"epoch": 2.4411764705882355, |
|
"grad_norm": 0.1466980129480362, |
|
"learning_rate": 2.9767325937338775e-07, |
|
"loss": 0.0158, |
|
"step": 4980 |
|
}, |
|
{ |
|
"epoch": 2.446078431372549, |
|
"grad_norm": 0.17119024693965912, |
|
"learning_rate": 2.497661658424688e-07, |
|
"loss": 0.0211, |
|
"step": 4990 |
|
}, |
|
{ |
|
"epoch": 2.450980392156863, |
|
"grad_norm": 0.24735775589942932, |
|
"learning_rate": 2.0605476698636328e-07, |
|
"loss": 0.0198, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 2.4558823529411766, |
|
"grad_norm": 0.22227314114570618, |
|
"learning_rate": 1.6654090139408551e-07, |
|
"loss": 0.0222, |
|
"step": 5010 |
|
}, |
|
{ |
|
"epoch": 2.4607843137254903, |
|
"grad_norm": 0.2925474941730499, |
|
"learning_rate": 1.3122623109795839e-07, |
|
"loss": 0.0212, |
|
"step": 5020 |
|
}, |
|
{ |
|
"epoch": 2.465686274509804, |
|
"grad_norm": 0.11938751488924026, |
|
"learning_rate": 1.0011224150379139e-07, |
|
"loss": 0.0206, |
|
"step": 5030 |
|
}, |
|
{ |
|
"epoch": 2.4705882352941178, |
|
"grad_norm": 0.18019632995128632, |
|
"learning_rate": 7.320024132829729e-08, |
|
"loss": 0.0175, |
|
"step": 5040 |
|
}, |
|
{ |
|
"epoch": 2.4754901960784315, |
|
"grad_norm": 0.1969996839761734, |
|
"learning_rate": 5.049136254413611e-08, |
|
"loss": 0.0196, |
|
"step": 5050 |
|
}, |
|
{ |
|
"epoch": 2.480392156862745, |
|
"grad_norm": 0.26969224214553833, |
|
"learning_rate": 3.1986560332242234e-08, |
|
"loss": 0.0159, |
|
"step": 5060 |
|
}, |
|
{ |
|
"epoch": 2.485294117647059, |
|
"grad_norm": 0.24808961153030396, |
|
"learning_rate": 1.768661304166752e-08, |
|
"loss": 0.0235, |
|
"step": 5070 |
|
}, |
|
{ |
|
"epoch": 2.4901960784313726, |
|
"grad_norm": 0.08070988208055496, |
|
"learning_rate": 7.592122156829806e-09, |
|
"loss": 0.0196, |
|
"step": 5080 |
|
}, |
|
{ |
|
"epoch": 2.4950980392156863, |
|
"grad_norm": 0.3003765642642975, |
|
"learning_rate": 1.7035122722663943e-09, |
|
"loss": 0.0214, |
|
"step": 5090 |
|
}, |
|
{ |
|
"epoch": 2.4995098039215686, |
|
"step": 5099, |
|
"total_flos": 1.0010895665363712e+17, |
|
"train_loss": 0.049077389520344766, |
|
"train_runtime": 1743.2855, |
|
"train_samples_per_second": 46.799, |
|
"train_steps_per_second": 2.925 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 5099, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.0010895665363712e+17, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|