diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,17016 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.9998763851485586, + "eval_steps": 500, + "global_step": 28312, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.7058823529411767e-07, + "loss": 4.9244, + "step": 10 + }, + { + "epoch": 0.0, + "learning_rate": 9.411764705882353e-07, + "loss": 4.5069, + "step": 20 + }, + { + "epoch": 0.0, + "learning_rate": 1.4117647058823531e-06, + "loss": 3.6782, + "step": 30 + }, + { + "epoch": 0.0, + "learning_rate": 1.8823529411764707e-06, + "loss": 2.9642, + "step": 40 + }, + { + "epoch": 0.0, + "learning_rate": 2.3529411764705885e-06, + "loss": 2.5767, + "step": 50 + }, + { + "epoch": 0.0, + "learning_rate": 2.8235294117647062e-06, + "loss": 2.401, + "step": 60 + }, + { + "epoch": 0.0, + "learning_rate": 3.2941176470588236e-06, + "loss": 2.3275, + "step": 70 + }, + { + "epoch": 0.01, + "learning_rate": 3.7647058823529414e-06, + "loss": 2.2191, + "step": 80 + }, + { + "epoch": 0.01, + "learning_rate": 4.235294117647059e-06, + "loss": 2.0439, + "step": 90 + }, + { + "epoch": 0.01, + "learning_rate": 4.705882352941177e-06, + "loss": 1.7901, + "step": 100 + }, + { + "epoch": 0.01, + "learning_rate": 5.176470588235295e-06, + "loss": 1.4897, + "step": 110 + }, + { + "epoch": 0.01, + "learning_rate": 5.6470588235294125e-06, + "loss": 1.251, + "step": 120 + }, + { + "epoch": 0.01, + "learning_rate": 6.11764705882353e-06, + "loss": 1.1331, + "step": 130 + }, + { + "epoch": 0.01, + "learning_rate": 6.588235294117647e-06, + "loss": 1.1103, + "step": 140 + }, + { + "epoch": 0.01, + "learning_rate": 7.058823529411766e-06, + "loss": 1.1048, + "step": 150 + }, + { + "epoch": 0.01, + "learning_rate": 7.529411764705883e-06, + "loss": 1.0945, + "step": 160 + }, + { + "epoch": 0.01, + "learning_rate": 8.000000000000001e-06, + "loss": 1.0806, + "step": 170 + }, + { + "epoch": 0.01, + "learning_rate": 8.470588235294118e-06, + "loss": 1.0726, + "step": 180 + }, + { + "epoch": 0.01, + "learning_rate": 8.941176470588237e-06, + "loss": 1.0658, + "step": 190 + }, + { + "epoch": 0.01, + "learning_rate": 9.411764705882354e-06, + "loss": 1.0578, + "step": 200 + }, + { + "epoch": 0.01, + "learning_rate": 9.882352941176472e-06, + "loss": 1.0535, + "step": 210 + }, + { + "epoch": 0.02, + "learning_rate": 1.035294117647059e-05, + "loss": 1.0402, + "step": 220 + }, + { + "epoch": 0.02, + "learning_rate": 1.0823529411764706e-05, + "loss": 1.047, + "step": 230 + }, + { + "epoch": 0.02, + "learning_rate": 1.1294117647058825e-05, + "loss": 1.0332, + "step": 240 + }, + { + "epoch": 0.02, + "learning_rate": 1.1764705882352942e-05, + "loss": 1.0275, + "step": 250 + }, + { + "epoch": 0.02, + "learning_rate": 1.223529411764706e-05, + "loss": 1.0338, + "step": 260 + }, + { + "epoch": 0.02, + "learning_rate": 1.2705882352941177e-05, + "loss": 1.0286, + "step": 270 + }, + { + "epoch": 0.02, + "learning_rate": 1.3176470588235294e-05, + "loss": 1.0248, + "step": 280 + }, + { + "epoch": 0.02, + "learning_rate": 1.3647058823529413e-05, + "loss": 1.0303, + "step": 290 + }, + { + "epoch": 0.02, + "learning_rate": 1.4117647058823532e-05, + "loss": 1.0269, + "step": 300 + }, + { + "epoch": 0.02, + "learning_rate": 1.4588235294117647e-05, + "loss": 1.019, + "step": 310 + }, + { + "epoch": 0.02, + "learning_rate": 1.5058823529411765e-05, + "loss": 1.0283, + "step": 320 + }, + { + "epoch": 0.02, + "learning_rate": 1.5529411764705882e-05, + "loss": 1.0178, + "step": 330 + }, + { + "epoch": 0.02, + "learning_rate": 1.6000000000000003e-05, + "loss": 1.0279, + "step": 340 + }, + { + "epoch": 0.02, + "learning_rate": 1.647058823529412e-05, + "loss": 1.0125, + "step": 350 + }, + { + "epoch": 0.03, + "learning_rate": 1.6941176470588237e-05, + "loss": 1.0162, + "step": 360 + }, + { + "epoch": 0.03, + "learning_rate": 1.7411764705882353e-05, + "loss": 1.0113, + "step": 370 + }, + { + "epoch": 0.03, + "learning_rate": 1.7882352941176474e-05, + "loss": 1.0219, + "step": 380 + }, + { + "epoch": 0.03, + "learning_rate": 1.8352941176470587e-05, + "loss": 1.0163, + "step": 390 + }, + { + "epoch": 0.03, + "learning_rate": 1.8823529411764708e-05, + "loss": 1.0142, + "step": 400 + }, + { + "epoch": 0.03, + "learning_rate": 1.9294117647058825e-05, + "loss": 1.0063, + "step": 410 + }, + { + "epoch": 0.03, + "learning_rate": 1.9764705882352945e-05, + "loss": 0.9995, + "step": 420 + }, + { + "epoch": 0.03, + "learning_rate": 2.023529411764706e-05, + "loss": 1.0093, + "step": 430 + }, + { + "epoch": 0.03, + "learning_rate": 2.070588235294118e-05, + "loss": 1.0074, + "step": 440 + }, + { + "epoch": 0.03, + "learning_rate": 2.1176470588235296e-05, + "loss": 1.003, + "step": 450 + }, + { + "epoch": 0.03, + "learning_rate": 2.1647058823529413e-05, + "loss": 1.0029, + "step": 460 + }, + { + "epoch": 0.03, + "learning_rate": 2.2117647058823533e-05, + "loss": 1.0042, + "step": 470 + }, + { + "epoch": 0.03, + "learning_rate": 2.258823529411765e-05, + "loss": 1.0121, + "step": 480 + }, + { + "epoch": 0.03, + "learning_rate": 2.3058823529411763e-05, + "loss": 1.0019, + "step": 490 + }, + { + "epoch": 0.04, + "learning_rate": 2.3529411764705884e-05, + "loss": 1.0064, + "step": 500 + }, + { + "epoch": 0.04, + "learning_rate": 2.4e-05, + "loss": 1.0074, + "step": 510 + }, + { + "epoch": 0.04, + "learning_rate": 2.447058823529412e-05, + "loss": 1.0118, + "step": 520 + }, + { + "epoch": 0.04, + "learning_rate": 2.4941176470588238e-05, + "loss": 0.9997, + "step": 530 + }, + { + "epoch": 0.04, + "learning_rate": 2.5411764705882355e-05, + "loss": 1.0053, + "step": 540 + }, + { + "epoch": 0.04, + "learning_rate": 2.5882352941176475e-05, + "loss": 0.9937, + "step": 550 + }, + { + "epoch": 0.04, + "learning_rate": 2.635294117647059e-05, + "loss": 1.0067, + "step": 560 + }, + { + "epoch": 0.04, + "learning_rate": 2.6823529411764706e-05, + "loss": 1.0023, + "step": 570 + }, + { + "epoch": 0.04, + "learning_rate": 2.7294117647058826e-05, + "loss": 0.9994, + "step": 580 + }, + { + "epoch": 0.04, + "learning_rate": 2.7764705882352943e-05, + "loss": 0.9861, + "step": 590 + }, + { + "epoch": 0.04, + "learning_rate": 2.8235294117647063e-05, + "loss": 0.999, + "step": 600 + }, + { + "epoch": 0.04, + "learning_rate": 2.870588235294118e-05, + "loss": 0.9987, + "step": 610 + }, + { + "epoch": 0.04, + "learning_rate": 2.9176470588235294e-05, + "loss": 0.9934, + "step": 620 + }, + { + "epoch": 0.04, + "learning_rate": 2.9647058823529414e-05, + "loss": 0.9884, + "step": 630 + }, + { + "epoch": 0.05, + "learning_rate": 3.011764705882353e-05, + "loss": 0.9996, + "step": 640 + }, + { + "epoch": 0.05, + "learning_rate": 3.0588235294117644e-05, + "loss": 0.9902, + "step": 650 + }, + { + "epoch": 0.05, + "learning_rate": 3.1058823529411765e-05, + "loss": 0.9965, + "step": 660 + }, + { + "epoch": 0.05, + "learning_rate": 3.1529411764705885e-05, + "loss": 0.9936, + "step": 670 + }, + { + "epoch": 0.05, + "learning_rate": 3.2000000000000005e-05, + "loss": 0.9821, + "step": 680 + }, + { + "epoch": 0.05, + "learning_rate": 3.247058823529412e-05, + "loss": 0.9889, + "step": 690 + }, + { + "epoch": 0.05, + "learning_rate": 3.294117647058824e-05, + "loss": 0.9944, + "step": 700 + }, + { + "epoch": 0.05, + "learning_rate": 3.341176470588236e-05, + "loss": 0.9821, + "step": 710 + }, + { + "epoch": 0.05, + "learning_rate": 3.388235294117647e-05, + "loss": 0.9928, + "step": 720 + }, + { + "epoch": 0.05, + "learning_rate": 3.4352941176470587e-05, + "loss": 0.991, + "step": 730 + }, + { + "epoch": 0.05, + "learning_rate": 3.482352941176471e-05, + "loss": 0.9895, + "step": 740 + }, + { + "epoch": 0.05, + "learning_rate": 3.529411764705883e-05, + "loss": 0.9899, + "step": 750 + }, + { + "epoch": 0.05, + "learning_rate": 3.576470588235295e-05, + "loss": 0.99, + "step": 760 + }, + { + "epoch": 0.05, + "learning_rate": 3.623529411764706e-05, + "loss": 0.9951, + "step": 770 + }, + { + "epoch": 0.06, + "learning_rate": 3.6705882352941175e-05, + "loss": 0.9871, + "step": 780 + }, + { + "epoch": 0.06, + "learning_rate": 3.7176470588235295e-05, + "loss": 0.9781, + "step": 790 + }, + { + "epoch": 0.06, + "learning_rate": 3.7647058823529415e-05, + "loss": 0.9838, + "step": 800 + }, + { + "epoch": 0.06, + "learning_rate": 3.811764705882353e-05, + "loss": 0.9724, + "step": 810 + }, + { + "epoch": 0.06, + "learning_rate": 3.858823529411765e-05, + "loss": 0.9764, + "step": 820 + }, + { + "epoch": 0.06, + "learning_rate": 3.905882352941177e-05, + "loss": 0.981, + "step": 830 + }, + { + "epoch": 0.06, + "learning_rate": 3.952941176470589e-05, + "loss": 0.9841, + "step": 840 + }, + { + "epoch": 0.06, + "learning_rate": 4e-05, + "loss": 0.9871, + "step": 850 + }, + { + "epoch": 0.06, + "learning_rate": 3.999998691314254e-05, + "loss": 0.9905, + "step": 860 + }, + { + "epoch": 0.06, + "learning_rate": 3.999994765258726e-05, + "loss": 0.9767, + "step": 870 + }, + { + "epoch": 0.06, + "learning_rate": 3.999988221838556e-05, + "loss": 0.9712, + "step": 880 + }, + { + "epoch": 0.06, + "learning_rate": 3.9999790610623065e-05, + "loss": 0.9699, + "step": 890 + }, + { + "epoch": 0.06, + "learning_rate": 3.9999672829419655e-05, + "loss": 0.9781, + "step": 900 + }, + { + "epoch": 0.06, + "learning_rate": 3.999952887492948e-05, + "loss": 0.9844, + "step": 910 + }, + { + "epoch": 0.06, + "learning_rate": 3.9999358747340924e-05, + "loss": 0.9808, + "step": 920 + }, + { + "epoch": 0.07, + "learning_rate": 3.999916244687663e-05, + "loss": 0.979, + "step": 930 + }, + { + "epoch": 0.07, + "learning_rate": 3.99989399737935e-05, + "loss": 0.9738, + "step": 940 + }, + { + "epoch": 0.07, + "learning_rate": 3.9998691328382675e-05, + "loss": 0.9724, + "step": 950 + }, + { + "epoch": 0.07, + "learning_rate": 3.999841651096955e-05, + "loss": 0.9729, + "step": 960 + }, + { + "epoch": 0.07, + "learning_rate": 3.999811552191379e-05, + "loss": 0.9831, + "step": 970 + }, + { + "epoch": 0.07, + "learning_rate": 3.999778836160929e-05, + "loss": 0.9809, + "step": 980 + }, + { + "epoch": 0.07, + "learning_rate": 3.999743503048419e-05, + "loss": 0.9688, + "step": 990 + }, + { + "epoch": 0.07, + "learning_rate": 3.999705552900088e-05, + "loss": 0.9832, + "step": 1000 + }, + { + "epoch": 0.07, + "learning_rate": 3.9996649857656045e-05, + "loss": 0.97, + "step": 1010 + }, + { + "epoch": 0.07, + "learning_rate": 3.999621801698055e-05, + "loss": 0.9693, + "step": 1020 + }, + { + "epoch": 0.07, + "learning_rate": 3.999576000753955e-05, + "loss": 0.9661, + "step": 1030 + }, + { + "epoch": 0.07, + "learning_rate": 3.999527582993243e-05, + "loss": 0.9646, + "step": 1040 + }, + { + "epoch": 0.07, + "learning_rate": 3.999476548479283e-05, + "loss": 0.9751, + "step": 1050 + }, + { + "epoch": 0.07, + "learning_rate": 3.9994228972788636e-05, + "loss": 0.9712, + "step": 1060 + }, + { + "epoch": 0.08, + "learning_rate": 3.999366629462197e-05, + "loss": 0.9702, + "step": 1070 + }, + { + "epoch": 0.08, + "learning_rate": 3.9993077451029193e-05, + "loss": 0.9658, + "step": 1080 + }, + { + "epoch": 0.08, + "learning_rate": 3.9992462442780927e-05, + "loss": 0.9715, + "step": 1090 + }, + { + "epoch": 0.08, + "learning_rate": 3.999182127068202e-05, + "loss": 0.9743, + "step": 1100 + }, + { + "epoch": 0.08, + "learning_rate": 3.999115393557157e-05, + "loss": 0.9724, + "step": 1110 + }, + { + "epoch": 0.08, + "learning_rate": 3.99904604383229e-05, + "loss": 0.9667, + "step": 1120 + }, + { + "epoch": 0.08, + "learning_rate": 3.998974077984359e-05, + "loss": 0.9678, + "step": 1130 + }, + { + "epoch": 0.08, + "learning_rate": 3.9988994961075434e-05, + "loss": 0.967, + "step": 1140 + }, + { + "epoch": 0.08, + "learning_rate": 3.998822298299448e-05, + "loss": 0.9712, + "step": 1150 + }, + { + "epoch": 0.08, + "learning_rate": 3.998742484661101e-05, + "loss": 0.9719, + "step": 1160 + }, + { + "epoch": 0.08, + "learning_rate": 3.998660055296953e-05, + "loss": 0.96, + "step": 1170 + }, + { + "epoch": 0.08, + "learning_rate": 3.998575010314878e-05, + "loss": 0.9663, + "step": 1180 + }, + { + "epoch": 0.08, + "learning_rate": 3.9984873498261734e-05, + "loss": 0.9636, + "step": 1190 + }, + { + "epoch": 0.08, + "learning_rate": 3.998397073945559e-05, + "loss": 0.963, + "step": 1200 + }, + { + "epoch": 0.09, + "learning_rate": 3.998304182791178e-05, + "loss": 0.9634, + "step": 1210 + }, + { + "epoch": 0.09, + "learning_rate": 3.998208676484596e-05, + "loss": 0.9656, + "step": 1220 + }, + { + "epoch": 0.09, + "learning_rate": 3.998110555150799e-05, + "loss": 0.961, + "step": 1230 + }, + { + "epoch": 0.09, + "learning_rate": 3.9980098189181985e-05, + "loss": 0.9524, + "step": 1240 + }, + { + "epoch": 0.09, + "learning_rate": 3.997906467918627e-05, + "loss": 0.9728, + "step": 1250 + }, + { + "epoch": 0.09, + "learning_rate": 3.997800502287336e-05, + "loss": 0.9667, + "step": 1260 + }, + { + "epoch": 0.09, + "learning_rate": 3.997691922163004e-05, + "loss": 0.9617, + "step": 1270 + }, + { + "epoch": 0.09, + "learning_rate": 3.997580727687727e-05, + "loss": 0.9552, + "step": 1280 + }, + { + "epoch": 0.09, + "learning_rate": 3.997466919007024e-05, + "loss": 0.9621, + "step": 1290 + }, + { + "epoch": 0.09, + "learning_rate": 3.997350496269834e-05, + "loss": 0.958, + "step": 1300 + }, + { + "epoch": 0.09, + "learning_rate": 3.997231459628519e-05, + "loss": 0.971, + "step": 1310 + }, + { + "epoch": 0.09, + "learning_rate": 3.9971098092388596e-05, + "loss": 0.9472, + "step": 1320 + }, + { + "epoch": 0.09, + "learning_rate": 3.9969855452600585e-05, + "loss": 0.964, + "step": 1330 + }, + { + "epoch": 0.09, + "learning_rate": 3.996858667854738e-05, + "loss": 0.9638, + "step": 1340 + }, + { + "epoch": 0.1, + "learning_rate": 3.99672917718894e-05, + "loss": 0.9605, + "step": 1350 + }, + { + "epoch": 0.1, + "learning_rate": 3.996597073432129e-05, + "loss": 0.9614, + "step": 1360 + }, + { + "epoch": 0.1, + "learning_rate": 3.996462356757185e-05, + "loss": 0.9517, + "step": 1370 + }, + { + "epoch": 0.1, + "learning_rate": 3.996325027340411e-05, + "loss": 0.9506, + "step": 1380 + }, + { + "epoch": 0.1, + "learning_rate": 3.9961850853615285e-05, + "loss": 0.9477, + "step": 1390 + }, + { + "epoch": 0.1, + "learning_rate": 3.996042531003677e-05, + "loss": 0.9546, + "step": 1400 + }, + { + "epoch": 0.1, + "learning_rate": 3.995897364453415e-05, + "loss": 0.944, + "step": 1410 + }, + { + "epoch": 0.1, + "learning_rate": 3.9957495859007206e-05, + "loss": 0.9562, + "step": 1420 + }, + { + "epoch": 0.1, + "learning_rate": 3.9955991955389885e-05, + "loss": 0.955, + "step": 1430 + }, + { + "epoch": 0.1, + "learning_rate": 3.995446193565033e-05, + "loss": 0.9622, + "step": 1440 + }, + { + "epoch": 0.1, + "learning_rate": 3.9952905801790866e-05, + "loss": 0.9558, + "step": 1450 + }, + { + "epoch": 0.1, + "learning_rate": 3.995132355584797e-05, + "loss": 0.9489, + "step": 1460 + }, + { + "epoch": 0.1, + "learning_rate": 3.994971519989231e-05, + "loss": 0.9579, + "step": 1470 + }, + { + "epoch": 0.1, + "learning_rate": 3.9948080736028714e-05, + "loss": 0.9577, + "step": 1480 + }, + { + "epoch": 0.11, + "learning_rate": 3.994642016639619e-05, + "loss": 0.9568, + "step": 1490 + }, + { + "epoch": 0.11, + "learning_rate": 3.9944733493167885e-05, + "loss": 0.9444, + "step": 1500 + }, + { + "epoch": 0.11, + "learning_rate": 3.994302071855114e-05, + "loss": 0.9514, + "step": 1510 + }, + { + "epoch": 0.11, + "learning_rate": 3.994128184478745e-05, + "loss": 0.9394, + "step": 1520 + }, + { + "epoch": 0.11, + "learning_rate": 3.993951687415242e-05, + "loss": 0.9464, + "step": 1530 + }, + { + "epoch": 0.11, + "learning_rate": 3.993772580895587e-05, + "loss": 0.9436, + "step": 1540 + }, + { + "epoch": 0.11, + "learning_rate": 3.993590865154173e-05, + "loss": 0.9454, + "step": 1550 + }, + { + "epoch": 0.11, + "learning_rate": 3.9934065404288086e-05, + "loss": 0.95, + "step": 1560 + }, + { + "epoch": 0.11, + "learning_rate": 3.9932196069607175e-05, + "loss": 0.9569, + "step": 1570 + }, + { + "epoch": 0.11, + "learning_rate": 3.9930300649945375e-05, + "loss": 0.9461, + "step": 1580 + }, + { + "epoch": 0.11, + "learning_rate": 3.992837914778318e-05, + "loss": 0.9453, + "step": 1590 + }, + { + "epoch": 0.11, + "learning_rate": 3.992643156563524e-05, + "loss": 0.9464, + "step": 1600 + }, + { + "epoch": 0.11, + "learning_rate": 3.992445790605032e-05, + "loss": 0.942, + "step": 1610 + }, + { + "epoch": 0.11, + "learning_rate": 3.992245817161134e-05, + "loss": 0.9395, + "step": 1620 + }, + { + "epoch": 0.12, + "learning_rate": 3.992043236493531e-05, + "loss": 0.9497, + "step": 1630 + }, + { + "epoch": 0.12, + "learning_rate": 3.991838048867338e-05, + "loss": 0.9503, + "step": 1640 + }, + { + "epoch": 0.12, + "learning_rate": 3.9916302545510795e-05, + "loss": 0.9431, + "step": 1650 + }, + { + "epoch": 0.12, + "learning_rate": 3.991419853816694e-05, + "loss": 0.9549, + "step": 1660 + }, + { + "epoch": 0.12, + "learning_rate": 3.9912068469395315e-05, + "loss": 0.9464, + "step": 1670 + }, + { + "epoch": 0.12, + "learning_rate": 3.990991234198349e-05, + "loss": 0.94, + "step": 1680 + }, + { + "epoch": 0.12, + "learning_rate": 3.9907730158753164e-05, + "loss": 0.9347, + "step": 1690 + }, + { + "epoch": 0.12, + "learning_rate": 3.9905521922560134e-05, + "loss": 0.9414, + "step": 1700 + }, + { + "epoch": 0.12, + "learning_rate": 3.9903287636294286e-05, + "loss": 0.9365, + "step": 1710 + }, + { + "epoch": 0.12, + "learning_rate": 3.9901027302879585e-05, + "loss": 0.9406, + "step": 1720 + }, + { + "epoch": 0.12, + "learning_rate": 3.989874092527411e-05, + "loss": 0.9446, + "step": 1730 + }, + { + "epoch": 0.12, + "learning_rate": 3.989642850647002e-05, + "loss": 0.9456, + "step": 1740 + }, + { + "epoch": 0.12, + "learning_rate": 3.989409004949352e-05, + "loss": 0.9483, + "step": 1750 + }, + { + "epoch": 0.12, + "learning_rate": 3.989172555740494e-05, + "loss": 0.9379, + "step": 1760 + }, + { + "epoch": 0.13, + "learning_rate": 3.988933503329864e-05, + "loss": 0.943, + "step": 1770 + }, + { + "epoch": 0.13, + "learning_rate": 3.988691848030307e-05, + "loss": 0.938, + "step": 1780 + }, + { + "epoch": 0.13, + "learning_rate": 3.9884475901580736e-05, + "loss": 0.9429, + "step": 1790 + }, + { + "epoch": 0.13, + "learning_rate": 3.988200730032821e-05, + "loss": 0.9352, + "step": 1800 + }, + { + "epoch": 0.13, + "learning_rate": 3.987951267977612e-05, + "loss": 0.9414, + "step": 1810 + }, + { + "epoch": 0.13, + "learning_rate": 3.9876992043189135e-05, + "loss": 0.9367, + "step": 1820 + }, + { + "epoch": 0.13, + "learning_rate": 3.987444539386597e-05, + "loss": 0.9427, + "step": 1830 + }, + { + "epoch": 0.13, + "learning_rate": 3.98718727351394e-05, + "loss": 0.9416, + "step": 1840 + }, + { + "epoch": 0.13, + "learning_rate": 3.986927407037622e-05, + "loss": 0.9362, + "step": 1850 + }, + { + "epoch": 0.13, + "learning_rate": 3.986664940297726e-05, + "loss": 0.9452, + "step": 1860 + }, + { + "epoch": 0.13, + "learning_rate": 3.98639987363774e-05, + "loss": 0.9435, + "step": 1870 + }, + { + "epoch": 0.13, + "learning_rate": 3.986132207404551e-05, + "loss": 0.9354, + "step": 1880 + }, + { + "epoch": 0.13, + "learning_rate": 3.9858619419484526e-05, + "loss": 0.9401, + "step": 1890 + }, + { + "epoch": 0.13, + "learning_rate": 3.985589077623135e-05, + "loss": 0.9341, + "step": 1900 + }, + { + "epoch": 0.13, + "learning_rate": 3.9853136147856924e-05, + "loss": 0.9342, + "step": 1910 + }, + { + "epoch": 0.14, + "learning_rate": 3.98503555379662e-05, + "loss": 0.9357, + "step": 1920 + }, + { + "epoch": 0.14, + "learning_rate": 3.984754895019812e-05, + "loss": 0.9393, + "step": 1930 + }, + { + "epoch": 0.14, + "learning_rate": 3.984471638822562e-05, + "loss": 0.9401, + "step": 1940 + }, + { + "epoch": 0.14, + "learning_rate": 3.984185785575562e-05, + "loss": 0.9344, + "step": 1950 + }, + { + "epoch": 0.14, + "learning_rate": 3.983897335652907e-05, + "loss": 0.9319, + "step": 1960 + }, + { + "epoch": 0.14, + "learning_rate": 3.9836062894320864e-05, + "loss": 0.9449, + "step": 1970 + }, + { + "epoch": 0.14, + "learning_rate": 3.983312647293987e-05, + "loss": 0.9364, + "step": 1980 + }, + { + "epoch": 0.14, + "learning_rate": 3.983016409622895e-05, + "loss": 0.9347, + "step": 1990 + }, + { + "epoch": 0.14, + "learning_rate": 3.982717576806492e-05, + "loss": 0.9319, + "step": 2000 + }, + { + "epoch": 0.14, + "learning_rate": 3.9824161492358565e-05, + "loss": 0.9361, + "step": 2010 + }, + { + "epoch": 0.14, + "learning_rate": 3.982112127305463e-05, + "loss": 0.9315, + "step": 2020 + }, + { + "epoch": 0.14, + "learning_rate": 3.9818055114131795e-05, + "loss": 0.9288, + "step": 2030 + }, + { + "epoch": 0.14, + "learning_rate": 3.981496301960271e-05, + "loss": 0.9358, + "step": 2040 + }, + { + "epoch": 0.14, + "learning_rate": 3.981184499351395e-05, + "loss": 0.9358, + "step": 2050 + }, + { + "epoch": 0.15, + "learning_rate": 3.9808701039946026e-05, + "loss": 0.9335, + "step": 2060 + }, + { + "epoch": 0.15, + "learning_rate": 3.98055311630134e-05, + "loss": 0.941, + "step": 2070 + }, + { + "epoch": 0.15, + "learning_rate": 3.980233536686442e-05, + "loss": 0.9366, + "step": 2080 + }, + { + "epoch": 0.15, + "learning_rate": 3.9799113655681407e-05, + "loss": 0.9355, + "step": 2090 + }, + { + "epoch": 0.15, + "learning_rate": 3.979586603368055e-05, + "loss": 0.9345, + "step": 2100 + }, + { + "epoch": 0.15, + "learning_rate": 3.979259250511197e-05, + "loss": 0.9362, + "step": 2110 + }, + { + "epoch": 0.15, + "learning_rate": 3.978929307425969e-05, + "loss": 0.9266, + "step": 2120 + }, + { + "epoch": 0.15, + "learning_rate": 3.978596774544163e-05, + "loss": 0.9343, + "step": 2130 + }, + { + "epoch": 0.15, + "learning_rate": 3.978261652300959e-05, + "loss": 0.9275, + "step": 2140 + }, + { + "epoch": 0.15, + "learning_rate": 3.977923941134928e-05, + "loss": 0.9312, + "step": 2150 + }, + { + "epoch": 0.15, + "learning_rate": 3.9775836414880266e-05, + "loss": 0.9326, + "step": 2160 + }, + { + "epoch": 0.15, + "learning_rate": 3.9772407538056014e-05, + "loss": 0.9425, + "step": 2170 + }, + { + "epoch": 0.15, + "learning_rate": 3.976895278536383e-05, + "loss": 0.935, + "step": 2180 + }, + { + "epoch": 0.15, + "learning_rate": 3.976547216132492e-05, + "loss": 0.9308, + "step": 2190 + }, + { + "epoch": 0.16, + "learning_rate": 3.976196567049431e-05, + "loss": 0.9319, + "step": 2200 + }, + { + "epoch": 0.16, + "learning_rate": 3.97584333174609e-05, + "loss": 0.933, + "step": 2210 + }, + { + "epoch": 0.16, + "learning_rate": 3.975487510684743e-05, + "loss": 0.9285, + "step": 2220 + }, + { + "epoch": 0.16, + "learning_rate": 3.975129104331048e-05, + "loss": 0.9304, + "step": 2230 + }, + { + "epoch": 0.16, + "learning_rate": 3.974768113154046e-05, + "loss": 0.9349, + "step": 2240 + }, + { + "epoch": 0.16, + "learning_rate": 3.974404537626162e-05, + "loss": 0.9303, + "step": 2250 + }, + { + "epoch": 0.16, + "learning_rate": 3.9740383782232e-05, + "loss": 0.9273, + "step": 2260 + }, + { + "epoch": 0.16, + "learning_rate": 3.97366963542435e-05, + "loss": 0.9319, + "step": 2270 + }, + { + "epoch": 0.16, + "learning_rate": 3.9732983097121795e-05, + "loss": 0.9261, + "step": 2280 + }, + { + "epoch": 0.16, + "learning_rate": 3.972924401572637e-05, + "loss": 0.9354, + "step": 2290 + }, + { + "epoch": 0.16, + "learning_rate": 3.972547911495051e-05, + "loss": 0.9292, + "step": 2300 + }, + { + "epoch": 0.16, + "learning_rate": 3.972168839972128e-05, + "loss": 0.9302, + "step": 2310 + }, + { + "epoch": 0.16, + "learning_rate": 3.9717871874999545e-05, + "loss": 0.9232, + "step": 2320 + }, + { + "epoch": 0.16, + "learning_rate": 3.971402954577993e-05, + "loss": 0.9251, + "step": 2330 + }, + { + "epoch": 0.17, + "learning_rate": 3.971016141709083e-05, + "loss": 0.9447, + "step": 2340 + }, + { + "epoch": 0.17, + "learning_rate": 3.970626749399443e-05, + "loss": 0.9549, + "step": 2350 + }, + { + "epoch": 0.17, + "learning_rate": 3.9702347781586645e-05, + "loss": 0.9613, + "step": 2360 + }, + { + "epoch": 0.17, + "learning_rate": 3.9698402284997135e-05, + "loss": 1.0125, + "step": 2370 + }, + { + "epoch": 0.17, + "learning_rate": 3.9694431009389315e-05, + "loss": 0.9856, + "step": 2380 + }, + { + "epoch": 0.17, + "learning_rate": 3.969043395996035e-05, + "loss": 0.9733, + "step": 2390 + }, + { + "epoch": 0.17, + "learning_rate": 3.968641114194111e-05, + "loss": 0.9635, + "step": 2400 + }, + { + "epoch": 0.17, + "learning_rate": 3.9682362560596214e-05, + "loss": 0.9609, + "step": 2410 + }, + { + "epoch": 0.17, + "learning_rate": 3.967828822122397e-05, + "loss": 0.9423, + "step": 2420 + }, + { + "epoch": 0.17, + "learning_rate": 3.967418812915641e-05, + "loss": 0.9416, + "step": 2430 + }, + { + "epoch": 0.17, + "learning_rate": 3.967006228975927e-05, + "loss": 0.9448, + "step": 2440 + }, + { + "epoch": 0.17, + "learning_rate": 3.966591070843197e-05, + "loss": 0.942, + "step": 2450 + }, + { + "epoch": 0.17, + "learning_rate": 3.9661733390607635e-05, + "loss": 0.9356, + "step": 2460 + }, + { + "epoch": 0.17, + "learning_rate": 3.965753034175305e-05, + "loss": 0.9379, + "step": 2470 + }, + { + "epoch": 0.18, + "learning_rate": 3.96533015673687e-05, + "loss": 0.9402, + "step": 2480 + }, + { + "epoch": 0.18, + "learning_rate": 3.96490470729887e-05, + "loss": 0.937, + "step": 2490 + }, + { + "epoch": 0.18, + "learning_rate": 3.964476686418087e-05, + "loss": 0.9311, + "step": 2500 + }, + { + "epoch": 0.18, + "learning_rate": 3.9640460946546645e-05, + "loss": 0.9384, + "step": 2510 + }, + { + "epoch": 0.18, + "learning_rate": 3.9636129325721123e-05, + "loss": 0.9325, + "step": 2520 + }, + { + "epoch": 0.18, + "learning_rate": 3.963177200737303e-05, + "loss": 0.9311, + "step": 2530 + }, + { + "epoch": 0.18, + "learning_rate": 3.9627388997204726e-05, + "loss": 0.9279, + "step": 2540 + }, + { + "epoch": 0.18, + "learning_rate": 3.9622980300952204e-05, + "loss": 0.9336, + "step": 2550 + }, + { + "epoch": 0.18, + "learning_rate": 3.961854592438504e-05, + "loss": 0.9325, + "step": 2560 + }, + { + "epoch": 0.18, + "learning_rate": 3.961408587330647e-05, + "loss": 0.9294, + "step": 2570 + }, + { + "epoch": 0.18, + "learning_rate": 3.960960015355327e-05, + "loss": 0.9353, + "step": 2580 + }, + { + "epoch": 0.18, + "learning_rate": 3.960508877099586e-05, + "loss": 0.9248, + "step": 2590 + }, + { + "epoch": 0.18, + "learning_rate": 3.960055173153821e-05, + "loss": 0.9253, + "step": 2600 + }, + { + "epoch": 0.18, + "learning_rate": 3.959598904111788e-05, + "loss": 0.9241, + "step": 2610 + }, + { + "epoch": 0.19, + "learning_rate": 3.9591400705706e-05, + "loss": 0.9208, + "step": 2620 + }, + { + "epoch": 0.19, + "learning_rate": 3.9586786731307254e-05, + "loss": 0.9267, + "step": 2630 + }, + { + "epoch": 0.19, + "learning_rate": 3.9582147123959894e-05, + "loss": 0.9344, + "step": 2640 + }, + { + "epoch": 0.19, + "learning_rate": 3.9577481889735707e-05, + "loss": 0.9254, + "step": 2650 + }, + { + "epoch": 0.19, + "learning_rate": 3.957279103474001e-05, + "loss": 0.929, + "step": 2660 + }, + { + "epoch": 0.19, + "learning_rate": 3.9568074565111665e-05, + "loss": 0.9263, + "step": 2670 + }, + { + "epoch": 0.19, + "learning_rate": 3.956333248702304e-05, + "loss": 0.9292, + "step": 2680 + }, + { + "epoch": 0.19, + "learning_rate": 3.955856480668004e-05, + "loss": 0.9239, + "step": 2690 + }, + { + "epoch": 0.19, + "learning_rate": 3.9553771530322044e-05, + "loss": 0.93, + "step": 2700 + }, + { + "epoch": 0.19, + "learning_rate": 3.954895266422195e-05, + "loss": 0.9225, + "step": 2710 + }, + { + "epoch": 0.19, + "learning_rate": 3.9544108214686154e-05, + "loss": 0.9265, + "step": 2720 + }, + { + "epoch": 0.19, + "learning_rate": 3.9539238188054494e-05, + "loss": 0.9242, + "step": 2730 + }, + { + "epoch": 0.19, + "learning_rate": 3.953434259070032e-05, + "loss": 0.9193, + "step": 2740 + }, + { + "epoch": 0.19, + "learning_rate": 3.9529421429030426e-05, + "loss": 0.9261, + "step": 2750 + }, + { + "epoch": 0.19, + "learning_rate": 3.952447470948506e-05, + "loss": 0.925, + "step": 2760 + }, + { + "epoch": 0.2, + "learning_rate": 3.9519502438537944e-05, + "loss": 0.9155, + "step": 2770 + }, + { + "epoch": 0.2, + "learning_rate": 3.95145046226962e-05, + "loss": 0.9255, + "step": 2780 + }, + { + "epoch": 0.2, + "learning_rate": 3.95094812685004e-05, + "loss": 0.9201, + "step": 2790 + }, + { + "epoch": 0.2, + "learning_rate": 3.950443238252453e-05, + "loss": 0.9211, + "step": 2800 + }, + { + "epoch": 0.2, + "learning_rate": 3.9499357971376014e-05, + "loss": 0.9248, + "step": 2810 + }, + { + "epoch": 0.2, + "learning_rate": 3.949425804169565e-05, + "loss": 0.9207, + "step": 2820 + }, + { + "epoch": 0.2, + "learning_rate": 3.948913260015764e-05, + "loss": 0.9201, + "step": 2830 + }, + { + "epoch": 0.2, + "learning_rate": 3.9483981653469586e-05, + "loss": 0.9185, + "step": 2840 + }, + { + "epoch": 0.2, + "learning_rate": 3.947880520837245e-05, + "loss": 0.9272, + "step": 2850 + }, + { + "epoch": 0.2, + "learning_rate": 3.9473603271640575e-05, + "loss": 0.9164, + "step": 2860 + }, + { + "epoch": 0.2, + "learning_rate": 3.9468375850081664e-05, + "loss": 0.9204, + "step": 2870 + }, + { + "epoch": 0.2, + "learning_rate": 3.946312295053676e-05, + "loss": 0.9218, + "step": 2880 + }, + { + "epoch": 0.2, + "learning_rate": 3.9457844579880274e-05, + "loss": 0.9187, + "step": 2890 + }, + { + "epoch": 0.2, + "learning_rate": 3.9452540745019916e-05, + "loss": 0.9192, + "step": 2900 + }, + { + "epoch": 0.21, + "learning_rate": 3.9447211452896754e-05, + "loss": 0.9207, + "step": 2910 + }, + { + "epoch": 0.21, + "learning_rate": 3.944185671048514e-05, + "loss": 0.913, + "step": 2920 + }, + { + "epoch": 0.21, + "learning_rate": 3.943647652479277e-05, + "loss": 0.9088, + "step": 2930 + }, + { + "epoch": 0.21, + "learning_rate": 3.9431070902860605e-05, + "loss": 0.908, + "step": 2940 + }, + { + "epoch": 0.21, + "learning_rate": 3.94256398517629e-05, + "loss": 0.9183, + "step": 2950 + }, + { + "epoch": 0.21, + "learning_rate": 3.942018337860721e-05, + "loss": 0.9119, + "step": 2960 + }, + { + "epoch": 0.21, + "learning_rate": 3.941470149053433e-05, + "loss": 0.9215, + "step": 2970 + }, + { + "epoch": 0.21, + "learning_rate": 3.9409194194718325e-05, + "loss": 0.9189, + "step": 2980 + }, + { + "epoch": 0.21, + "learning_rate": 3.9403661498366536e-05, + "loss": 0.9221, + "step": 2990 + }, + { + "epoch": 0.21, + "learning_rate": 3.93981034087195e-05, + "loss": 0.9125, + "step": 3000 + }, + { + "epoch": 0.21, + "learning_rate": 3.9392519933051025e-05, + "loss": 0.9196, + "step": 3010 + }, + { + "epoch": 0.21, + "learning_rate": 3.938691107866812e-05, + "loss": 0.9212, + "step": 3020 + }, + { + "epoch": 0.21, + "learning_rate": 3.938127685291101e-05, + "loss": 0.918, + "step": 3030 + }, + { + "epoch": 0.21, + "learning_rate": 3.9375617263153136e-05, + "loss": 0.9136, + "step": 3040 + }, + { + "epoch": 0.22, + "learning_rate": 3.9369932316801115e-05, + "loss": 0.9161, + "step": 3050 + }, + { + "epoch": 0.22, + "learning_rate": 3.936422202129475e-05, + "loss": 0.9088, + "step": 3060 + }, + { + "epoch": 0.22, + "learning_rate": 3.935848638410704e-05, + "loss": 0.9151, + "step": 3070 + }, + { + "epoch": 0.22, + "learning_rate": 3.9352725412744114e-05, + "loss": 0.9188, + "step": 3080 + }, + { + "epoch": 0.22, + "learning_rate": 3.9346939114745284e-05, + "loss": 0.9169, + "step": 3090 + }, + { + "epoch": 0.22, + "learning_rate": 3.934112749768299e-05, + "loss": 0.9156, + "step": 3100 + }, + { + "epoch": 0.22, + "learning_rate": 3.9335290569162815e-05, + "loss": 0.9141, + "step": 3110 + }, + { + "epoch": 0.22, + "learning_rate": 3.9329428336823464e-05, + "loss": 0.9148, + "step": 3120 + }, + { + "epoch": 0.22, + "learning_rate": 3.9323540808336755e-05, + "loss": 0.9114, + "step": 3130 + }, + { + "epoch": 0.22, + "learning_rate": 3.931762799140762e-05, + "loss": 0.9081, + "step": 3140 + }, + { + "epoch": 0.22, + "learning_rate": 3.931168989377407e-05, + "loss": 0.9148, + "step": 3150 + }, + { + "epoch": 0.22, + "learning_rate": 3.930572652320721e-05, + "loss": 0.9127, + "step": 3160 + }, + { + "epoch": 0.22, + "learning_rate": 3.929973788751122e-05, + "loss": 0.9137, + "step": 3170 + }, + { + "epoch": 0.22, + "learning_rate": 3.929372399452334e-05, + "loss": 0.921, + "step": 3180 + }, + { + "epoch": 0.23, + "learning_rate": 3.928768485211387e-05, + "loss": 0.907, + "step": 3190 + }, + { + "epoch": 0.23, + "learning_rate": 3.928162046818615e-05, + "loss": 0.9095, + "step": 3200 + }, + { + "epoch": 0.23, + "learning_rate": 3.927553085067654e-05, + "loss": 0.9105, + "step": 3210 + }, + { + "epoch": 0.23, + "learning_rate": 3.926941600755445e-05, + "loss": 0.9149, + "step": 3220 + }, + { + "epoch": 0.23, + "learning_rate": 3.926327594682229e-05, + "loss": 0.913, + "step": 3230 + }, + { + "epoch": 0.23, + "learning_rate": 3.925711067651546e-05, + "loss": 0.9057, + "step": 3240 + }, + { + "epoch": 0.23, + "learning_rate": 3.9250920204702366e-05, + "loss": 0.905, + "step": 3250 + }, + { + "epoch": 0.23, + "learning_rate": 3.924470453948439e-05, + "loss": 0.9102, + "step": 3260 + }, + { + "epoch": 0.23, + "learning_rate": 3.923846368899588e-05, + "loss": 0.9165, + "step": 3270 + }, + { + "epoch": 0.23, + "learning_rate": 3.923219766140415e-05, + "loss": 0.9106, + "step": 3280 + }, + { + "epoch": 0.23, + "learning_rate": 3.922590646490946e-05, + "loss": 0.9157, + "step": 3290 + }, + { + "epoch": 0.23, + "learning_rate": 3.9219590107745016e-05, + "loss": 0.9071, + "step": 3300 + }, + { + "epoch": 0.23, + "learning_rate": 3.921324859817695e-05, + "loss": 0.9113, + "step": 3310 + }, + { + "epoch": 0.23, + "learning_rate": 3.920688194450429e-05, + "loss": 0.91, + "step": 3320 + }, + { + "epoch": 0.24, + "learning_rate": 3.920049015505898e-05, + "loss": 0.9212, + "step": 3330 + }, + { + "epoch": 0.24, + "learning_rate": 3.919407323820588e-05, + "loss": 0.9073, + "step": 3340 + }, + { + "epoch": 0.24, + "learning_rate": 3.918763120234272e-05, + "loss": 0.9135, + "step": 3350 + }, + { + "epoch": 0.24, + "learning_rate": 3.918116405590009e-05, + "loss": 0.9148, + "step": 3360 + }, + { + "epoch": 0.24, + "learning_rate": 3.9174671807341454e-05, + "loss": 0.9134, + "step": 3370 + }, + { + "epoch": 0.24, + "learning_rate": 3.916815446516312e-05, + "loss": 0.9051, + "step": 3380 + }, + { + "epoch": 0.24, + "learning_rate": 3.9161612037894256e-05, + "loss": 0.9033, + "step": 3390 + }, + { + "epoch": 0.24, + "learning_rate": 3.915504453409683e-05, + "loss": 0.9111, + "step": 3400 + }, + { + "epoch": 0.24, + "learning_rate": 3.914845196236564e-05, + "loss": 0.9113, + "step": 3410 + }, + { + "epoch": 0.24, + "learning_rate": 3.91418343313283e-05, + "loss": 0.9065, + "step": 3420 + }, + { + "epoch": 0.24, + "learning_rate": 3.913519164964521e-05, + "loss": 0.9063, + "step": 3430 + }, + { + "epoch": 0.24, + "learning_rate": 3.912852392600954e-05, + "loss": 0.9047, + "step": 3440 + }, + { + "epoch": 0.24, + "learning_rate": 3.912183116914725e-05, + "loss": 0.9116, + "step": 3450 + }, + { + "epoch": 0.24, + "learning_rate": 3.911511338781707e-05, + "loss": 0.9057, + "step": 3460 + }, + { + "epoch": 0.25, + "learning_rate": 3.9108370590810445e-05, + "loss": 0.9045, + "step": 3470 + }, + { + "epoch": 0.25, + "learning_rate": 3.910160278695158e-05, + "loss": 0.9155, + "step": 3480 + }, + { + "epoch": 0.25, + "learning_rate": 3.909480998509742e-05, + "loss": 0.9151, + "step": 3490 + }, + { + "epoch": 0.25, + "learning_rate": 3.908799219413759e-05, + "loss": 0.9137, + "step": 3500 + }, + { + "epoch": 0.25, + "learning_rate": 3.9081149422994445e-05, + "loss": 0.907, + "step": 3510 + }, + { + "epoch": 0.25, + "learning_rate": 3.907428168062302e-05, + "loss": 0.9205, + "step": 3520 + }, + { + "epoch": 0.25, + "learning_rate": 3.906738897601103e-05, + "loss": 0.9105, + "step": 3530 + }, + { + "epoch": 0.25, + "learning_rate": 3.906047131817886e-05, + "loss": 0.9154, + "step": 3540 + }, + { + "epoch": 0.25, + "learning_rate": 3.905352871617955e-05, + "loss": 0.904, + "step": 3550 + }, + { + "epoch": 0.25, + "learning_rate": 3.904656117909878e-05, + "loss": 0.9005, + "step": 3560 + }, + { + "epoch": 0.25, + "learning_rate": 3.903956871605488e-05, + "loss": 0.9097, + "step": 3570 + }, + { + "epoch": 0.25, + "learning_rate": 3.903255133619877e-05, + "loss": 0.9048, + "step": 3580 + }, + { + "epoch": 0.25, + "learning_rate": 3.902550904871401e-05, + "loss": 0.9028, + "step": 3590 + }, + { + "epoch": 0.25, + "learning_rate": 3.9018441862816726e-05, + "loss": 0.9088, + "step": 3600 + }, + { + "epoch": 0.25, + "learning_rate": 3.901134978775566e-05, + "loss": 0.9128, + "step": 3610 + }, + { + "epoch": 0.26, + "learning_rate": 3.90042328328121e-05, + "loss": 0.94, + "step": 3620 + }, + { + "epoch": 0.26, + "learning_rate": 3.89970910072999e-05, + "loss": 0.9483, + "step": 3630 + }, + { + "epoch": 0.26, + "learning_rate": 3.898992432056547e-05, + "loss": 0.9345, + "step": 3640 + }, + { + "epoch": 0.26, + "learning_rate": 3.898273278198776e-05, + "loss": 0.9188, + "step": 3650 + }, + { + "epoch": 0.26, + "learning_rate": 3.897551640097822e-05, + "loss": 0.9143, + "step": 3660 + }, + { + "epoch": 0.26, + "learning_rate": 3.8968275186980825e-05, + "loss": 0.9203, + "step": 3670 + }, + { + "epoch": 0.26, + "learning_rate": 3.8961009149472055e-05, + "loss": 0.9199, + "step": 3680 + }, + { + "epoch": 0.26, + "learning_rate": 3.895371829796087e-05, + "loss": 0.9076, + "step": 3690 + }, + { + "epoch": 0.26, + "learning_rate": 3.89464026419887e-05, + "loss": 0.9149, + "step": 3700 + }, + { + "epoch": 0.26, + "learning_rate": 3.8939062191129445e-05, + "loss": 0.9114, + "step": 3710 + }, + { + "epoch": 0.26, + "learning_rate": 3.893169695498944e-05, + "loss": 0.902, + "step": 3720 + }, + { + "epoch": 0.26, + "learning_rate": 3.892430694320747e-05, + "loss": 0.9026, + "step": 3730 + }, + { + "epoch": 0.26, + "learning_rate": 3.891689216545474e-05, + "loss": 0.9055, + "step": 3740 + }, + { + "epoch": 0.26, + "learning_rate": 3.890945263143486e-05, + "loss": 0.905, + "step": 3750 + }, + { + "epoch": 0.27, + "learning_rate": 3.890198835088384e-05, + "loss": 0.9146, + "step": 3760 + }, + { + "epoch": 0.27, + "learning_rate": 3.889449933357009e-05, + "loss": 0.908, + "step": 3770 + }, + { + "epoch": 0.27, + "learning_rate": 3.888698558929436e-05, + "loss": 0.9011, + "step": 3780 + }, + { + "epoch": 0.27, + "learning_rate": 3.887944712788979e-05, + "loss": 0.9, + "step": 3790 + }, + { + "epoch": 0.27, + "learning_rate": 3.887188395922187e-05, + "loss": 0.9093, + "step": 3800 + }, + { + "epoch": 0.27, + "learning_rate": 3.886429609318839e-05, + "loss": 0.9, + "step": 3810 + }, + { + "epoch": 0.27, + "learning_rate": 3.88566835397195e-05, + "loss": 0.9048, + "step": 3820 + }, + { + "epoch": 0.27, + "learning_rate": 3.884904630877763e-05, + "loss": 0.906, + "step": 3830 + }, + { + "epoch": 0.27, + "learning_rate": 3.8841384410357505e-05, + "loss": 0.9115, + "step": 3840 + }, + { + "epoch": 0.27, + "learning_rate": 3.8833697854486165e-05, + "loss": 0.9083, + "step": 3850 + }, + { + "epoch": 0.27, + "learning_rate": 3.882598665122288e-05, + "loss": 0.9096, + "step": 3860 + }, + { + "epoch": 0.27, + "learning_rate": 3.8818250810659195e-05, + "loss": 0.9031, + "step": 3870 + }, + { + "epoch": 0.27, + "learning_rate": 3.881049034291891e-05, + "loss": 0.8998, + "step": 3880 + }, + { + "epoch": 0.27, + "learning_rate": 3.880270525815801e-05, + "loss": 0.8949, + "step": 3890 + }, + { + "epoch": 0.28, + "learning_rate": 3.879489556656474e-05, + "loss": 0.9016, + "step": 3900 + }, + { + "epoch": 0.28, + "learning_rate": 3.8787061278359536e-05, + "loss": 0.9036, + "step": 3910 + }, + { + "epoch": 0.28, + "learning_rate": 3.877920240379502e-05, + "loss": 0.8993, + "step": 3920 + }, + { + "epoch": 0.28, + "learning_rate": 3.877131895315597e-05, + "loss": 0.9081, + "step": 3930 + }, + { + "epoch": 0.28, + "learning_rate": 3.8763410936759374e-05, + "loss": 0.9063, + "step": 3940 + }, + { + "epoch": 0.28, + "learning_rate": 3.8755478364954315e-05, + "loss": 0.9037, + "step": 3950 + }, + { + "epoch": 0.28, + "learning_rate": 3.874752124812205e-05, + "loss": 0.8991, + "step": 3960 + }, + { + "epoch": 0.28, + "learning_rate": 3.8739539596675945e-05, + "loss": 0.907, + "step": 3970 + }, + { + "epoch": 0.28, + "learning_rate": 3.8731533421061464e-05, + "loss": 0.9055, + "step": 3980 + }, + { + "epoch": 0.28, + "learning_rate": 3.872350273175619e-05, + "loss": 0.8946, + "step": 3990 + }, + { + "epoch": 0.28, + "learning_rate": 3.8715447539269754e-05, + "loss": 0.8988, + "step": 4000 + }, + { + "epoch": 0.28, + "learning_rate": 3.870736785414389e-05, + "loss": 0.8999, + "step": 4010 + }, + { + "epoch": 0.28, + "learning_rate": 3.869926368695235e-05, + "loss": 0.9024, + "step": 4020 + }, + { + "epoch": 0.28, + "learning_rate": 3.869113504830095e-05, + "loss": 0.8982, + "step": 4030 + }, + { + "epoch": 0.29, + "learning_rate": 3.868298194882752e-05, + "loss": 0.8997, + "step": 4040 + }, + { + "epoch": 0.29, + "learning_rate": 3.867480439920191e-05, + "loss": 0.8995, + "step": 4050 + }, + { + "epoch": 0.29, + "learning_rate": 3.866660241012596e-05, + "loss": 0.901, + "step": 4060 + }, + { + "epoch": 0.29, + "learning_rate": 3.8658375992333495e-05, + "loss": 0.8933, + "step": 4070 + }, + { + "epoch": 0.29, + "learning_rate": 3.8650125156590316e-05, + "loss": 0.8971, + "step": 4080 + }, + { + "epoch": 0.29, + "learning_rate": 3.864184991369417e-05, + "loss": 0.9019, + "step": 4090 + }, + { + "epoch": 0.29, + "learning_rate": 3.8633550274474744e-05, + "loss": 0.8938, + "step": 4100 + }, + { + "epoch": 0.29, + "learning_rate": 3.862522624979367e-05, + "loss": 0.8998, + "step": 4110 + }, + { + "epoch": 0.29, + "learning_rate": 3.861687785054447e-05, + "loss": 0.9028, + "step": 4120 + }, + { + "epoch": 0.29, + "learning_rate": 3.8608505087652584e-05, + "loss": 0.8935, + "step": 4130 + }, + { + "epoch": 0.29, + "learning_rate": 3.860010797207532e-05, + "loss": 0.8973, + "step": 4140 + }, + { + "epoch": 0.29, + "learning_rate": 3.859168651480186e-05, + "loss": 0.8981, + "step": 4150 + }, + { + "epoch": 0.29, + "learning_rate": 3.858324072685326e-05, + "loss": 0.899, + "step": 4160 + }, + { + "epoch": 0.29, + "learning_rate": 3.857477061928239e-05, + "loss": 0.9, + "step": 4170 + }, + { + "epoch": 0.3, + "learning_rate": 3.856627620317396e-05, + "loss": 0.9012, + "step": 4180 + }, + { + "epoch": 0.3, + "learning_rate": 3.855775748964449e-05, + "loss": 0.8935, + "step": 4190 + }, + { + "epoch": 0.3, + "learning_rate": 3.8549214489842316e-05, + "loss": 0.8997, + "step": 4200 + }, + { + "epoch": 0.3, + "learning_rate": 3.8540647214947516e-05, + "loss": 0.9008, + "step": 4210 + }, + { + "epoch": 0.3, + "learning_rate": 3.853205567617197e-05, + "loss": 0.9024, + "step": 4220 + }, + { + "epoch": 0.3, + "learning_rate": 3.852343988475931e-05, + "loss": 0.9004, + "step": 4230 + }, + { + "epoch": 0.3, + "learning_rate": 3.851479985198489e-05, + "loss": 0.8942, + "step": 4240 + }, + { + "epoch": 0.3, + "learning_rate": 3.85061355891558e-05, + "loss": 0.8981, + "step": 4250 + }, + { + "epoch": 0.3, + "learning_rate": 3.849744710761084e-05, + "loss": 0.9069, + "step": 4260 + }, + { + "epoch": 0.3, + "learning_rate": 3.8488734418720494e-05, + "loss": 0.8969, + "step": 4270 + }, + { + "epoch": 0.3, + "learning_rate": 3.847999753388694e-05, + "loss": 0.8929, + "step": 4280 + }, + { + "epoch": 0.3, + "learning_rate": 3.847123646454402e-05, + "loss": 0.9022, + "step": 4290 + }, + { + "epoch": 0.3, + "learning_rate": 3.846245122215722e-05, + "loss": 0.8959, + "step": 4300 + }, + { + "epoch": 0.3, + "learning_rate": 3.845364181822365e-05, + "loss": 0.8916, + "step": 4310 + }, + { + "epoch": 0.31, + "learning_rate": 3.8444808264272054e-05, + "loss": 0.8907, + "step": 4320 + }, + { + "epoch": 0.31, + "learning_rate": 3.843595057186279e-05, + "loss": 0.8874, + "step": 4330 + }, + { + "epoch": 0.31, + "learning_rate": 3.842706875258777e-05, + "loss": 0.8962, + "step": 4340 + }, + { + "epoch": 0.31, + "learning_rate": 3.841816281807054e-05, + "loss": 0.8932, + "step": 4350 + }, + { + "epoch": 0.31, + "learning_rate": 3.8409232779966145e-05, + "loss": 0.8923, + "step": 4360 + }, + { + "epoch": 0.31, + "learning_rate": 3.8400278649961197e-05, + "loss": 0.8946, + "step": 4370 + }, + { + "epoch": 0.31, + "learning_rate": 3.839130043977385e-05, + "loss": 0.9001, + "step": 4380 + }, + { + "epoch": 0.31, + "learning_rate": 3.8382298161153745e-05, + "loss": 0.8927, + "step": 4390 + }, + { + "epoch": 0.31, + "learning_rate": 3.837327182588205e-05, + "loss": 0.8923, + "step": 4400 + }, + { + "epoch": 0.31, + "learning_rate": 3.8364221445771395e-05, + "loss": 0.9049, + "step": 4410 + }, + { + "epoch": 0.31, + "learning_rate": 3.835514703266589e-05, + "loss": 0.8904, + "step": 4420 + }, + { + "epoch": 0.31, + "learning_rate": 3.8346048598441076e-05, + "loss": 0.8916, + "step": 4430 + }, + { + "epoch": 0.31, + "learning_rate": 3.833692615500396e-05, + "loss": 0.8882, + "step": 4440 + }, + { + "epoch": 0.31, + "learning_rate": 3.832777971429295e-05, + "loss": 0.8914, + "step": 4450 + }, + { + "epoch": 0.32, + "learning_rate": 3.831860928827785e-05, + "loss": 0.888, + "step": 4460 + }, + { + "epoch": 0.32, + "learning_rate": 3.830941488895988e-05, + "loss": 0.8958, + "step": 4470 + }, + { + "epoch": 0.32, + "learning_rate": 3.8300196528371616e-05, + "loss": 0.8957, + "step": 4480 + }, + { + "epoch": 0.32, + "learning_rate": 3.829095421857699e-05, + "loss": 0.8938, + "step": 4490 + }, + { + "epoch": 0.32, + "learning_rate": 3.8281687971671285e-05, + "loss": 0.8947, + "step": 4500 + }, + { + "epoch": 0.32, + "learning_rate": 3.827239779978111e-05, + "loss": 0.894, + "step": 4510 + }, + { + "epoch": 0.32, + "learning_rate": 3.8263083715064363e-05, + "loss": 0.8921, + "step": 4520 + }, + { + "epoch": 0.32, + "learning_rate": 3.8253745729710284e-05, + "loss": 0.8947, + "step": 4530 + }, + { + "epoch": 0.32, + "learning_rate": 3.824438385593934e-05, + "loss": 0.8978, + "step": 4540 + }, + { + "epoch": 0.32, + "learning_rate": 3.8234998106003283e-05, + "loss": 0.8894, + "step": 4550 + }, + { + "epoch": 0.32, + "learning_rate": 3.8225588492185105e-05, + "loss": 0.896, + "step": 4560 + }, + { + "epoch": 0.32, + "learning_rate": 3.821615502679906e-05, + "loss": 0.8856, + "step": 4570 + }, + { + "epoch": 0.32, + "learning_rate": 3.8206697722190563e-05, + "loss": 0.8892, + "step": 4580 + }, + { + "epoch": 0.32, + "learning_rate": 3.819721659073626e-05, + "loss": 0.8863, + "step": 4590 + }, + { + "epoch": 0.32, + "learning_rate": 3.818771164484398e-05, + "loss": 0.8932, + "step": 4600 + }, + { + "epoch": 0.33, + "learning_rate": 3.81781828969527e-05, + "loss": 0.8907, + "step": 4610 + }, + { + "epoch": 0.33, + "learning_rate": 3.816863035953257e-05, + "loss": 0.8863, + "step": 4620 + }, + { + "epoch": 0.33, + "learning_rate": 3.8159054045084846e-05, + "loss": 0.8903, + "step": 4630 + }, + { + "epoch": 0.33, + "learning_rate": 3.814945396614192e-05, + "loss": 0.8884, + "step": 4640 + }, + { + "epoch": 0.33, + "learning_rate": 3.813983013526728e-05, + "loss": 0.9007, + "step": 4650 + }, + { + "epoch": 0.33, + "learning_rate": 3.813018256505549e-05, + "loss": 0.885, + "step": 4660 + }, + { + "epoch": 0.33, + "learning_rate": 3.8120511268132195e-05, + "loss": 0.8925, + "step": 4670 + }, + { + "epoch": 0.33, + "learning_rate": 3.811081625715408e-05, + "loss": 0.8887, + "step": 4680 + }, + { + "epoch": 0.33, + "learning_rate": 3.8101097544808866e-05, + "loss": 0.8858, + "step": 4690 + }, + { + "epoch": 0.33, + "learning_rate": 3.80913551438153e-05, + "loss": 0.8851, + "step": 4700 + }, + { + "epoch": 0.33, + "learning_rate": 3.808158906692311e-05, + "loss": 0.8947, + "step": 4710 + }, + { + "epoch": 0.33, + "learning_rate": 3.8071799326913045e-05, + "loss": 0.8966, + "step": 4720 + }, + { + "epoch": 0.33, + "learning_rate": 3.8061985936596776e-05, + "loss": 0.8866, + "step": 4730 + }, + { + "epoch": 0.33, + "learning_rate": 3.805214890881695e-05, + "loss": 0.8919, + "step": 4740 + }, + { + "epoch": 0.34, + "learning_rate": 3.804228825644716e-05, + "loss": 0.8876, + "step": 4750 + }, + { + "epoch": 0.34, + "learning_rate": 3.8032403992391885e-05, + "loss": 0.8922, + "step": 4760 + }, + { + "epoch": 0.34, + "learning_rate": 3.802249612958653e-05, + "loss": 0.8934, + "step": 4770 + }, + { + "epoch": 0.34, + "learning_rate": 3.801256468099736e-05, + "loss": 0.8935, + "step": 4780 + }, + { + "epoch": 0.34, + "learning_rate": 3.800260965962154e-05, + "loss": 0.8863, + "step": 4790 + }, + { + "epoch": 0.34, + "learning_rate": 3.799263107848705e-05, + "loss": 0.8887, + "step": 4800 + }, + { + "epoch": 0.34, + "learning_rate": 3.7982628950652725e-05, + "loss": 0.8858, + "step": 4810 + }, + { + "epoch": 0.34, + "learning_rate": 3.79726032892082e-05, + "loss": 0.8957, + "step": 4820 + }, + { + "epoch": 0.34, + "learning_rate": 3.7962554107273926e-05, + "loss": 0.885, + "step": 4830 + }, + { + "epoch": 0.34, + "learning_rate": 3.795248141800112e-05, + "loss": 0.8843, + "step": 4840 + }, + { + "epoch": 0.34, + "learning_rate": 3.794238523457175e-05, + "loss": 0.8899, + "step": 4850 + }, + { + "epoch": 0.34, + "learning_rate": 3.793226557019857e-05, + "loss": 0.9039, + "step": 4860 + }, + { + "epoch": 0.34, + "learning_rate": 3.7922122438125035e-05, + "loss": 0.9641, + "step": 4870 + }, + { + "epoch": 0.34, + "learning_rate": 3.791195585162531e-05, + "loss": 0.9534, + "step": 4880 + }, + { + "epoch": 0.35, + "learning_rate": 3.7901765824004264e-05, + "loss": 0.9882, + "step": 4890 + }, + { + "epoch": 0.35, + "learning_rate": 3.7891552368597456e-05, + "loss": 1.0478, + "step": 4900 + }, + { + "epoch": 0.35, + "learning_rate": 3.788131549877107e-05, + "loss": 0.9491, + "step": 4910 + }, + { + "epoch": 0.35, + "learning_rate": 3.787105522792195e-05, + "loss": 0.9363, + "step": 4920 + }, + { + "epoch": 0.35, + "learning_rate": 3.7860771569477587e-05, + "loss": 0.9231, + "step": 4930 + }, + { + "epoch": 0.35, + "learning_rate": 3.7850464536896036e-05, + "loss": 0.9272, + "step": 4940 + }, + { + "epoch": 0.35, + "learning_rate": 3.784013414366598e-05, + "loss": 0.9205, + "step": 4950 + }, + { + "epoch": 0.35, + "learning_rate": 3.7829780403306654e-05, + "loss": 0.9177, + "step": 4960 + }, + { + "epoch": 0.35, + "learning_rate": 3.781940332936784e-05, + "loss": 0.9212, + "step": 4970 + }, + { + "epoch": 0.35, + "learning_rate": 3.780900293542988e-05, + "loss": 0.9116, + "step": 4980 + }, + { + "epoch": 0.35, + "learning_rate": 3.779857923510361e-05, + "loss": 0.9004, + "step": 4990 + }, + { + "epoch": 0.35, + "learning_rate": 3.778813224203038e-05, + "loss": 0.906, + "step": 5000 + }, + { + "epoch": 0.35, + "learning_rate": 3.7777661969882034e-05, + "loss": 0.9011, + "step": 5010 + }, + { + "epoch": 0.35, + "learning_rate": 3.776716843236086e-05, + "loss": 0.9084, + "step": 5020 + }, + { + "epoch": 0.36, + "learning_rate": 3.775665164319959e-05, + "loss": 0.9045, + "step": 5030 + }, + { + "epoch": 0.36, + "learning_rate": 3.774611161616141e-05, + "loss": 0.9059, + "step": 5040 + }, + { + "epoch": 0.36, + "learning_rate": 3.77355483650399e-05, + "loss": 0.9022, + "step": 5050 + }, + { + "epoch": 0.36, + "learning_rate": 3.772496190365903e-05, + "loss": 0.9047, + "step": 5060 + }, + { + "epoch": 0.36, + "learning_rate": 3.7714352245873154e-05, + "loss": 0.9057, + "step": 5070 + }, + { + "epoch": 0.36, + "learning_rate": 3.7703719405566986e-05, + "loss": 0.8948, + "step": 5080 + }, + { + "epoch": 0.36, + "learning_rate": 3.7693063396655574e-05, + "loss": 0.8981, + "step": 5090 + }, + { + "epoch": 0.36, + "learning_rate": 3.768238423308427e-05, + "loss": 0.8995, + "step": 5100 + }, + { + "epoch": 0.36, + "learning_rate": 3.767168192882876e-05, + "loss": 0.8987, + "step": 5110 + }, + { + "epoch": 0.36, + "learning_rate": 3.766095649789498e-05, + "loss": 0.899, + "step": 5120 + }, + { + "epoch": 0.36, + "learning_rate": 3.765020795431917e-05, + "loss": 0.8897, + "step": 5130 + }, + { + "epoch": 0.36, + "learning_rate": 3.763943631216778e-05, + "loss": 0.8925, + "step": 5140 + }, + { + "epoch": 0.36, + "learning_rate": 3.7628641585537505e-05, + "loss": 0.8906, + "step": 5150 + }, + { + "epoch": 0.36, + "learning_rate": 3.761782378855525e-05, + "loss": 0.892, + "step": 5160 + }, + { + "epoch": 0.37, + "learning_rate": 3.760698293537813e-05, + "loss": 0.8906, + "step": 5170 + }, + { + "epoch": 0.37, + "learning_rate": 3.759611904019338e-05, + "loss": 0.8962, + "step": 5180 + }, + { + "epoch": 0.37, + "learning_rate": 3.758523211721846e-05, + "loss": 0.8891, + "step": 5190 + }, + { + "epoch": 0.37, + "learning_rate": 3.757432218070091e-05, + "loss": 0.8903, + "step": 5200 + }, + { + "epoch": 0.37, + "learning_rate": 3.7563389244918415e-05, + "loss": 0.8921, + "step": 5210 + }, + { + "epoch": 0.37, + "learning_rate": 3.755243332417875e-05, + "loss": 0.8881, + "step": 5220 + }, + { + "epoch": 0.37, + "learning_rate": 3.7541454432819776e-05, + "loss": 0.8935, + "step": 5230 + }, + { + "epoch": 0.37, + "learning_rate": 3.753045258520941e-05, + "loss": 0.8919, + "step": 5240 + }, + { + "epoch": 0.37, + "learning_rate": 3.751942779574561e-05, + "loss": 0.8902, + "step": 5250 + }, + { + "epoch": 0.37, + "learning_rate": 3.750838007885636e-05, + "loss": 0.8911, + "step": 5260 + }, + { + "epoch": 0.37, + "learning_rate": 3.749730944899966e-05, + "loss": 0.8917, + "step": 5270 + }, + { + "epoch": 0.37, + "learning_rate": 3.7486215920663476e-05, + "loss": 0.8857, + "step": 5280 + }, + { + "epoch": 0.37, + "learning_rate": 3.747509950836575e-05, + "loss": 0.8885, + "step": 5290 + }, + { + "epoch": 0.37, + "learning_rate": 3.746396022665438e-05, + "loss": 0.8928, + "step": 5300 + }, + { + "epoch": 0.38, + "learning_rate": 3.745279809010718e-05, + "loss": 0.893, + "step": 5310 + }, + { + "epoch": 0.38, + "learning_rate": 3.7441613113331876e-05, + "loss": 0.8953, + "step": 5320 + }, + { + "epoch": 0.38, + "learning_rate": 3.74304053109661e-05, + "loss": 0.8903, + "step": 5330 + }, + { + "epoch": 0.38, + "learning_rate": 3.741917469767731e-05, + "loss": 0.8934, + "step": 5340 + }, + { + "epoch": 0.38, + "learning_rate": 3.74079212881629e-05, + "loss": 0.8851, + "step": 5350 + }, + { + "epoch": 0.38, + "learning_rate": 3.7396645097150014e-05, + "loss": 0.8973, + "step": 5360 + }, + { + "epoch": 0.38, + "learning_rate": 3.7385346139395645e-05, + "loss": 0.8952, + "step": 5370 + }, + { + "epoch": 0.38, + "learning_rate": 3.737402442968658e-05, + "loss": 0.8808, + "step": 5380 + }, + { + "epoch": 0.38, + "learning_rate": 3.736267998283939e-05, + "loss": 0.886, + "step": 5390 + }, + { + "epoch": 0.38, + "learning_rate": 3.735131281370037e-05, + "loss": 0.8864, + "step": 5400 + }, + { + "epoch": 0.38, + "learning_rate": 3.7339922937145596e-05, + "loss": 0.8817, + "step": 5410 + }, + { + "epoch": 0.38, + "learning_rate": 3.732851036808082e-05, + "loss": 0.8875, + "step": 5420 + }, + { + "epoch": 0.38, + "learning_rate": 3.731707512144151e-05, + "loss": 0.8716, + "step": 5430 + }, + { + "epoch": 0.38, + "learning_rate": 3.7305617212192816e-05, + "loss": 0.8835, + "step": 5440 + }, + { + "epoch": 0.38, + "learning_rate": 3.729413665532955e-05, + "loss": 0.8775, + "step": 5450 + }, + { + "epoch": 0.39, + "learning_rate": 3.728263346587612e-05, + "loss": 0.8899, + "step": 5460 + }, + { + "epoch": 0.39, + "learning_rate": 3.727110765888663e-05, + "loss": 0.889, + "step": 5470 + }, + { + "epoch": 0.39, + "learning_rate": 3.7259559249444704e-05, + "loss": 0.8818, + "step": 5480 + }, + { + "epoch": 0.39, + "learning_rate": 3.724798825266359e-05, + "loss": 0.884, + "step": 5490 + }, + { + "epoch": 0.39, + "learning_rate": 3.72363946836861e-05, + "loss": 0.8796, + "step": 5500 + }, + { + "epoch": 0.39, + "learning_rate": 3.722477855768456e-05, + "loss": 0.893, + "step": 5510 + }, + { + "epoch": 0.39, + "learning_rate": 3.721313988986083e-05, + "loss": 0.8833, + "step": 5520 + }, + { + "epoch": 0.39, + "learning_rate": 3.720147869544628e-05, + "loss": 0.8789, + "step": 5530 + }, + { + "epoch": 0.39, + "learning_rate": 3.718979498970172e-05, + "loss": 0.8888, + "step": 5540 + }, + { + "epoch": 0.39, + "learning_rate": 3.7178088787917484e-05, + "loss": 0.8824, + "step": 5550 + }, + { + "epoch": 0.39, + "learning_rate": 3.7166360105413285e-05, + "loss": 0.8788, + "step": 5560 + }, + { + "epoch": 0.39, + "learning_rate": 3.71546089575383e-05, + "loss": 0.8857, + "step": 5570 + }, + { + "epoch": 0.39, + "learning_rate": 3.714283535967108e-05, + "loss": 0.8796, + "step": 5580 + }, + { + "epoch": 0.39, + "learning_rate": 3.713103932721957e-05, + "loss": 0.8757, + "step": 5590 + }, + { + "epoch": 0.4, + "learning_rate": 3.7119220875621065e-05, + "loss": 0.8838, + "step": 5600 + }, + { + "epoch": 0.4, + "learning_rate": 3.71073800203422e-05, + "loss": 0.8872, + "step": 5610 + }, + { + "epoch": 0.4, + "learning_rate": 3.709551677687895e-05, + "loss": 0.8847, + "step": 5620 + }, + { + "epoch": 0.4, + "learning_rate": 3.708363116075656e-05, + "loss": 0.8829, + "step": 5630 + }, + { + "epoch": 0.4, + "learning_rate": 3.707172318752956e-05, + "loss": 0.8821, + "step": 5640 + }, + { + "epoch": 0.4, + "learning_rate": 3.705979287278175e-05, + "loss": 0.8831, + "step": 5650 + }, + { + "epoch": 0.4, + "learning_rate": 3.704784023212618e-05, + "loss": 0.8776, + "step": 5660 + }, + { + "epoch": 0.4, + "learning_rate": 3.703586528120507e-05, + "loss": 0.884, + "step": 5670 + }, + { + "epoch": 0.4, + "learning_rate": 3.70238680356899e-05, + "loss": 0.8811, + "step": 5680 + }, + { + "epoch": 0.4, + "learning_rate": 3.7011848511281266e-05, + "loss": 0.8814, + "step": 5690 + }, + { + "epoch": 0.4, + "learning_rate": 3.699980672370896e-05, + "loss": 0.8762, + "step": 5700 + }, + { + "epoch": 0.4, + "learning_rate": 3.69877426887319e-05, + "loss": 0.8859, + "step": 5710 + }, + { + "epoch": 0.4, + "learning_rate": 3.697565642213812e-05, + "loss": 0.8864, + "step": 5720 + }, + { + "epoch": 0.4, + "learning_rate": 3.6963547939744737e-05, + "loss": 0.8829, + "step": 5730 + }, + { + "epoch": 0.41, + "learning_rate": 3.695141725739795e-05, + "loss": 0.8848, + "step": 5740 + }, + { + "epoch": 0.41, + "learning_rate": 3.6939264390973016e-05, + "loss": 0.8864, + "step": 5750 + }, + { + "epoch": 0.41, + "learning_rate": 3.692708935637421e-05, + "loss": 0.8772, + "step": 5760 + }, + { + "epoch": 0.41, + "learning_rate": 3.691489216953483e-05, + "loss": 0.8742, + "step": 5770 + }, + { + "epoch": 0.41, + "learning_rate": 3.690267284641716e-05, + "loss": 0.881, + "step": 5780 + }, + { + "epoch": 0.41, + "learning_rate": 3.689043140301246e-05, + "loss": 0.8787, + "step": 5790 + }, + { + "epoch": 0.41, + "learning_rate": 3.687816785534092e-05, + "loss": 0.8845, + "step": 5800 + }, + { + "epoch": 0.41, + "learning_rate": 3.686588221945168e-05, + "loss": 0.877, + "step": 5810 + }, + { + "epoch": 0.41, + "learning_rate": 3.685357451142278e-05, + "loss": 0.8824, + "step": 5820 + }, + { + "epoch": 0.41, + "learning_rate": 3.6841244747361126e-05, + "loss": 0.8767, + "step": 5830 + }, + { + "epoch": 0.41, + "learning_rate": 3.682889294340252e-05, + "loss": 0.8736, + "step": 5840 + }, + { + "epoch": 0.41, + "learning_rate": 3.6816519115711586e-05, + "loss": 0.8808, + "step": 5850 + }, + { + "epoch": 0.41, + "learning_rate": 3.680412328048178e-05, + "loss": 0.8791, + "step": 5860 + }, + { + "epoch": 0.41, + "learning_rate": 3.679170545393534e-05, + "loss": 0.8793, + "step": 5870 + }, + { + "epoch": 0.42, + "learning_rate": 3.6779265652323315e-05, + "loss": 0.8797, + "step": 5880 + }, + { + "epoch": 0.42, + "learning_rate": 3.67668038919255e-05, + "loss": 0.8802, + "step": 5890 + }, + { + "epoch": 0.42, + "learning_rate": 3.6754320189050404e-05, + "loss": 0.8805, + "step": 5900 + }, + { + "epoch": 0.42, + "learning_rate": 3.674181456003528e-05, + "loss": 0.88, + "step": 5910 + }, + { + "epoch": 0.42, + "learning_rate": 3.672928702124607e-05, + "loss": 0.8706, + "step": 5920 + }, + { + "epoch": 0.42, + "learning_rate": 3.6716737589077386e-05, + "loss": 0.8814, + "step": 5930 + }, + { + "epoch": 0.42, + "learning_rate": 3.670416627995249e-05, + "loss": 0.8861, + "step": 5940 + }, + { + "epoch": 0.42, + "learning_rate": 3.669157311032326e-05, + "loss": 0.8821, + "step": 5950 + }, + { + "epoch": 0.42, + "learning_rate": 3.667895809667022e-05, + "loss": 0.884, + "step": 5960 + }, + { + "epoch": 0.42, + "learning_rate": 3.666632125550245e-05, + "loss": 0.8849, + "step": 5970 + }, + { + "epoch": 0.42, + "learning_rate": 3.66536626033576e-05, + "loss": 0.8863, + "step": 5980 + }, + { + "epoch": 0.42, + "learning_rate": 3.664098215680187e-05, + "loss": 0.8699, + "step": 5990 + }, + { + "epoch": 0.42, + "learning_rate": 3.662827993242998e-05, + "loss": 0.871, + "step": 6000 + }, + { + "epoch": 0.42, + "learning_rate": 3.661555594686515e-05, + "loss": 0.878, + "step": 6010 + }, + { + "epoch": 0.43, + "learning_rate": 3.660281021675908e-05, + "loss": 0.8794, + "step": 6020 + }, + { + "epoch": 0.43, + "learning_rate": 3.659004275879193e-05, + "loss": 0.8727, + "step": 6030 + }, + { + "epoch": 0.43, + "learning_rate": 3.657725358967228e-05, + "loss": 0.8734, + "step": 6040 + }, + { + "epoch": 0.43, + "learning_rate": 3.6564442726137134e-05, + "loss": 0.8767, + "step": 6050 + }, + { + "epoch": 0.43, + "learning_rate": 3.655161018495189e-05, + "loss": 0.8723, + "step": 6060 + }, + { + "epoch": 0.43, + "learning_rate": 3.6538755982910314e-05, + "loss": 0.8725, + "step": 6070 + }, + { + "epoch": 0.43, + "learning_rate": 3.6525880136834514e-05, + "loss": 0.8683, + "step": 6080 + }, + { + "epoch": 0.43, + "learning_rate": 3.651298266357493e-05, + "loss": 0.8709, + "step": 6090 + }, + { + "epoch": 0.43, + "learning_rate": 3.65000635800103e-05, + "loss": 0.8735, + "step": 6100 + }, + { + "epoch": 0.43, + "learning_rate": 3.648712290304764e-05, + "loss": 0.8882, + "step": 6110 + }, + { + "epoch": 0.43, + "learning_rate": 3.647416064962224e-05, + "loss": 0.8879, + "step": 6120 + }, + { + "epoch": 0.43, + "learning_rate": 3.64611768366976e-05, + "loss": 0.8852, + "step": 6130 + }, + { + "epoch": 0.43, + "learning_rate": 3.644817148126546e-05, + "loss": 0.8906, + "step": 6140 + }, + { + "epoch": 0.43, + "learning_rate": 3.643514460034575e-05, + "loss": 0.906, + "step": 6150 + }, + { + "epoch": 0.44, + "learning_rate": 3.642209621098655e-05, + "loss": 0.9239, + "step": 6160 + }, + { + "epoch": 0.44, + "learning_rate": 3.640902633026411e-05, + "loss": 0.9071, + "step": 6170 + }, + { + "epoch": 0.44, + "learning_rate": 3.63959349752828e-05, + "loss": 0.9016, + "step": 6180 + }, + { + "epoch": 0.44, + "learning_rate": 3.638282216317508e-05, + "loss": 0.896, + "step": 6190 + }, + { + "epoch": 0.44, + "learning_rate": 3.63696879111015e-05, + "loss": 0.8891, + "step": 6200 + }, + { + "epoch": 0.44, + "learning_rate": 3.635653223625067e-05, + "loss": 0.8903, + "step": 6210 + }, + { + "epoch": 0.44, + "learning_rate": 3.634335515583924e-05, + "loss": 0.8887, + "step": 6220 + }, + { + "epoch": 0.44, + "learning_rate": 3.633015668711187e-05, + "loss": 0.8891, + "step": 6230 + }, + { + "epoch": 0.44, + "learning_rate": 3.63169368473412e-05, + "loss": 0.8896, + "step": 6240 + }, + { + "epoch": 0.44, + "learning_rate": 3.630369565382785e-05, + "loss": 0.8906, + "step": 6250 + }, + { + "epoch": 0.44, + "learning_rate": 3.629043312390037e-05, + "loss": 0.8869, + "step": 6260 + }, + { + "epoch": 0.44, + "learning_rate": 3.627714927491527e-05, + "loss": 0.8818, + "step": 6270 + }, + { + "epoch": 0.44, + "learning_rate": 3.6263844124256905e-05, + "loss": 0.8876, + "step": 6280 + }, + { + "epoch": 0.44, + "learning_rate": 3.6250517689337545e-05, + "loss": 0.886, + "step": 6290 + }, + { + "epoch": 0.45, + "learning_rate": 3.6237169987597314e-05, + "loss": 0.8833, + "step": 6300 + }, + { + "epoch": 0.45, + "learning_rate": 3.622380103650415e-05, + "loss": 0.883, + "step": 6310 + }, + { + "epoch": 0.45, + "learning_rate": 3.6210410853553816e-05, + "loss": 0.8848, + "step": 6320 + }, + { + "epoch": 0.45, + "learning_rate": 3.6196999456269845e-05, + "loss": 0.8759, + "step": 6330 + }, + { + "epoch": 0.45, + "learning_rate": 3.6183566862203555e-05, + "loss": 0.8765, + "step": 6340 + }, + { + "epoch": 0.45, + "learning_rate": 3.617011308893398e-05, + "loss": 0.888, + "step": 6350 + }, + { + "epoch": 0.45, + "learning_rate": 3.6156638154067876e-05, + "loss": 0.8796, + "step": 6360 + }, + { + "epoch": 0.45, + "learning_rate": 3.614314207523971e-05, + "loss": 0.8835, + "step": 6370 + }, + { + "epoch": 0.45, + "learning_rate": 3.61296248701116e-05, + "loss": 0.8799, + "step": 6380 + }, + { + "epoch": 0.45, + "learning_rate": 3.611608655637333e-05, + "loss": 0.8861, + "step": 6390 + }, + { + "epoch": 0.45, + "learning_rate": 3.610252715174229e-05, + "loss": 0.8689, + "step": 6400 + }, + { + "epoch": 0.45, + "learning_rate": 3.608894667396347e-05, + "loss": 0.8771, + "step": 6410 + }, + { + "epoch": 0.45, + "learning_rate": 3.607534514080946e-05, + "loss": 0.8784, + "step": 6420 + }, + { + "epoch": 0.45, + "learning_rate": 3.60617225700804e-05, + "loss": 0.8831, + "step": 6430 + }, + { + "epoch": 0.45, + "learning_rate": 3.604807897960394e-05, + "loss": 0.882, + "step": 6440 + }, + { + "epoch": 0.46, + "learning_rate": 3.603441438723526e-05, + "loss": 0.8744, + "step": 6450 + }, + { + "epoch": 0.46, + "learning_rate": 3.602072881085701e-05, + "loss": 0.8706, + "step": 6460 + }, + { + "epoch": 0.46, + "learning_rate": 3.6007022268379316e-05, + "loss": 0.8786, + "step": 6470 + }, + { + "epoch": 0.46, + "learning_rate": 3.599329477773973e-05, + "loss": 0.8805, + "step": 6480 + }, + { + "epoch": 0.46, + "learning_rate": 3.597954635690324e-05, + "loss": 0.877, + "step": 6490 + }, + { + "epoch": 0.46, + "learning_rate": 3.5965777023862176e-05, + "loss": 0.878, + "step": 6500 + }, + { + "epoch": 0.46, + "learning_rate": 3.5951986796636295e-05, + "loss": 0.8697, + "step": 6510 + }, + { + "epoch": 0.46, + "learning_rate": 3.5938175693272655e-05, + "loss": 0.8735, + "step": 6520 + }, + { + "epoch": 0.46, + "learning_rate": 3.5924343731845664e-05, + "loss": 0.8823, + "step": 6530 + }, + { + "epoch": 0.46, + "learning_rate": 3.5910490930457006e-05, + "loss": 0.8761, + "step": 6540 + }, + { + "epoch": 0.46, + "learning_rate": 3.589661730723563e-05, + "loss": 0.8735, + "step": 6550 + }, + { + "epoch": 0.46, + "learning_rate": 3.5882722880337776e-05, + "loss": 0.8684, + "step": 6560 + }, + { + "epoch": 0.46, + "learning_rate": 3.5868807667946876e-05, + "loss": 0.8788, + "step": 6570 + }, + { + "epoch": 0.46, + "learning_rate": 3.585487168827355e-05, + "loss": 0.8685, + "step": 6580 + }, + { + "epoch": 0.47, + "learning_rate": 3.584091495955563e-05, + "loss": 0.8677, + "step": 6590 + }, + { + "epoch": 0.47, + "learning_rate": 3.58269375000581e-05, + "loss": 0.8762, + "step": 6600 + }, + { + "epoch": 0.47, + "learning_rate": 3.581293932807304e-05, + "loss": 0.8684, + "step": 6610 + }, + { + "epoch": 0.47, + "learning_rate": 3.579892046191967e-05, + "loss": 0.8732, + "step": 6620 + }, + { + "epoch": 0.47, + "learning_rate": 3.578488091994428e-05, + "loss": 0.8729, + "step": 6630 + }, + { + "epoch": 0.47, + "learning_rate": 3.5770820720520216e-05, + "loss": 0.8734, + "step": 6640 + }, + { + "epoch": 0.47, + "learning_rate": 3.575673988204786e-05, + "loss": 0.8739, + "step": 6650 + }, + { + "epoch": 0.47, + "learning_rate": 3.57426384229546e-05, + "loss": 0.88, + "step": 6660 + }, + { + "epoch": 0.47, + "learning_rate": 3.572851636169484e-05, + "loss": 0.8732, + "step": 6670 + }, + { + "epoch": 0.47, + "learning_rate": 3.5714373716749886e-05, + "loss": 0.8738, + "step": 6680 + }, + { + "epoch": 0.47, + "learning_rate": 3.5700210506628026e-05, + "loss": 0.8734, + "step": 6690 + }, + { + "epoch": 0.47, + "learning_rate": 3.5686026749864466e-05, + "loss": 0.8692, + "step": 6700 + }, + { + "epoch": 0.47, + "learning_rate": 3.567182246502127e-05, + "loss": 0.8713, + "step": 6710 + }, + { + "epoch": 0.47, + "learning_rate": 3.56575976706874e-05, + "loss": 0.8797, + "step": 6720 + }, + { + "epoch": 0.48, + "learning_rate": 3.5643352385478616e-05, + "loss": 0.8732, + "step": 6730 + }, + { + "epoch": 0.48, + "learning_rate": 3.562908662803754e-05, + "loss": 0.8665, + "step": 6740 + }, + { + "epoch": 0.48, + "learning_rate": 3.5614800417033565e-05, + "loss": 0.8634, + "step": 6750 + }, + { + "epoch": 0.48, + "learning_rate": 3.560049377116284e-05, + "loss": 0.872, + "step": 6760 + }, + { + "epoch": 0.48, + "learning_rate": 3.558616670914828e-05, + "loss": 0.8687, + "step": 6770 + }, + { + "epoch": 0.48, + "learning_rate": 3.55718192497395e-05, + "loss": 0.8679, + "step": 6780 + }, + { + "epoch": 0.48, + "learning_rate": 3.555745141171282e-05, + "loss": 0.8648, + "step": 6790 + }, + { + "epoch": 0.48, + "learning_rate": 3.554306321387122e-05, + "loss": 0.87, + "step": 6800 + }, + { + "epoch": 0.48, + "learning_rate": 3.552865467504432e-05, + "loss": 0.8689, + "step": 6810 + }, + { + "epoch": 0.48, + "learning_rate": 3.55142258140884e-05, + "loss": 0.8704, + "step": 6820 + }, + { + "epoch": 0.48, + "learning_rate": 3.549977664988628e-05, + "loss": 0.8718, + "step": 6830 + }, + { + "epoch": 0.48, + "learning_rate": 3.548530720134738e-05, + "loss": 0.8733, + "step": 6840 + }, + { + "epoch": 0.48, + "learning_rate": 3.547081748740766e-05, + "loss": 0.8699, + "step": 6850 + }, + { + "epoch": 0.48, + "learning_rate": 3.5456307527029606e-05, + "loss": 0.8721, + "step": 6860 + }, + { + "epoch": 0.49, + "learning_rate": 3.54417773392022e-05, + "loss": 0.8687, + "step": 6870 + }, + { + "epoch": 0.49, + "learning_rate": 3.542722694294088e-05, + "loss": 0.8551, + "step": 6880 + }, + { + "epoch": 0.49, + "learning_rate": 3.541265635728755e-05, + "loss": 0.8632, + "step": 6890 + }, + { + "epoch": 0.49, + "learning_rate": 3.539806560131053e-05, + "loss": 0.8739, + "step": 6900 + }, + { + "epoch": 0.49, + "learning_rate": 3.538345469410453e-05, + "loss": 0.8674, + "step": 6910 + }, + { + "epoch": 0.49, + "learning_rate": 3.536882365479063e-05, + "loss": 0.8617, + "step": 6920 + }, + { + "epoch": 0.49, + "learning_rate": 3.535417250251627e-05, + "loss": 0.8618, + "step": 6930 + }, + { + "epoch": 0.49, + "learning_rate": 3.5339501256455216e-05, + "loss": 0.8731, + "step": 6940 + }, + { + "epoch": 0.49, + "learning_rate": 3.53248099358075e-05, + "loss": 0.8584, + "step": 6950 + }, + { + "epoch": 0.49, + "learning_rate": 3.531009855979945e-05, + "loss": 0.8692, + "step": 6960 + }, + { + "epoch": 0.49, + "learning_rate": 3.529536714768363e-05, + "loss": 0.8698, + "step": 6970 + }, + { + "epoch": 0.49, + "learning_rate": 3.528061571873883e-05, + "loss": 0.8577, + "step": 6980 + }, + { + "epoch": 0.49, + "learning_rate": 3.526584429227005e-05, + "loss": 0.8652, + "step": 6990 + }, + { + "epoch": 0.49, + "learning_rate": 3.525105288760843e-05, + "loss": 0.8676, + "step": 7000 + }, + { + "epoch": 0.5, + "learning_rate": 3.5236241524111264e-05, + "loss": 0.8682, + "step": 7010 + }, + { + "epoch": 0.5, + "learning_rate": 3.522141022116199e-05, + "loss": 0.8664, + "step": 7020 + }, + { + "epoch": 0.5, + "learning_rate": 3.520655899817012e-05, + "loss": 0.8714, + "step": 7030 + }, + { + "epoch": 0.5, + "learning_rate": 3.519168787457122e-05, + "loss": 0.8672, + "step": 7040 + }, + { + "epoch": 0.5, + "learning_rate": 3.5176796869826943e-05, + "loss": 0.8731, + "step": 7050 + }, + { + "epoch": 0.5, + "learning_rate": 3.5161886003424915e-05, + "loss": 0.8679, + "step": 7060 + }, + { + "epoch": 0.5, + "learning_rate": 3.5146955294878784e-05, + "loss": 0.8692, + "step": 7070 + }, + { + "epoch": 0.5, + "learning_rate": 3.5132004763728144e-05, + "loss": 0.8751, + "step": 7080 + }, + { + "epoch": 0.5, + "learning_rate": 3.511703442953856e-05, + "loss": 0.8656, + "step": 7090 + }, + { + "epoch": 0.5, + "learning_rate": 3.510204431190149e-05, + "loss": 0.8742, + "step": 7100 + }, + { + "epoch": 0.5, + "learning_rate": 3.508703443043427e-05, + "loss": 0.87, + "step": 7110 + }, + { + "epoch": 0.5, + "learning_rate": 3.5072004804780135e-05, + "loss": 0.8608, + "step": 7120 + }, + { + "epoch": 0.5, + "learning_rate": 3.505695545460814e-05, + "loss": 0.8727, + "step": 7130 + }, + { + "epoch": 0.5, + "learning_rate": 3.504188639961315e-05, + "loss": 0.8731, + "step": 7140 + }, + { + "epoch": 0.51, + "learning_rate": 3.502679765951583e-05, + "loss": 0.871, + "step": 7150 + }, + { + "epoch": 0.51, + "learning_rate": 3.501168925406259e-05, + "loss": 0.8688, + "step": 7160 + }, + { + "epoch": 0.51, + "learning_rate": 3.49965612030256e-05, + "loss": 0.8682, + "step": 7170 + }, + { + "epoch": 0.51, + "learning_rate": 3.4981413526202704e-05, + "loss": 0.8718, + "step": 7180 + }, + { + "epoch": 0.51, + "learning_rate": 3.4966246243417465e-05, + "loss": 0.8663, + "step": 7190 + }, + { + "epoch": 0.51, + "learning_rate": 3.495105937451908e-05, + "loss": 0.861, + "step": 7200 + }, + { + "epoch": 0.51, + "learning_rate": 3.4935852939382396e-05, + "loss": 0.8612, + "step": 7210 + }, + { + "epoch": 0.51, + "learning_rate": 3.492062695790786e-05, + "loss": 0.8704, + "step": 7220 + }, + { + "epoch": 0.51, + "learning_rate": 3.4905381450021484e-05, + "loss": 0.8608, + "step": 7230 + }, + { + "epoch": 0.51, + "learning_rate": 3.489011643567486e-05, + "loss": 0.8659, + "step": 7240 + }, + { + "epoch": 0.51, + "learning_rate": 3.4874831934845095e-05, + "loss": 0.8692, + "step": 7250 + }, + { + "epoch": 0.51, + "learning_rate": 3.485952796753479e-05, + "loss": 0.8565, + "step": 7260 + }, + { + "epoch": 0.51, + "learning_rate": 3.484420455377202e-05, + "loss": 0.8642, + "step": 7270 + }, + { + "epoch": 0.51, + "learning_rate": 3.482886171361034e-05, + "loss": 0.8631, + "step": 7280 + }, + { + "epoch": 0.51, + "learning_rate": 3.481349946712869e-05, + "loss": 0.8671, + "step": 7290 + }, + { + "epoch": 0.52, + "learning_rate": 3.4798117834431436e-05, + "loss": 0.8676, + "step": 7300 + }, + { + "epoch": 0.52, + "learning_rate": 3.478271683564829e-05, + "loss": 0.8689, + "step": 7310 + }, + { + "epoch": 0.52, + "learning_rate": 3.4767296490934324e-05, + "loss": 0.869, + "step": 7320 + }, + { + "epoch": 0.52, + "learning_rate": 3.475185682046992e-05, + "loss": 0.8651, + "step": 7330 + }, + { + "epoch": 0.52, + "learning_rate": 3.4736397844460766e-05, + "loss": 0.8637, + "step": 7340 + }, + { + "epoch": 0.52, + "learning_rate": 3.4720919583137786e-05, + "loss": 0.8707, + "step": 7350 + }, + { + "epoch": 0.52, + "learning_rate": 3.470542205675717e-05, + "loss": 0.8607, + "step": 7360 + }, + { + "epoch": 0.52, + "learning_rate": 3.4689905285600314e-05, + "loss": 0.8707, + "step": 7370 + }, + { + "epoch": 0.52, + "learning_rate": 3.467436928997379e-05, + "loss": 0.8762, + "step": 7380 + }, + { + "epoch": 0.52, + "learning_rate": 3.465881409020933e-05, + "loss": 0.8809, + "step": 7390 + }, + { + "epoch": 0.52, + "learning_rate": 3.4643239706663813e-05, + "loss": 0.8796, + "step": 7400 + }, + { + "epoch": 0.52, + "learning_rate": 3.46276461597192e-05, + "loss": 0.8746, + "step": 7410 + }, + { + "epoch": 0.52, + "learning_rate": 3.4612033469782555e-05, + "loss": 0.8796, + "step": 7420 + }, + { + "epoch": 0.52, + "learning_rate": 3.4596401657285975e-05, + "loss": 0.8775, + "step": 7430 + }, + { + "epoch": 0.53, + "learning_rate": 3.458075074268659e-05, + "loss": 0.871, + "step": 7440 + }, + { + "epoch": 0.53, + "learning_rate": 3.456508074646654e-05, + "loss": 0.8694, + "step": 7450 + }, + { + "epoch": 0.53, + "learning_rate": 3.4549391689132914e-05, + "loss": 0.8706, + "step": 7460 + }, + { + "epoch": 0.53, + "learning_rate": 3.453368359121775e-05, + "loss": 0.8639, + "step": 7470 + }, + { + "epoch": 0.53, + "learning_rate": 3.4517956473278037e-05, + "loss": 0.8688, + "step": 7480 + }, + { + "epoch": 0.53, + "learning_rate": 3.4502210355895615e-05, + "loss": 0.8723, + "step": 7490 + }, + { + "epoch": 0.53, + "learning_rate": 3.44864452596772e-05, + "loss": 0.8668, + "step": 7500 + }, + { + "epoch": 0.53, + "learning_rate": 3.4470661205254354e-05, + "loss": 0.8661, + "step": 7510 + }, + { + "epoch": 0.53, + "learning_rate": 3.445485821328345e-05, + "loss": 0.8662, + "step": 7520 + }, + { + "epoch": 0.53, + "learning_rate": 3.443903630444562e-05, + "loss": 0.8715, + "step": 7530 + }, + { + "epoch": 0.53, + "learning_rate": 3.4423195499446796e-05, + "loss": 0.8683, + "step": 7540 + }, + { + "epoch": 0.53, + "learning_rate": 3.440733581901759e-05, + "loss": 0.868, + "step": 7550 + }, + { + "epoch": 0.53, + "learning_rate": 3.439145728391335e-05, + "loss": 0.8663, + "step": 7560 + }, + { + "epoch": 0.53, + "learning_rate": 3.437555991491409e-05, + "loss": 0.8629, + "step": 7570 + }, + { + "epoch": 0.54, + "learning_rate": 3.435964373282447e-05, + "loss": 0.8564, + "step": 7580 + }, + { + "epoch": 0.54, + "learning_rate": 3.434370875847377e-05, + "loss": 0.8574, + "step": 7590 + }, + { + "epoch": 0.54, + "learning_rate": 3.432775501271586e-05, + "loss": 0.8704, + "step": 7600 + }, + { + "epoch": 0.54, + "learning_rate": 3.431178251642918e-05, + "loss": 0.8659, + "step": 7610 + }, + { + "epoch": 0.54, + "learning_rate": 3.429579129051672e-05, + "loss": 0.865, + "step": 7620 + }, + { + "epoch": 0.54, + "learning_rate": 3.4279781355905956e-05, + "loss": 0.8623, + "step": 7630 + }, + { + "epoch": 0.54, + "learning_rate": 3.4263752733548866e-05, + "loss": 0.8667, + "step": 7640 + }, + { + "epoch": 0.54, + "learning_rate": 3.424770544442187e-05, + "loss": 0.8646, + "step": 7650 + }, + { + "epoch": 0.54, + "learning_rate": 3.423163950952585e-05, + "loss": 0.8646, + "step": 7660 + }, + { + "epoch": 0.54, + "learning_rate": 3.421555494988605e-05, + "loss": 0.8655, + "step": 7670 + }, + { + "epoch": 0.54, + "learning_rate": 3.4199451786552094e-05, + "loss": 0.8669, + "step": 7680 + }, + { + "epoch": 0.54, + "learning_rate": 3.418333004059798e-05, + "loss": 0.8621, + "step": 7690 + }, + { + "epoch": 0.54, + "learning_rate": 3.4167189733122e-05, + "loss": 0.8618, + "step": 7700 + }, + { + "epoch": 0.54, + "learning_rate": 3.415103088524675e-05, + "loss": 0.8609, + "step": 7710 + }, + { + "epoch": 0.55, + "learning_rate": 3.413485351811908e-05, + "loss": 0.8595, + "step": 7720 + }, + { + "epoch": 0.55, + "learning_rate": 3.411865765291008e-05, + "loss": 0.8593, + "step": 7730 + }, + { + "epoch": 0.55, + "learning_rate": 3.410244331081503e-05, + "loss": 0.8648, + "step": 7740 + }, + { + "epoch": 0.55, + "learning_rate": 3.4086210513053446e-05, + "loss": 0.8623, + "step": 7750 + }, + { + "epoch": 0.55, + "learning_rate": 3.4069959280868934e-05, + "loss": 0.8567, + "step": 7760 + }, + { + "epoch": 0.55, + "learning_rate": 3.405368963552925e-05, + "loss": 0.8696, + "step": 7770 + }, + { + "epoch": 0.55, + "learning_rate": 3.403740159832625e-05, + "loss": 0.8615, + "step": 7780 + }, + { + "epoch": 0.55, + "learning_rate": 3.4021095190575865e-05, + "loss": 0.8553, + "step": 7790 + }, + { + "epoch": 0.55, + "learning_rate": 3.400477043361805e-05, + "loss": 0.8628, + "step": 7800 + }, + { + "epoch": 0.55, + "learning_rate": 3.3988427348816775e-05, + "loss": 0.8558, + "step": 7810 + }, + { + "epoch": 0.55, + "learning_rate": 3.397206595756001e-05, + "loss": 0.8637, + "step": 7820 + }, + { + "epoch": 0.55, + "learning_rate": 3.395568628125968e-05, + "loss": 0.8641, + "step": 7830 + }, + { + "epoch": 0.55, + "learning_rate": 3.393928834135163e-05, + "loss": 0.8641, + "step": 7840 + }, + { + "epoch": 0.55, + "learning_rate": 3.39228721592956e-05, + "loss": 0.8638, + "step": 7850 + }, + { + "epoch": 0.56, + "learning_rate": 3.390643775657523e-05, + "loss": 0.8666, + "step": 7860 + }, + { + "epoch": 0.56, + "learning_rate": 3.388998515469798e-05, + "loss": 0.8678, + "step": 7870 + }, + { + "epoch": 0.56, + "learning_rate": 3.387351437519513e-05, + "loss": 0.8633, + "step": 7880 + }, + { + "epoch": 0.56, + "learning_rate": 3.385702543962176e-05, + "loss": 0.857, + "step": 7890 + }, + { + "epoch": 0.56, + "learning_rate": 3.384051836955672e-05, + "loss": 0.8629, + "step": 7900 + }, + { + "epoch": 0.56, + "learning_rate": 3.382399318660255e-05, + "loss": 0.8648, + "step": 7910 + }, + { + "epoch": 0.56, + "learning_rate": 3.3807449912385535e-05, + "loss": 0.8627, + "step": 7920 + }, + { + "epoch": 0.56, + "learning_rate": 3.379088856855562e-05, + "loss": 0.8617, + "step": 7930 + }, + { + "epoch": 0.56, + "learning_rate": 3.377430917678641e-05, + "loss": 0.855, + "step": 7940 + }, + { + "epoch": 0.56, + "learning_rate": 3.37577117587751e-05, + "loss": 0.8582, + "step": 7950 + }, + { + "epoch": 0.56, + "learning_rate": 3.374109633624251e-05, + "loss": 0.861, + "step": 7960 + }, + { + "epoch": 0.56, + "learning_rate": 3.3724462930933e-05, + "loss": 0.8596, + "step": 7970 + }, + { + "epoch": 0.56, + "learning_rate": 3.370781156461447e-05, + "loss": 0.8588, + "step": 7980 + }, + { + "epoch": 0.56, + "learning_rate": 3.369114225907833e-05, + "loss": 0.857, + "step": 7990 + }, + { + "epoch": 0.57, + "learning_rate": 3.3674455036139455e-05, + "loss": 0.8584, + "step": 8000 + }, + { + "epoch": 0.57, + "learning_rate": 3.365774991763618e-05, + "loss": 0.861, + "step": 8010 + }, + { + "epoch": 0.57, + "learning_rate": 3.364102692543026e-05, + "loss": 0.8559, + "step": 8020 + }, + { + "epoch": 0.57, + "learning_rate": 3.362428608140682e-05, + "loss": 0.8608, + "step": 8030 + }, + { + "epoch": 0.57, + "learning_rate": 3.3607527407474395e-05, + "loss": 0.8532, + "step": 8040 + }, + { + "epoch": 0.57, + "learning_rate": 3.3590750925564794e-05, + "loss": 0.8544, + "step": 8050 + }, + { + "epoch": 0.57, + "learning_rate": 3.357395665763317e-05, + "loss": 0.8605, + "step": 8060 + }, + { + "epoch": 0.57, + "learning_rate": 3.355714462565795e-05, + "loss": 0.8473, + "step": 8070 + }, + { + "epoch": 0.57, + "learning_rate": 3.354031485164078e-05, + "loss": 0.8579, + "step": 8080 + }, + { + "epoch": 0.57, + "learning_rate": 3.352346735760656e-05, + "loss": 0.8528, + "step": 8090 + }, + { + "epoch": 0.57, + "learning_rate": 3.3506602165603365e-05, + "loss": 0.8618, + "step": 8100 + }, + { + "epoch": 0.57, + "learning_rate": 3.348971929770243e-05, + "loss": 0.8677, + "step": 8110 + }, + { + "epoch": 0.57, + "learning_rate": 3.347281877599813e-05, + "loss": 0.8551, + "step": 8120 + }, + { + "epoch": 0.57, + "learning_rate": 3.345590062260792e-05, + "loss": 0.8561, + "step": 8130 + }, + { + "epoch": 0.57, + "learning_rate": 3.3438964859672364e-05, + "loss": 0.8535, + "step": 8140 + }, + { + "epoch": 0.58, + "learning_rate": 3.342201150935504e-05, + "loss": 0.8597, + "step": 8150 + }, + { + "epoch": 0.58, + "learning_rate": 3.340504059384256e-05, + "loss": 0.8583, + "step": 8160 + }, + { + "epoch": 0.58, + "learning_rate": 3.338805213534453e-05, + "loss": 0.8661, + "step": 8170 + }, + { + "epoch": 0.58, + "learning_rate": 3.337104615609349e-05, + "loss": 0.863, + "step": 8180 + }, + { + "epoch": 0.58, + "learning_rate": 3.335402267834492e-05, + "loss": 0.8563, + "step": 8190 + }, + { + "epoch": 0.58, + "learning_rate": 3.3336981724377214e-05, + "loss": 0.8523, + "step": 8200 + }, + { + "epoch": 0.58, + "learning_rate": 3.331992331649163e-05, + "loss": 0.8642, + "step": 8210 + }, + { + "epoch": 0.58, + "learning_rate": 3.3302847477012246e-05, + "loss": 0.858, + "step": 8220 + }, + { + "epoch": 0.58, + "learning_rate": 3.328575422828598e-05, + "loss": 0.8653, + "step": 8230 + }, + { + "epoch": 0.58, + "learning_rate": 3.326864359268251e-05, + "loss": 0.8537, + "step": 8240 + }, + { + "epoch": 0.58, + "learning_rate": 3.325151559259431e-05, + "loss": 0.8556, + "step": 8250 + }, + { + "epoch": 0.58, + "learning_rate": 3.323437025043653e-05, + "loss": 0.8584, + "step": 8260 + }, + { + "epoch": 0.58, + "learning_rate": 3.321720758864703e-05, + "loss": 0.8562, + "step": 8270 + }, + { + "epoch": 0.58, + "learning_rate": 3.320002762968635e-05, + "loss": 0.8572, + "step": 8280 + }, + { + "epoch": 0.59, + "learning_rate": 3.318283039603765e-05, + "loss": 0.8592, + "step": 8290 + }, + { + "epoch": 0.59, + "learning_rate": 3.316561591020671e-05, + "loss": 0.8488, + "step": 8300 + }, + { + "epoch": 0.59, + "learning_rate": 3.314838419472189e-05, + "loss": 0.8493, + "step": 8310 + }, + { + "epoch": 0.59, + "learning_rate": 3.3131135272134076e-05, + "loss": 0.8506, + "step": 8320 + }, + { + "epoch": 0.59, + "learning_rate": 3.31138691650167e-05, + "loss": 0.8557, + "step": 8330 + }, + { + "epoch": 0.59, + "learning_rate": 3.309658589596565e-05, + "loss": 0.8631, + "step": 8340 + }, + { + "epoch": 0.59, + "learning_rate": 3.3079285487599326e-05, + "loss": 0.8636, + "step": 8350 + }, + { + "epoch": 0.59, + "learning_rate": 3.306196796255849e-05, + "loss": 0.8544, + "step": 8360 + }, + { + "epoch": 0.59, + "learning_rate": 3.304463334350637e-05, + "loss": 0.8587, + "step": 8370 + }, + { + "epoch": 0.59, + "learning_rate": 3.302728165312852e-05, + "loss": 0.8607, + "step": 8380 + }, + { + "epoch": 0.59, + "learning_rate": 3.300991291413285e-05, + "loss": 0.8569, + "step": 8390 + }, + { + "epoch": 0.59, + "learning_rate": 3.299252714924958e-05, + "loss": 0.8508, + "step": 8400 + }, + { + "epoch": 0.59, + "learning_rate": 3.297512438123122e-05, + "loss": 0.8474, + "step": 8410 + }, + { + "epoch": 0.59, + "learning_rate": 3.295770463285252e-05, + "loss": 0.8497, + "step": 8420 + }, + { + "epoch": 0.6, + "learning_rate": 3.2940267926910455e-05, + "loss": 0.859, + "step": 8430 + }, + { + "epoch": 0.6, + "learning_rate": 3.29228142862242e-05, + "loss": 0.8549, + "step": 8440 + }, + { + "epoch": 0.6, + "learning_rate": 3.290534373363507e-05, + "loss": 0.8596, + "step": 8450 + }, + { + "epoch": 0.6, + "learning_rate": 3.288785629200655e-05, + "loss": 0.8524, + "step": 8460 + }, + { + "epoch": 0.6, + "learning_rate": 3.28703519842242e-05, + "loss": 0.8589, + "step": 8470 + }, + { + "epoch": 0.6, + "learning_rate": 3.285283083319565e-05, + "loss": 0.8562, + "step": 8480 + }, + { + "epoch": 0.6, + "learning_rate": 3.283529286185058e-05, + "loss": 0.8591, + "step": 8490 + }, + { + "epoch": 0.6, + "learning_rate": 3.2817738093140685e-05, + "loss": 0.8594, + "step": 8500 + }, + { + "epoch": 0.6, + "learning_rate": 3.280016655003964e-05, + "loss": 0.8512, + "step": 8510 + }, + { + "epoch": 0.6, + "learning_rate": 3.2782578255543084e-05, + "loss": 0.8542, + "step": 8520 + }, + { + "epoch": 0.6, + "learning_rate": 3.2764973232668555e-05, + "loss": 0.8552, + "step": 8530 + }, + { + "epoch": 0.6, + "learning_rate": 3.27473515044555e-05, + "loss": 0.8498, + "step": 8540 + }, + { + "epoch": 0.6, + "learning_rate": 3.272971309396522e-05, + "loss": 0.8549, + "step": 8550 + }, + { + "epoch": 0.6, + "learning_rate": 3.271205802428086e-05, + "loss": 0.8541, + "step": 8560 + }, + { + "epoch": 0.61, + "learning_rate": 3.269438631850735e-05, + "loss": 0.8507, + "step": 8570 + }, + { + "epoch": 0.61, + "learning_rate": 3.26766979997714e-05, + "loss": 0.86, + "step": 8580 + }, + { + "epoch": 0.61, + "learning_rate": 3.265899309122147e-05, + "loss": 0.8541, + "step": 8590 + }, + { + "epoch": 0.61, + "learning_rate": 3.2641271616027705e-05, + "loss": 0.8491, + "step": 8600 + }, + { + "epoch": 0.61, + "learning_rate": 3.262353359738196e-05, + "loss": 0.8591, + "step": 8610 + }, + { + "epoch": 0.61, + "learning_rate": 3.260577905849772e-05, + "loss": 0.8558, + "step": 8620 + }, + { + "epoch": 0.61, + "learning_rate": 3.258800802261011e-05, + "loss": 0.8472, + "step": 8630 + }, + { + "epoch": 0.61, + "learning_rate": 3.257022051297581e-05, + "loss": 0.8519, + "step": 8640 + }, + { + "epoch": 0.61, + "learning_rate": 3.2552416552873096e-05, + "loss": 0.8617, + "step": 8650 + }, + { + "epoch": 0.61, + "learning_rate": 3.253459616560175e-05, + "loss": 0.8605, + "step": 8660 + }, + { + "epoch": 0.61, + "learning_rate": 3.2516759374483066e-05, + "loss": 0.8592, + "step": 8670 + }, + { + "epoch": 0.61, + "learning_rate": 3.249890620285979e-05, + "loss": 0.861, + "step": 8680 + }, + { + "epoch": 0.61, + "learning_rate": 3.2481036674096116e-05, + "loss": 0.8666, + "step": 8690 + }, + { + "epoch": 0.61, + "learning_rate": 3.246315081157765e-05, + "loss": 0.8559, + "step": 8700 + }, + { + "epoch": 0.62, + "learning_rate": 3.244524863871135e-05, + "loss": 0.8587, + "step": 8710 + }, + { + "epoch": 0.62, + "learning_rate": 3.242733017892555e-05, + "loss": 0.8553, + "step": 8720 + }, + { + "epoch": 0.62, + "learning_rate": 3.2409395455669866e-05, + "loss": 0.8505, + "step": 8730 + }, + { + "epoch": 0.62, + "learning_rate": 3.239144449241523e-05, + "loss": 0.8571, + "step": 8740 + }, + { + "epoch": 0.62, + "learning_rate": 3.23734773126538e-05, + "loss": 0.8586, + "step": 8750 + }, + { + "epoch": 0.62, + "learning_rate": 3.2355493939898976e-05, + "loss": 0.87, + "step": 8760 + }, + { + "epoch": 0.62, + "learning_rate": 3.233749439768534e-05, + "loss": 0.8534, + "step": 8770 + }, + { + "epoch": 0.62, + "learning_rate": 3.231947870956864e-05, + "loss": 0.8524, + "step": 8780 + }, + { + "epoch": 0.62, + "learning_rate": 3.2301446899125746e-05, + "loss": 0.8516, + "step": 8790 + }, + { + "epoch": 0.62, + "learning_rate": 3.2283398989954623e-05, + "loss": 0.8598, + "step": 8800 + }, + { + "epoch": 0.62, + "learning_rate": 3.226533500567433e-05, + "loss": 0.8614, + "step": 8810 + }, + { + "epoch": 0.62, + "learning_rate": 3.224725496992493e-05, + "loss": 0.8446, + "step": 8820 + }, + { + "epoch": 0.62, + "learning_rate": 3.2229158906367515e-05, + "loss": 0.8587, + "step": 8830 + }, + { + "epoch": 0.62, + "learning_rate": 3.221104683868415e-05, + "loss": 0.8602, + "step": 8840 + }, + { + "epoch": 0.63, + "learning_rate": 3.219291879057783e-05, + "loss": 0.8543, + "step": 8850 + }, + { + "epoch": 0.63, + "learning_rate": 3.2174774785772487e-05, + "loss": 0.8479, + "step": 8860 + }, + { + "epoch": 0.63, + "learning_rate": 3.2156614848012905e-05, + "loss": 0.8525, + "step": 8870 + }, + { + "epoch": 0.63, + "learning_rate": 3.2138439001064745e-05, + "loss": 0.852, + "step": 8880 + }, + { + "epoch": 0.63, + "learning_rate": 3.2120247268714473e-05, + "loss": 0.8557, + "step": 8890 + }, + { + "epoch": 0.63, + "learning_rate": 3.210203967476936e-05, + "loss": 0.8597, + "step": 8900 + }, + { + "epoch": 0.63, + "learning_rate": 3.2083816243057415e-05, + "loss": 0.8491, + "step": 8910 + }, + { + "epoch": 0.63, + "learning_rate": 3.206557699742739e-05, + "loss": 0.855, + "step": 8920 + }, + { + "epoch": 0.63, + "learning_rate": 3.204732196174872e-05, + "loss": 0.8586, + "step": 8930 + }, + { + "epoch": 0.63, + "learning_rate": 3.2029051159911513e-05, + "loss": 0.8527, + "step": 8940 + }, + { + "epoch": 0.63, + "learning_rate": 3.201076461582651e-05, + "loss": 0.8521, + "step": 8950 + }, + { + "epoch": 0.63, + "learning_rate": 3.199246235342504e-05, + "loss": 0.8504, + "step": 8960 + }, + { + "epoch": 0.63, + "learning_rate": 3.197414439665902e-05, + "loss": 0.8455, + "step": 8970 + }, + { + "epoch": 0.63, + "learning_rate": 3.1955810769500915e-05, + "loss": 0.8501, + "step": 8980 + }, + { + "epoch": 0.64, + "learning_rate": 3.193746149594365e-05, + "loss": 0.8529, + "step": 8990 + }, + { + "epoch": 0.64, + "learning_rate": 3.1919096600000674e-05, + "loss": 0.8473, + "step": 9000 + }, + { + "epoch": 0.64, + "learning_rate": 3.190071610570587e-05, + "loss": 0.8482, + "step": 9010 + }, + { + "epoch": 0.64, + "learning_rate": 3.188232003711353e-05, + "loss": 0.8533, + "step": 9020 + }, + { + "epoch": 0.64, + "learning_rate": 3.1863908418298304e-05, + "loss": 0.8582, + "step": 9030 + }, + { + "epoch": 0.64, + "learning_rate": 3.184548127335524e-05, + "loss": 0.8495, + "step": 9040 + }, + { + "epoch": 0.64, + "learning_rate": 3.1827038626399665e-05, + "loss": 0.8522, + "step": 9050 + }, + { + "epoch": 0.64, + "learning_rate": 3.180858050156722e-05, + "loss": 0.8487, + "step": 9060 + }, + { + "epoch": 0.64, + "learning_rate": 3.1790106923013784e-05, + "loss": 0.8523, + "step": 9070 + }, + { + "epoch": 0.64, + "learning_rate": 3.177161791491546e-05, + "loss": 0.8602, + "step": 9080 + }, + { + "epoch": 0.64, + "learning_rate": 3.175311350146856e-05, + "loss": 0.8475, + "step": 9090 + }, + { + "epoch": 0.64, + "learning_rate": 3.173459370688954e-05, + "loss": 0.8514, + "step": 9100 + }, + { + "epoch": 0.64, + "learning_rate": 3.1716058555414985e-05, + "loss": 0.8554, + "step": 9110 + }, + { + "epoch": 0.64, + "learning_rate": 3.16975080713016e-05, + "loss": 0.8494, + "step": 9120 + }, + { + "epoch": 0.64, + "learning_rate": 3.167894227882613e-05, + "loss": 0.8458, + "step": 9130 + }, + { + "epoch": 0.65, + "learning_rate": 3.166036120228536e-05, + "loss": 0.8438, + "step": 9140 + }, + { + "epoch": 0.65, + "learning_rate": 3.1641764865996084e-05, + "loss": 0.8474, + "step": 9150 + }, + { + "epoch": 0.65, + "learning_rate": 3.162315329429506e-05, + "loss": 0.8478, + "step": 9160 + }, + { + "epoch": 0.65, + "learning_rate": 3.1604526511538996e-05, + "loss": 0.8474, + "step": 9170 + }, + { + "epoch": 0.65, + "learning_rate": 3.1585884542104476e-05, + "loss": 0.8415, + "step": 9180 + }, + { + "epoch": 0.65, + "learning_rate": 3.1567227410388004e-05, + "loss": 0.8496, + "step": 9190 + }, + { + "epoch": 0.65, + "learning_rate": 3.154855514080589e-05, + "loss": 0.8499, + "step": 9200 + }, + { + "epoch": 0.65, + "learning_rate": 3.152986775779426e-05, + "loss": 0.8456, + "step": 9210 + }, + { + "epoch": 0.65, + "learning_rate": 3.151116528580904e-05, + "loss": 0.8463, + "step": 9220 + }, + { + "epoch": 0.65, + "learning_rate": 3.149244774932588e-05, + "loss": 0.8463, + "step": 9230 + }, + { + "epoch": 0.65, + "learning_rate": 3.1473715172840154e-05, + "loss": 0.8533, + "step": 9240 + }, + { + "epoch": 0.65, + "learning_rate": 3.145496758086693e-05, + "loss": 0.8517, + "step": 9250 + }, + { + "epoch": 0.65, + "learning_rate": 3.143620499794089e-05, + "loss": 0.8505, + "step": 9260 + }, + { + "epoch": 0.65, + "learning_rate": 3.141742744861638e-05, + "loss": 0.8543, + "step": 9270 + }, + { + "epoch": 0.66, + "learning_rate": 3.139863495746731e-05, + "loss": 0.8473, + "step": 9280 + }, + { + "epoch": 0.66, + "learning_rate": 3.137982754908713e-05, + "loss": 0.8493, + "step": 9290 + }, + { + "epoch": 0.66, + "learning_rate": 3.136100524808884e-05, + "loss": 0.853, + "step": 9300 + }, + { + "epoch": 0.66, + "learning_rate": 3.1342168079104914e-05, + "loss": 0.8481, + "step": 9310 + }, + { + "epoch": 0.66, + "learning_rate": 3.132331606678728e-05, + "loss": 0.8475, + "step": 9320 + }, + { + "epoch": 0.66, + "learning_rate": 3.130444923580732e-05, + "loss": 0.8443, + "step": 9330 + }, + { + "epoch": 0.66, + "learning_rate": 3.128556761085576e-05, + "loss": 0.8536, + "step": 9340 + }, + { + "epoch": 0.66, + "learning_rate": 3.126667121664272e-05, + "loss": 0.862, + "step": 9350 + }, + { + "epoch": 0.66, + "learning_rate": 3.1247760077897656e-05, + "loss": 0.846, + "step": 9360 + }, + { + "epoch": 0.66, + "learning_rate": 3.122883421936929e-05, + "loss": 0.8382, + "step": 9370 + }, + { + "epoch": 0.66, + "learning_rate": 3.1209893665825625e-05, + "loss": 0.8503, + "step": 9380 + }, + { + "epoch": 0.66, + "learning_rate": 3.119093844205391e-05, + "loss": 0.85, + "step": 9390 + }, + { + "epoch": 0.66, + "learning_rate": 3.117196857286055e-05, + "loss": 0.845, + "step": 9400 + }, + { + "epoch": 0.66, + "learning_rate": 3.1152984083071165e-05, + "loss": 0.8503, + "step": 9410 + }, + { + "epoch": 0.67, + "learning_rate": 3.1133984997530475e-05, + "loss": 0.8424, + "step": 9420 + }, + { + "epoch": 0.67, + "learning_rate": 3.1114971341102306e-05, + "loss": 0.855, + "step": 9430 + }, + { + "epoch": 0.67, + "learning_rate": 3.109594313866958e-05, + "loss": 0.853, + "step": 9440 + }, + { + "epoch": 0.67, + "learning_rate": 3.107690041513422e-05, + "loss": 0.8463, + "step": 9450 + }, + { + "epoch": 0.67, + "learning_rate": 3.1057843195417166e-05, + "loss": 0.8502, + "step": 9460 + }, + { + "epoch": 0.67, + "learning_rate": 3.103877150445834e-05, + "loss": 0.8467, + "step": 9470 + }, + { + "epoch": 0.67, + "learning_rate": 3.1019685367216575e-05, + "loss": 0.8472, + "step": 9480 + }, + { + "epoch": 0.67, + "learning_rate": 3.100058480866964e-05, + "loss": 0.8547, + "step": 9490 + }, + { + "epoch": 0.67, + "learning_rate": 3.098146985381416e-05, + "loss": 0.8431, + "step": 9500 + }, + { + "epoch": 0.67, + "learning_rate": 3.096234052766561e-05, + "loss": 0.8443, + "step": 9510 + }, + { + "epoch": 0.67, + "learning_rate": 3.0943196855258255e-05, + "loss": 0.8513, + "step": 9520 + }, + { + "epoch": 0.67, + "learning_rate": 3.0924038861645155e-05, + "loss": 0.8471, + "step": 9530 + }, + { + "epoch": 0.67, + "learning_rate": 3.0904866571898096e-05, + "loss": 0.8477, + "step": 9540 + }, + { + "epoch": 0.67, + "learning_rate": 3.088568001110759e-05, + "loss": 0.8484, + "step": 9550 + }, + { + "epoch": 0.68, + "learning_rate": 3.0866479204382805e-05, + "loss": 0.8471, + "step": 9560 + }, + { + "epoch": 0.68, + "learning_rate": 3.0847264176851575e-05, + "loss": 0.8402, + "step": 9570 + }, + { + "epoch": 0.68, + "learning_rate": 3.0828034953660324e-05, + "loss": 0.8387, + "step": 9580 + }, + { + "epoch": 0.68, + "learning_rate": 3.080879155997406e-05, + "loss": 0.8419, + "step": 9590 + }, + { + "epoch": 0.68, + "learning_rate": 3.078953402097635e-05, + "loss": 0.8425, + "step": 9600 + }, + { + "epoch": 0.68, + "learning_rate": 3.077026236186925e-05, + "loss": 0.8519, + "step": 9610 + }, + { + "epoch": 0.68, + "learning_rate": 3.07509766078733e-05, + "loss": 0.8449, + "step": 9620 + }, + { + "epoch": 0.68, + "learning_rate": 3.073167678422752e-05, + "loss": 0.8422, + "step": 9630 + }, + { + "epoch": 0.68, + "learning_rate": 3.0712362916189274e-05, + "loss": 0.847, + "step": 9640 + }, + { + "epoch": 0.68, + "learning_rate": 3.069303502903437e-05, + "loss": 0.8407, + "step": 9650 + }, + { + "epoch": 0.68, + "learning_rate": 3.0673693148056946e-05, + "loss": 0.8537, + "step": 9660 + }, + { + "epoch": 0.68, + "learning_rate": 3.065433729856943e-05, + "loss": 0.8499, + "step": 9670 + }, + { + "epoch": 0.68, + "learning_rate": 3.063496750590255e-05, + "loss": 0.847, + "step": 9680 + }, + { + "epoch": 0.68, + "learning_rate": 3.061558379540527e-05, + "loss": 0.8511, + "step": 9690 + }, + { + "epoch": 0.69, + "learning_rate": 3.059618619244479e-05, + "loss": 0.8594, + "step": 9700 + }, + { + "epoch": 0.69, + "learning_rate": 3.0576774722406466e-05, + "loss": 0.8452, + "step": 9710 + }, + { + "epoch": 0.69, + "learning_rate": 3.055734941069383e-05, + "loss": 0.8435, + "step": 9720 + }, + { + "epoch": 0.69, + "learning_rate": 3.053791028272849e-05, + "loss": 0.8431, + "step": 9730 + }, + { + "epoch": 0.69, + "learning_rate": 3.0518457363950154e-05, + "loss": 0.8477, + "step": 9740 + }, + { + "epoch": 0.69, + "learning_rate": 3.0498990679816602e-05, + "loss": 0.8436, + "step": 9750 + }, + { + "epoch": 0.69, + "learning_rate": 3.0479510255803587e-05, + "loss": 0.8414, + "step": 9760 + }, + { + "epoch": 0.69, + "learning_rate": 3.046001611740487e-05, + "loss": 0.8352, + "step": 9770 + }, + { + "epoch": 0.69, + "learning_rate": 3.044050829013215e-05, + "loss": 0.8465, + "step": 9780 + }, + { + "epoch": 0.69, + "learning_rate": 3.0420986799515037e-05, + "loss": 0.8419, + "step": 9790 + }, + { + "epoch": 0.69, + "learning_rate": 3.0401451671101036e-05, + "loss": 0.8477, + "step": 9800 + }, + { + "epoch": 0.69, + "learning_rate": 3.038190293045548e-05, + "loss": 0.8451, + "step": 9810 + }, + { + "epoch": 0.69, + "learning_rate": 3.0362340603161552e-05, + "loss": 0.8537, + "step": 9820 + }, + { + "epoch": 0.69, + "learning_rate": 3.0342764714820166e-05, + "loss": 0.8401, + "step": 9830 + }, + { + "epoch": 0.7, + "learning_rate": 3.0323175291050014e-05, + "loss": 0.8411, + "step": 9840 + }, + { + "epoch": 0.7, + "learning_rate": 3.03035723574875e-05, + "loss": 0.8516, + "step": 9850 + }, + { + "epoch": 0.7, + "learning_rate": 3.02839559397867e-05, + "loss": 0.8453, + "step": 9860 + }, + { + "epoch": 0.7, + "learning_rate": 3.0264326063619346e-05, + "loss": 0.8418, + "step": 9870 + }, + { + "epoch": 0.7, + "learning_rate": 3.024468275467477e-05, + "loss": 0.8473, + "step": 9880 + }, + { + "epoch": 0.7, + "learning_rate": 3.0225026038659892e-05, + "loss": 0.8435, + "step": 9890 + }, + { + "epoch": 0.7, + "learning_rate": 3.0205355941299187e-05, + "loss": 0.8538, + "step": 9900 + }, + { + "epoch": 0.7, + "learning_rate": 3.0185672488334607e-05, + "loss": 0.8559, + "step": 9910 + }, + { + "epoch": 0.7, + "learning_rate": 3.0165975705525628e-05, + "loss": 0.8616, + "step": 9920 + }, + { + "epoch": 0.7, + "learning_rate": 3.014626561864914e-05, + "loss": 0.8619, + "step": 9930 + }, + { + "epoch": 0.7, + "learning_rate": 3.0126542253499463e-05, + "loss": 0.8567, + "step": 9940 + }, + { + "epoch": 0.7, + "learning_rate": 3.010680563588827e-05, + "loss": 0.8562, + "step": 9950 + }, + { + "epoch": 0.7, + "learning_rate": 3.0087055791644594e-05, + "loss": 0.8564, + "step": 9960 + }, + { + "epoch": 0.7, + "learning_rate": 3.0067292746614776e-05, + "loss": 0.8443, + "step": 9970 + }, + { + "epoch": 0.7, + "learning_rate": 3.0047516526662437e-05, + "loss": 0.8505, + "step": 9980 + }, + { + "epoch": 0.71, + "learning_rate": 3.0027727157668428e-05, + "loss": 0.8452, + "step": 9990 + }, + { + "epoch": 0.71, + "learning_rate": 3.000792466553082e-05, + "loss": 0.8473, + "step": 10000 + }, + { + "epoch": 0.71, + "learning_rate": 2.9988109076164843e-05, + "loss": 0.8509, + "step": 10010 + }, + { + "epoch": 0.71, + "learning_rate": 2.9968280415502886e-05, + "loss": 0.8457, + "step": 10020 + }, + { + "epoch": 0.71, + "learning_rate": 2.9948438709494424e-05, + "loss": 0.8476, + "step": 10030 + }, + { + "epoch": 0.71, + "learning_rate": 2.9928583984106026e-05, + "loss": 0.85, + "step": 10040 + }, + { + "epoch": 0.71, + "learning_rate": 2.990871626532128e-05, + "loss": 0.8502, + "step": 10050 + }, + { + "epoch": 0.71, + "learning_rate": 2.988883557914079e-05, + "loss": 0.8528, + "step": 10060 + }, + { + "epoch": 0.71, + "learning_rate": 2.986894195158213e-05, + "loss": 0.8436, + "step": 10070 + }, + { + "epoch": 0.71, + "learning_rate": 2.98490354086798e-05, + "loss": 0.838, + "step": 10080 + }, + { + "epoch": 0.71, + "learning_rate": 2.9829115976485213e-05, + "loss": 0.85, + "step": 10090 + }, + { + "epoch": 0.71, + "learning_rate": 2.9809183681066646e-05, + "loss": 0.848, + "step": 10100 + }, + { + "epoch": 0.71, + "learning_rate": 2.9789238548509202e-05, + "loss": 0.8383, + "step": 10110 + }, + { + "epoch": 0.71, + "learning_rate": 2.9769280604914805e-05, + "loss": 0.8451, + "step": 10120 + }, + { + "epoch": 0.72, + "learning_rate": 2.9749309876402122e-05, + "loss": 0.8435, + "step": 10130 + }, + { + "epoch": 0.72, + "learning_rate": 2.9729326389106564e-05, + "loss": 0.8475, + "step": 10140 + }, + { + "epoch": 0.72, + "learning_rate": 2.9709330169180238e-05, + "loss": 0.8423, + "step": 10150 + }, + { + "epoch": 0.72, + "learning_rate": 2.9689321242791906e-05, + "loss": 0.846, + "step": 10160 + }, + { + "epoch": 0.72, + "learning_rate": 2.9669299636126973e-05, + "loss": 0.845, + "step": 10170 + }, + { + "epoch": 0.72, + "learning_rate": 2.964926537538742e-05, + "loss": 0.843, + "step": 10180 + }, + { + "epoch": 0.72, + "learning_rate": 2.9629218486791807e-05, + "loss": 0.847, + "step": 10190 + }, + { + "epoch": 0.72, + "learning_rate": 2.9609158996575207e-05, + "loss": 0.846, + "step": 10200 + }, + { + "epoch": 0.72, + "learning_rate": 2.9589086930989196e-05, + "loss": 0.8472, + "step": 10210 + }, + { + "epoch": 0.72, + "learning_rate": 2.956900231630179e-05, + "loss": 0.8405, + "step": 10220 + }, + { + "epoch": 0.72, + "learning_rate": 2.9548905178797442e-05, + "loss": 0.8372, + "step": 10230 + }, + { + "epoch": 0.72, + "learning_rate": 2.952879554477699e-05, + "loss": 0.844, + "step": 10240 + }, + { + "epoch": 0.72, + "learning_rate": 2.9508673440557625e-05, + "loss": 0.8434, + "step": 10250 + }, + { + "epoch": 0.72, + "learning_rate": 2.948853889247286e-05, + "loss": 0.8393, + "step": 10260 + }, + { + "epoch": 0.73, + "learning_rate": 2.9468391926872487e-05, + "loss": 0.8438, + "step": 10270 + }, + { + "epoch": 0.73, + "learning_rate": 2.9448232570122557e-05, + "loss": 0.8412, + "step": 10280 + }, + { + "epoch": 0.73, + "learning_rate": 2.942806084860533e-05, + "loss": 0.8415, + "step": 10290 + }, + { + "epoch": 0.73, + "learning_rate": 2.9407876788719253e-05, + "loss": 0.8446, + "step": 10300 + }, + { + "epoch": 0.73, + "learning_rate": 2.9387680416878915e-05, + "loss": 0.8449, + "step": 10310 + }, + { + "epoch": 0.73, + "learning_rate": 2.936747175951502e-05, + "loss": 0.8492, + "step": 10320 + }, + { + "epoch": 0.73, + "learning_rate": 2.934725084307435e-05, + "loss": 0.8442, + "step": 10330 + }, + { + "epoch": 0.73, + "learning_rate": 2.9327017694019737e-05, + "loss": 0.843, + "step": 10340 + }, + { + "epoch": 0.73, + "learning_rate": 2.930677233883e-05, + "loss": 0.848, + "step": 10350 + }, + { + "epoch": 0.73, + "learning_rate": 2.9286514803999963e-05, + "loss": 0.8436, + "step": 10360 + }, + { + "epoch": 0.73, + "learning_rate": 2.926624511604036e-05, + "loss": 0.8414, + "step": 10370 + }, + { + "epoch": 0.73, + "learning_rate": 2.924596330147785e-05, + "loss": 0.8418, + "step": 10380 + }, + { + "epoch": 0.73, + "learning_rate": 2.9225669386854955e-05, + "loss": 0.8421, + "step": 10390 + }, + { + "epoch": 0.73, + "learning_rate": 2.9205363398730032e-05, + "loss": 0.8387, + "step": 10400 + }, + { + "epoch": 0.74, + "learning_rate": 2.918504536367723e-05, + "loss": 0.8409, + "step": 10410 + }, + { + "epoch": 0.74, + "learning_rate": 2.9164715308286474e-05, + "loss": 0.8445, + "step": 10420 + }, + { + "epoch": 0.74, + "learning_rate": 2.914437325916343e-05, + "loss": 0.8413, + "step": 10430 + }, + { + "epoch": 0.74, + "learning_rate": 2.912401924292944e-05, + "loss": 0.8371, + "step": 10440 + }, + { + "epoch": 0.74, + "learning_rate": 2.9103653286221508e-05, + "loss": 0.8427, + "step": 10450 + }, + { + "epoch": 0.74, + "learning_rate": 2.908327541569228e-05, + "loss": 0.838, + "step": 10460 + }, + { + "epoch": 0.74, + "learning_rate": 2.9062885658009978e-05, + "loss": 0.8401, + "step": 10470 + }, + { + "epoch": 0.74, + "learning_rate": 2.9042484039858397e-05, + "loss": 0.8333, + "step": 10480 + }, + { + "epoch": 0.74, + "learning_rate": 2.9022070587936837e-05, + "loss": 0.8424, + "step": 10490 + }, + { + "epoch": 0.74, + "learning_rate": 2.9001645328960092e-05, + "loss": 0.8416, + "step": 10500 + }, + { + "epoch": 0.74, + "learning_rate": 2.898120828965841e-05, + "loss": 0.8489, + "step": 10510 + }, + { + "epoch": 0.74, + "learning_rate": 2.8960759496777444e-05, + "loss": 0.8442, + "step": 10520 + }, + { + "epoch": 0.74, + "learning_rate": 2.8940298977078254e-05, + "loss": 0.8352, + "step": 10530 + }, + { + "epoch": 0.74, + "learning_rate": 2.8919826757337218e-05, + "loss": 0.8413, + "step": 10540 + }, + { + "epoch": 0.75, + "learning_rate": 2.889934286434604e-05, + "loss": 0.8369, + "step": 10550 + }, + { + "epoch": 0.75, + "learning_rate": 2.88788473249117e-05, + "loss": 0.8431, + "step": 10560 + }, + { + "epoch": 0.75, + "learning_rate": 2.885834016585642e-05, + "loss": 0.8444, + "step": 10570 + }, + { + "epoch": 0.75, + "learning_rate": 2.883782141401763e-05, + "loss": 0.8423, + "step": 10580 + }, + { + "epoch": 0.75, + "learning_rate": 2.8817291096247918e-05, + "loss": 0.837, + "step": 10590 + }, + { + "epoch": 0.75, + "learning_rate": 2.8796749239415027e-05, + "loss": 0.8352, + "step": 10600 + }, + { + "epoch": 0.75, + "learning_rate": 2.8776195870401787e-05, + "loss": 0.8348, + "step": 10610 + }, + { + "epoch": 0.75, + "learning_rate": 2.8755631016106095e-05, + "loss": 0.8442, + "step": 10620 + }, + { + "epoch": 0.75, + "learning_rate": 2.8735054703440903e-05, + "loss": 0.8427, + "step": 10630 + }, + { + "epoch": 0.75, + "learning_rate": 2.871446695933411e-05, + "loss": 0.8401, + "step": 10640 + }, + { + "epoch": 0.75, + "learning_rate": 2.869386781072863e-05, + "loss": 0.8367, + "step": 10650 + }, + { + "epoch": 0.75, + "learning_rate": 2.867325728458225e-05, + "loss": 0.8501, + "step": 10660 + }, + { + "epoch": 0.75, + "learning_rate": 2.86526354078677e-05, + "loss": 0.8445, + "step": 10670 + }, + { + "epoch": 0.75, + "learning_rate": 2.863200220757251e-05, + "loss": 0.8357, + "step": 10680 + }, + { + "epoch": 0.76, + "learning_rate": 2.8611357710699065e-05, + "loss": 0.8394, + "step": 10690 + }, + { + "epoch": 0.76, + "learning_rate": 2.859070194426452e-05, + "loss": 0.8384, + "step": 10700 + }, + { + "epoch": 0.76, + "learning_rate": 2.85700349353008e-05, + "loss": 0.83, + "step": 10710 + }, + { + "epoch": 0.76, + "learning_rate": 2.85493567108545e-05, + "loss": 0.8343, + "step": 10720 + }, + { + "epoch": 0.76, + "learning_rate": 2.852866729798693e-05, + "loss": 0.8314, + "step": 10730 + }, + { + "epoch": 0.76, + "learning_rate": 2.8507966723774033e-05, + "loss": 0.8405, + "step": 10740 + }, + { + "epoch": 0.76, + "learning_rate": 2.848725501530635e-05, + "loss": 0.843, + "step": 10750 + }, + { + "epoch": 0.76, + "learning_rate": 2.8466532199689e-05, + "loss": 0.8309, + "step": 10760 + }, + { + "epoch": 0.76, + "learning_rate": 2.8445798304041635e-05, + "loss": 0.8388, + "step": 10770 + }, + { + "epoch": 0.76, + "learning_rate": 2.8425053355498408e-05, + "loss": 0.8385, + "step": 10780 + }, + { + "epoch": 0.76, + "learning_rate": 2.8404297381207944e-05, + "loss": 0.8487, + "step": 10790 + }, + { + "epoch": 0.76, + "learning_rate": 2.8383530408333285e-05, + "loss": 0.8389, + "step": 10800 + }, + { + "epoch": 0.76, + "learning_rate": 2.8362752464051873e-05, + "loss": 0.836, + "step": 10810 + }, + { + "epoch": 0.76, + "learning_rate": 2.8341963575555513e-05, + "loss": 0.8313, + "step": 10820 + }, + { + "epoch": 0.76, + "learning_rate": 2.8321163770050312e-05, + "loss": 0.8407, + "step": 10830 + }, + { + "epoch": 0.77, + "learning_rate": 2.8300353074756698e-05, + "loss": 0.8298, + "step": 10840 + }, + { + "epoch": 0.77, + "learning_rate": 2.8279531516909314e-05, + "loss": 0.8368, + "step": 10850 + }, + { + "epoch": 0.77, + "learning_rate": 2.8258699123757048e-05, + "loss": 0.8335, + "step": 10860 + }, + { + "epoch": 0.77, + "learning_rate": 2.8237855922562956e-05, + "loss": 0.8334, + "step": 10870 + }, + { + "epoch": 0.77, + "learning_rate": 2.8217001940604234e-05, + "loss": 0.8394, + "step": 10880 + }, + { + "epoch": 0.77, + "learning_rate": 2.819613720517219e-05, + "loss": 0.836, + "step": 10890 + }, + { + "epoch": 0.77, + "learning_rate": 2.8175261743572207e-05, + "loss": 0.8328, + "step": 10900 + }, + { + "epoch": 0.77, + "learning_rate": 2.8154375583123706e-05, + "loss": 0.8367, + "step": 10910 + }, + { + "epoch": 0.77, + "learning_rate": 2.8133478751160104e-05, + "loss": 0.8341, + "step": 10920 + }, + { + "epoch": 0.77, + "learning_rate": 2.811257127502879e-05, + "loss": 0.8346, + "step": 10930 + }, + { + "epoch": 0.77, + "learning_rate": 2.8091653182091083e-05, + "loss": 0.8342, + "step": 10940 + }, + { + "epoch": 0.77, + "learning_rate": 2.8070724499722186e-05, + "loss": 0.8395, + "step": 10950 + }, + { + "epoch": 0.77, + "learning_rate": 2.804978525531117e-05, + "loss": 0.8364, + "step": 10960 + }, + { + "epoch": 0.77, + "learning_rate": 2.8028835476260928e-05, + "loss": 0.8338, + "step": 10970 + }, + { + "epoch": 0.78, + "learning_rate": 2.8007875189988136e-05, + "loss": 0.8367, + "step": 10980 + }, + { + "epoch": 0.78, + "learning_rate": 2.7986904423923218e-05, + "loss": 0.8436, + "step": 10990 + }, + { + "epoch": 0.78, + "learning_rate": 2.7965923205510324e-05, + "loss": 0.8386, + "step": 11000 + }, + { + "epoch": 0.78, + "learning_rate": 2.794493156220727e-05, + "loss": 0.8293, + "step": 11010 + }, + { + "epoch": 0.78, + "learning_rate": 2.7923929521485525e-05, + "loss": 0.8347, + "step": 11020 + }, + { + "epoch": 0.78, + "learning_rate": 2.790291711083015e-05, + "loss": 0.8416, + "step": 11030 + }, + { + "epoch": 0.78, + "learning_rate": 2.7881894357739803e-05, + "loss": 0.8335, + "step": 11040 + }, + { + "epoch": 0.78, + "learning_rate": 2.7860861289726647e-05, + "loss": 0.8368, + "step": 11050 + }, + { + "epoch": 0.78, + "learning_rate": 2.783981793431637e-05, + "loss": 0.8385, + "step": 11060 + }, + { + "epoch": 0.78, + "learning_rate": 2.7818764319048102e-05, + "loss": 0.8316, + "step": 11070 + }, + { + "epoch": 0.78, + "learning_rate": 2.7797700471474415e-05, + "loss": 0.839, + "step": 11080 + }, + { + "epoch": 0.78, + "learning_rate": 2.7776626419161263e-05, + "loss": 0.8286, + "step": 11090 + }, + { + "epoch": 0.78, + "learning_rate": 2.7755542189687957e-05, + "loss": 0.8357, + "step": 11100 + }, + { + "epoch": 0.78, + "learning_rate": 2.773444781064713e-05, + "loss": 0.8369, + "step": 11110 + }, + { + "epoch": 0.79, + "learning_rate": 2.7713343309644693e-05, + "loss": 0.8322, + "step": 11120 + }, + { + "epoch": 0.79, + "learning_rate": 2.7692228714299814e-05, + "loss": 0.8313, + "step": 11130 + }, + { + "epoch": 0.79, + "learning_rate": 2.767110405224485e-05, + "loss": 0.8384, + "step": 11140 + }, + { + "epoch": 0.79, + "learning_rate": 2.764996935112535e-05, + "loss": 0.8403, + "step": 11150 + }, + { + "epoch": 0.79, + "learning_rate": 2.7628824638600004e-05, + "loss": 0.8576, + "step": 11160 + }, + { + "epoch": 0.79, + "learning_rate": 2.7607669942340588e-05, + "loss": 0.8731, + "step": 11170 + }, + { + "epoch": 0.79, + "learning_rate": 2.7586505290031946e-05, + "loss": 0.8715, + "step": 11180 + }, + { + "epoch": 0.79, + "learning_rate": 2.756533070937197e-05, + "loss": 0.8995, + "step": 11190 + }, + { + "epoch": 0.79, + "learning_rate": 2.7544146228071524e-05, + "loss": 0.8774, + "step": 11200 + }, + { + "epoch": 0.79, + "learning_rate": 2.752295187385444e-05, + "loss": 0.8696, + "step": 11210 + }, + { + "epoch": 0.79, + "learning_rate": 2.7501747674457462e-05, + "loss": 0.8675, + "step": 11220 + }, + { + "epoch": 0.79, + "learning_rate": 2.748053365763023e-05, + "loss": 0.8541, + "step": 11230 + }, + { + "epoch": 0.79, + "learning_rate": 2.745930985113522e-05, + "loss": 0.8546, + "step": 11240 + }, + { + "epoch": 0.79, + "learning_rate": 2.743807628274773e-05, + "loss": 0.8482, + "step": 11250 + }, + { + "epoch": 0.8, + "learning_rate": 2.7416832980255823e-05, + "loss": 0.8488, + "step": 11260 + }, + { + "epoch": 0.8, + "learning_rate": 2.7395579971460313e-05, + "loss": 0.8427, + "step": 11270 + }, + { + "epoch": 0.8, + "learning_rate": 2.7374317284174705e-05, + "loss": 0.8478, + "step": 11280 + }, + { + "epoch": 0.8, + "learning_rate": 2.7353044946225172e-05, + "loss": 0.8459, + "step": 11290 + }, + { + "epoch": 0.8, + "learning_rate": 2.733176298545053e-05, + "loss": 0.8439, + "step": 11300 + }, + { + "epoch": 0.8, + "learning_rate": 2.731047142970216e-05, + "loss": 0.8401, + "step": 11310 + }, + { + "epoch": 0.8, + "learning_rate": 2.728917030684404e-05, + "loss": 0.8487, + "step": 11320 + }, + { + "epoch": 0.8, + "learning_rate": 2.726785964475262e-05, + "loss": 0.848, + "step": 11330 + }, + { + "epoch": 0.8, + "learning_rate": 2.7246539471316885e-05, + "loss": 0.8441, + "step": 11340 + }, + { + "epoch": 0.8, + "learning_rate": 2.722520981443823e-05, + "loss": 0.8377, + "step": 11350 + }, + { + "epoch": 0.8, + "learning_rate": 2.720387070203047e-05, + "loss": 0.8417, + "step": 11360 + }, + { + "epoch": 0.8, + "learning_rate": 2.7182522162019808e-05, + "loss": 0.8355, + "step": 11370 + }, + { + "epoch": 0.8, + "learning_rate": 2.716116422234476e-05, + "loss": 0.8411, + "step": 11380 + }, + { + "epoch": 0.8, + "learning_rate": 2.7139796910956165e-05, + "loss": 0.8437, + "step": 11390 + }, + { + "epoch": 0.81, + "learning_rate": 2.7118420255817122e-05, + "loss": 0.8393, + "step": 11400 + }, + { + "epoch": 0.81, + "learning_rate": 2.7097034284902952e-05, + "loss": 0.8429, + "step": 11410 + }, + { + "epoch": 0.81, + "learning_rate": 2.7075639026201167e-05, + "loss": 0.84, + "step": 11420 + }, + { + "epoch": 0.81, + "learning_rate": 2.705423450771144e-05, + "loss": 0.8401, + "step": 11430 + }, + { + "epoch": 0.81, + "learning_rate": 2.703282075744556e-05, + "loss": 0.8366, + "step": 11440 + }, + { + "epoch": 0.81, + "learning_rate": 2.7011397803427395e-05, + "loss": 0.8404, + "step": 11450 + }, + { + "epoch": 0.81, + "learning_rate": 2.6989965673692857e-05, + "loss": 0.8454, + "step": 11460 + }, + { + "epoch": 0.81, + "learning_rate": 2.6968524396289874e-05, + "loss": 0.8451, + "step": 11470 + }, + { + "epoch": 0.81, + "learning_rate": 2.694707399927834e-05, + "loss": 0.8412, + "step": 11480 + }, + { + "epoch": 0.81, + "learning_rate": 2.692561451073008e-05, + "loss": 0.8341, + "step": 11490 + }, + { + "epoch": 0.81, + "learning_rate": 2.6904145958728826e-05, + "loss": 0.8362, + "step": 11500 + }, + { + "epoch": 0.81, + "learning_rate": 2.6882668371370156e-05, + "loss": 0.8362, + "step": 11510 + }, + { + "epoch": 0.81, + "learning_rate": 2.6861181776761496e-05, + "loss": 0.8395, + "step": 11520 + }, + { + "epoch": 0.81, + "learning_rate": 2.6839686203022032e-05, + "loss": 0.8384, + "step": 11530 + }, + { + "epoch": 0.82, + "learning_rate": 2.6818181678282725e-05, + "loss": 0.8412, + "step": 11540 + }, + { + "epoch": 0.82, + "learning_rate": 2.679666823068624e-05, + "loss": 0.8325, + "step": 11550 + }, + { + "epoch": 0.82, + "learning_rate": 2.677514588838692e-05, + "loss": 0.8353, + "step": 11560 + }, + { + "epoch": 0.82, + "learning_rate": 2.6753614679550738e-05, + "loss": 0.8426, + "step": 11570 + }, + { + "epoch": 0.82, + "learning_rate": 2.6732074632355286e-05, + "loss": 0.8359, + "step": 11580 + }, + { + "epoch": 0.82, + "learning_rate": 2.6710525774989718e-05, + "loss": 0.8344, + "step": 11590 + }, + { + "epoch": 0.82, + "learning_rate": 2.6688968135654712e-05, + "loss": 0.8332, + "step": 11600 + }, + { + "epoch": 0.82, + "learning_rate": 2.6667401742562452e-05, + "loss": 0.8356, + "step": 11610 + }, + { + "epoch": 0.82, + "learning_rate": 2.664582662393656e-05, + "loss": 0.8238, + "step": 11620 + }, + { + "epoch": 0.82, + "learning_rate": 2.6624242808012098e-05, + "loss": 0.8394, + "step": 11630 + }, + { + "epoch": 0.82, + "learning_rate": 2.6602650323035484e-05, + "loss": 0.8319, + "step": 11640 + }, + { + "epoch": 0.82, + "learning_rate": 2.6581049197264503e-05, + "loss": 0.8348, + "step": 11650 + }, + { + "epoch": 0.82, + "learning_rate": 2.6559439458968233e-05, + "loss": 0.8315, + "step": 11660 + }, + { + "epoch": 0.82, + "learning_rate": 2.653782113642705e-05, + "loss": 0.8328, + "step": 11670 + }, + { + "epoch": 0.83, + "learning_rate": 2.6516194257932522e-05, + "loss": 0.8345, + "step": 11680 + }, + { + "epoch": 0.83, + "learning_rate": 2.6494558851787445e-05, + "loss": 0.8422, + "step": 11690 + }, + { + "epoch": 0.83, + "learning_rate": 2.6472914946305768e-05, + "loss": 0.8417, + "step": 11700 + }, + { + "epoch": 0.83, + "learning_rate": 2.6451262569812567e-05, + "loss": 0.8416, + "step": 11710 + }, + { + "epoch": 0.83, + "learning_rate": 2.642960175064399e-05, + "loss": 0.8346, + "step": 11720 + }, + { + "epoch": 0.83, + "learning_rate": 2.6407932517147244e-05, + "loss": 0.8358, + "step": 11730 + }, + { + "epoch": 0.83, + "learning_rate": 2.6386254897680546e-05, + "loss": 0.8367, + "step": 11740 + }, + { + "epoch": 0.83, + "learning_rate": 2.6364568920613094e-05, + "loss": 0.8325, + "step": 11750 + }, + { + "epoch": 0.83, + "learning_rate": 2.6342874614325004e-05, + "loss": 0.8356, + "step": 11760 + }, + { + "epoch": 0.83, + "learning_rate": 2.632117200720732e-05, + "loss": 0.8281, + "step": 11770 + }, + { + "epoch": 0.83, + "learning_rate": 2.629946112766192e-05, + "loss": 0.8346, + "step": 11780 + }, + { + "epoch": 0.83, + "learning_rate": 2.6277742004101536e-05, + "loss": 0.8351, + "step": 11790 + }, + { + "epoch": 0.83, + "learning_rate": 2.625601466494967e-05, + "loss": 0.8401, + "step": 11800 + }, + { + "epoch": 0.83, + "learning_rate": 2.623427913864058e-05, + "loss": 0.8367, + "step": 11810 + }, + { + "epoch": 0.83, + "learning_rate": 2.621253545361924e-05, + "loss": 0.8282, + "step": 11820 + }, + { + "epoch": 0.84, + "learning_rate": 2.6190783638341302e-05, + "loss": 0.8396, + "step": 11830 + }, + { + "epoch": 0.84, + "learning_rate": 2.6169023721273047e-05, + "loss": 0.8293, + "step": 11840 + }, + { + "epoch": 0.84, + "learning_rate": 2.6147255730891384e-05, + "loss": 0.836, + "step": 11850 + }, + { + "epoch": 0.84, + "learning_rate": 2.612547969568377e-05, + "loss": 0.8323, + "step": 11860 + }, + { + "epoch": 0.84, + "learning_rate": 2.6103695644148182e-05, + "loss": 0.8256, + "step": 11870 + }, + { + "epoch": 0.84, + "learning_rate": 2.6081903604793098e-05, + "loss": 0.8355, + "step": 11880 + }, + { + "epoch": 0.84, + "learning_rate": 2.6060103606137458e-05, + "loss": 0.8313, + "step": 11890 + }, + { + "epoch": 0.84, + "learning_rate": 2.6038295676710607e-05, + "loss": 0.8319, + "step": 11900 + }, + { + "epoch": 0.84, + "learning_rate": 2.6016479845052266e-05, + "loss": 0.8282, + "step": 11910 + }, + { + "epoch": 0.84, + "learning_rate": 2.5994656139712504e-05, + "loss": 0.8308, + "step": 11920 + }, + { + "epoch": 0.84, + "learning_rate": 2.59728245892517e-05, + "loss": 0.8287, + "step": 11930 + }, + { + "epoch": 0.84, + "learning_rate": 2.595098522224049e-05, + "loss": 0.8318, + "step": 11940 + }, + { + "epoch": 0.84, + "learning_rate": 2.5929138067259735e-05, + "loss": 0.8346, + "step": 11950 + }, + { + "epoch": 0.84, + "learning_rate": 2.5907283152900508e-05, + "loss": 0.8366, + "step": 11960 + }, + { + "epoch": 0.85, + "learning_rate": 2.588542050776401e-05, + "loss": 0.8334, + "step": 11970 + }, + { + "epoch": 0.85, + "learning_rate": 2.5863550160461587e-05, + "loss": 0.8316, + "step": 11980 + }, + { + "epoch": 0.85, + "learning_rate": 2.584167213961464e-05, + "loss": 0.8317, + "step": 11990 + }, + { + "epoch": 0.85, + "learning_rate": 2.5819786473854627e-05, + "loss": 0.8327, + "step": 12000 + }, + { + "epoch": 0.85, + "learning_rate": 2.5797893191823e-05, + "loss": 0.8366, + "step": 12010 + }, + { + "epoch": 0.85, + "learning_rate": 2.5775992322171207e-05, + "loss": 0.8319, + "step": 12020 + }, + { + "epoch": 0.85, + "learning_rate": 2.575408389356058e-05, + "loss": 0.8367, + "step": 12030 + }, + { + "epoch": 0.85, + "learning_rate": 2.573216793466238e-05, + "loss": 0.8278, + "step": 12040 + }, + { + "epoch": 0.85, + "learning_rate": 2.57102444741577e-05, + "loss": 0.8318, + "step": 12050 + }, + { + "epoch": 0.85, + "learning_rate": 2.5688313540737473e-05, + "loss": 0.83, + "step": 12060 + }, + { + "epoch": 0.85, + "learning_rate": 2.5666375163102388e-05, + "loss": 0.8265, + "step": 12070 + }, + { + "epoch": 0.85, + "learning_rate": 2.5644429369962894e-05, + "loss": 0.8296, + "step": 12080 + }, + { + "epoch": 0.85, + "learning_rate": 2.5622476190039135e-05, + "loss": 0.8326, + "step": 12090 + }, + { + "epoch": 0.85, + "learning_rate": 2.560051565206092e-05, + "loss": 0.8325, + "step": 12100 + }, + { + "epoch": 0.86, + "learning_rate": 2.5578547784767702e-05, + "loss": 0.8348, + "step": 12110 + }, + { + "epoch": 0.86, + "learning_rate": 2.555657261690851e-05, + "loss": 0.8244, + "step": 12120 + }, + { + "epoch": 0.86, + "learning_rate": 2.5534590177241936e-05, + "loss": 0.8249, + "step": 12130 + }, + { + "epoch": 0.86, + "learning_rate": 2.5512600494536077e-05, + "loss": 0.8305, + "step": 12140 + }, + { + "epoch": 0.86, + "learning_rate": 2.5490603597568528e-05, + "loss": 0.8331, + "step": 12150 + }, + { + "epoch": 0.86, + "learning_rate": 2.546859951512631e-05, + "loss": 0.8354, + "step": 12160 + }, + { + "epoch": 0.86, + "learning_rate": 2.5446588276005857e-05, + "loss": 0.8248, + "step": 12170 + }, + { + "epoch": 0.86, + "learning_rate": 2.542456990901295e-05, + "loss": 0.8329, + "step": 12180 + }, + { + "epoch": 0.86, + "learning_rate": 2.5402544442962724e-05, + "loss": 0.8352, + "step": 12190 + }, + { + "epoch": 0.86, + "learning_rate": 2.5380511906679587e-05, + "loss": 0.8318, + "step": 12200 + }, + { + "epoch": 0.86, + "learning_rate": 2.5358472328997214e-05, + "loss": 0.8321, + "step": 12210 + }, + { + "epoch": 0.86, + "learning_rate": 2.533642573875848e-05, + "loss": 0.8292, + "step": 12220 + }, + { + "epoch": 0.86, + "learning_rate": 2.531437216481544e-05, + "loss": 0.8294, + "step": 12230 + }, + { + "epoch": 0.86, + "learning_rate": 2.5292311636029293e-05, + "loss": 0.8348, + "step": 12240 + }, + { + "epoch": 0.87, + "learning_rate": 2.527024418127035e-05, + "loss": 0.8381, + "step": 12250 + }, + { + "epoch": 0.87, + "learning_rate": 2.5248169829417957e-05, + "loss": 0.8278, + "step": 12260 + }, + { + "epoch": 0.87, + "learning_rate": 2.522608860936051e-05, + "loss": 0.8273, + "step": 12270 + }, + { + "epoch": 0.87, + "learning_rate": 2.5204000549995397e-05, + "loss": 0.829, + "step": 12280 + }, + { + "epoch": 0.87, + "learning_rate": 2.5181905680228935e-05, + "loss": 0.8233, + "step": 12290 + }, + { + "epoch": 0.87, + "learning_rate": 2.5159804028976367e-05, + "loss": 0.8286, + "step": 12300 + }, + { + "epoch": 0.87, + "learning_rate": 2.513769562516181e-05, + "loss": 0.8304, + "step": 12310 + }, + { + "epoch": 0.87, + "learning_rate": 2.5115580497718223e-05, + "loss": 0.8371, + "step": 12320 + }, + { + "epoch": 0.87, + "learning_rate": 2.5093458675587344e-05, + "loss": 0.8297, + "step": 12330 + }, + { + "epoch": 0.87, + "learning_rate": 2.50713301877197e-05, + "loss": 0.8313, + "step": 12340 + }, + { + "epoch": 0.87, + "learning_rate": 2.5049195063074522e-05, + "loss": 0.8318, + "step": 12350 + }, + { + "epoch": 0.87, + "learning_rate": 2.5027053330619735e-05, + "loss": 0.8312, + "step": 12360 + }, + { + "epoch": 0.87, + "learning_rate": 2.5004905019331903e-05, + "loss": 0.8293, + "step": 12370 + }, + { + "epoch": 0.87, + "learning_rate": 2.498275015819621e-05, + "loss": 0.8235, + "step": 12380 + }, + { + "epoch": 0.88, + "learning_rate": 2.49605887762064e-05, + "loss": 0.8282, + "step": 12390 + }, + { + "epoch": 0.88, + "learning_rate": 2.4938420902364774e-05, + "loss": 0.8285, + "step": 12400 + }, + { + "epoch": 0.88, + "learning_rate": 2.4916246565682097e-05, + "loss": 0.8278, + "step": 12410 + }, + { + "epoch": 0.88, + "learning_rate": 2.4894065795177607e-05, + "loss": 0.8361, + "step": 12420 + }, + { + "epoch": 0.88, + "learning_rate": 2.4871878619878973e-05, + "loss": 0.833, + "step": 12430 + }, + { + "epoch": 0.88, + "learning_rate": 2.484968506882223e-05, + "loss": 0.8351, + "step": 12440 + }, + { + "epoch": 0.88, + "learning_rate": 2.4827485171051756e-05, + "loss": 0.8288, + "step": 12450 + }, + { + "epoch": 0.88, + "learning_rate": 2.480527895562025e-05, + "loss": 0.8269, + "step": 12460 + }, + { + "epoch": 0.88, + "learning_rate": 2.478306645158867e-05, + "loss": 0.824, + "step": 12470 + }, + { + "epoch": 0.88, + "learning_rate": 2.4760847688026198e-05, + "loss": 0.8285, + "step": 12480 + }, + { + "epoch": 0.88, + "learning_rate": 2.473862269401021e-05, + "loss": 0.8401, + "step": 12490 + }, + { + "epoch": 0.88, + "learning_rate": 2.4716391498626244e-05, + "loss": 0.8317, + "step": 12500 + }, + { + "epoch": 0.88, + "learning_rate": 2.4694154130967945e-05, + "loss": 0.8345, + "step": 12510 + }, + { + "epoch": 0.88, + "learning_rate": 2.4671910620137053e-05, + "loss": 0.8313, + "step": 12520 + }, + { + "epoch": 0.89, + "learning_rate": 2.4649660995243312e-05, + "loss": 0.8277, + "step": 12530 + }, + { + "epoch": 0.89, + "learning_rate": 2.46274052854045e-05, + "loss": 0.8225, + "step": 12540 + }, + { + "epoch": 0.89, + "learning_rate": 2.4605143519746352e-05, + "loss": 0.8263, + "step": 12550 + }, + { + "epoch": 0.89, + "learning_rate": 2.4582875727402516e-05, + "loss": 0.8237, + "step": 12560 + }, + { + "epoch": 0.89, + "learning_rate": 2.4560601937514537e-05, + "loss": 0.8312, + "step": 12570 + }, + { + "epoch": 0.89, + "learning_rate": 2.4538322179231804e-05, + "loss": 0.8357, + "step": 12580 + }, + { + "epoch": 0.89, + "learning_rate": 2.451603648171153e-05, + "loss": 0.8255, + "step": 12590 + }, + { + "epoch": 0.89, + "learning_rate": 2.4493744874118668e-05, + "loss": 0.8312, + "step": 12600 + }, + { + "epoch": 0.89, + "learning_rate": 2.4471447385625945e-05, + "loss": 0.8232, + "step": 12610 + }, + { + "epoch": 0.89, + "learning_rate": 2.4449144045413763e-05, + "loss": 0.8255, + "step": 12620 + }, + { + "epoch": 0.89, + "learning_rate": 2.4426834882670182e-05, + "loss": 0.8213, + "step": 12630 + }, + { + "epoch": 0.89, + "learning_rate": 2.4404519926590888e-05, + "loss": 0.8305, + "step": 12640 + }, + { + "epoch": 0.89, + "learning_rate": 2.4382199206379146e-05, + "loss": 0.8276, + "step": 12650 + }, + { + "epoch": 0.89, + "learning_rate": 2.4359872751245763e-05, + "loss": 0.8259, + "step": 12660 + }, + { + "epoch": 0.89, + "learning_rate": 2.4337540590409054e-05, + "loss": 0.8283, + "step": 12670 + }, + { + "epoch": 0.9, + "learning_rate": 2.4315202753094794e-05, + "loss": 0.8322, + "step": 12680 + }, + { + "epoch": 0.9, + "learning_rate": 2.42928592685362e-05, + "loss": 0.8296, + "step": 12690 + }, + { + "epoch": 0.9, + "learning_rate": 2.4270510165973865e-05, + "loss": 0.8211, + "step": 12700 + }, + { + "epoch": 0.9, + "learning_rate": 2.424815547465575e-05, + "loss": 0.8254, + "step": 12710 + }, + { + "epoch": 0.9, + "learning_rate": 2.4225795223837114e-05, + "loss": 0.8312, + "step": 12720 + }, + { + "epoch": 0.9, + "learning_rate": 2.42034294427805e-05, + "loss": 0.8276, + "step": 12730 + }, + { + "epoch": 0.9, + "learning_rate": 2.4181058160755682e-05, + "loss": 0.8308, + "step": 12740 + }, + { + "epoch": 0.9, + "learning_rate": 2.415868140703965e-05, + "loss": 0.82, + "step": 12750 + }, + { + "epoch": 0.9, + "learning_rate": 2.4136299210916532e-05, + "loss": 0.8272, + "step": 12760 + }, + { + "epoch": 0.9, + "learning_rate": 2.4113911601677592e-05, + "loss": 0.8316, + "step": 12770 + }, + { + "epoch": 0.9, + "learning_rate": 2.4091518608621183e-05, + "loss": 0.8289, + "step": 12780 + }, + { + "epoch": 0.9, + "learning_rate": 2.4069120261052682e-05, + "loss": 0.8267, + "step": 12790 + }, + { + "epoch": 0.9, + "learning_rate": 2.4046716588284492e-05, + "loss": 0.824, + "step": 12800 + }, + { + "epoch": 0.9, + "learning_rate": 2.4024307619635984e-05, + "loss": 0.821, + "step": 12810 + }, + { + "epoch": 0.91, + "learning_rate": 2.400189338443345e-05, + "loss": 0.8322, + "step": 12820 + }, + { + "epoch": 0.91, + "learning_rate": 2.3979473912010094e-05, + "loss": 0.8339, + "step": 12830 + }, + { + "epoch": 0.91, + "learning_rate": 2.3957049231705946e-05, + "loss": 0.8256, + "step": 12840 + }, + { + "epoch": 0.91, + "learning_rate": 2.3934619372867866e-05, + "loss": 0.8233, + "step": 12850 + }, + { + "epoch": 0.91, + "learning_rate": 2.3912184364849492e-05, + "loss": 0.8185, + "step": 12860 + }, + { + "epoch": 0.91, + "learning_rate": 2.388974423701121e-05, + "loss": 0.8257, + "step": 12870 + }, + { + "epoch": 0.91, + "learning_rate": 2.3867299018720084e-05, + "loss": 0.8285, + "step": 12880 + }, + { + "epoch": 0.91, + "learning_rate": 2.3844848739349846e-05, + "loss": 0.8218, + "step": 12890 + }, + { + "epoch": 0.91, + "learning_rate": 2.3822393428280873e-05, + "loss": 0.8207, + "step": 12900 + }, + { + "epoch": 0.91, + "learning_rate": 2.3799933114900096e-05, + "loss": 0.8256, + "step": 12910 + }, + { + "epoch": 0.91, + "learning_rate": 2.3777467828601016e-05, + "loss": 0.8304, + "step": 12920 + }, + { + "epoch": 0.91, + "learning_rate": 2.3754997598783637e-05, + "loss": 0.8314, + "step": 12930 + }, + { + "epoch": 0.91, + "learning_rate": 2.373252245485441e-05, + "loss": 0.8228, + "step": 12940 + }, + { + "epoch": 0.91, + "learning_rate": 2.3710042426226254e-05, + "loss": 0.8339, + "step": 12950 + }, + { + "epoch": 0.92, + "learning_rate": 2.3687557542318447e-05, + "loss": 0.8282, + "step": 12960 + }, + { + "epoch": 0.92, + "learning_rate": 2.366506783255665e-05, + "loss": 0.8255, + "step": 12970 + }, + { + "epoch": 0.92, + "learning_rate": 2.3642573326372825e-05, + "loss": 0.8301, + "step": 12980 + }, + { + "epoch": 0.92, + "learning_rate": 2.3620074053205202e-05, + "loss": 0.8217, + "step": 12990 + }, + { + "epoch": 0.92, + "learning_rate": 2.3597570042498262e-05, + "loss": 0.8214, + "step": 13000 + }, + { + "epoch": 0.92, + "learning_rate": 2.357506132370269e-05, + "loss": 0.8173, + "step": 13010 + }, + { + "epoch": 0.92, + "learning_rate": 2.3552547926275313e-05, + "loss": 0.8229, + "step": 13020 + }, + { + "epoch": 0.92, + "learning_rate": 2.3530029879679104e-05, + "loss": 0.8241, + "step": 13030 + }, + { + "epoch": 0.92, + "learning_rate": 2.3507507213383108e-05, + "loss": 0.8256, + "step": 13040 + }, + { + "epoch": 0.92, + "learning_rate": 2.3484979956862413e-05, + "loss": 0.8285, + "step": 13050 + }, + { + "epoch": 0.92, + "learning_rate": 2.3462448139598123e-05, + "loss": 0.8183, + "step": 13060 + }, + { + "epoch": 0.92, + "learning_rate": 2.3439911791077303e-05, + "loss": 0.8159, + "step": 13070 + }, + { + "epoch": 0.92, + "learning_rate": 2.3417370940792944e-05, + "loss": 0.8265, + "step": 13080 + }, + { + "epoch": 0.92, + "learning_rate": 2.3394825618243954e-05, + "loss": 0.8214, + "step": 13090 + }, + { + "epoch": 0.93, + "learning_rate": 2.337227585293506e-05, + "loss": 0.8272, + "step": 13100 + }, + { + "epoch": 0.93, + "learning_rate": 2.3349721674376826e-05, + "loss": 0.822, + "step": 13110 + }, + { + "epoch": 0.93, + "learning_rate": 2.332716311208558e-05, + "loss": 0.8174, + "step": 13120 + }, + { + "epoch": 0.93, + "learning_rate": 2.3304600195583394e-05, + "loss": 0.8198, + "step": 13130 + }, + { + "epoch": 0.93, + "learning_rate": 2.3282032954398032e-05, + "loss": 0.831, + "step": 13140 + }, + { + "epoch": 0.93, + "learning_rate": 2.3259461418062923e-05, + "loss": 0.8243, + "step": 13150 + }, + { + "epoch": 0.93, + "learning_rate": 2.3236885616117112e-05, + "loss": 0.8253, + "step": 13160 + }, + { + "epoch": 0.93, + "learning_rate": 2.3214305578105237e-05, + "loss": 0.8201, + "step": 13170 + }, + { + "epoch": 0.93, + "learning_rate": 2.319172133357746e-05, + "loss": 0.8184, + "step": 13180 + }, + { + "epoch": 0.93, + "learning_rate": 2.3169132912089468e-05, + "loss": 0.8208, + "step": 13190 + }, + { + "epoch": 0.93, + "learning_rate": 2.3146540343202408e-05, + "loss": 0.8271, + "step": 13200 + }, + { + "epoch": 0.93, + "learning_rate": 2.3123943656482845e-05, + "loss": 0.83, + "step": 13210 + }, + { + "epoch": 0.93, + "learning_rate": 2.310134288150275e-05, + "loss": 0.819, + "step": 13220 + }, + { + "epoch": 0.93, + "learning_rate": 2.3078738047839425e-05, + "loss": 0.8219, + "step": 13230 + }, + { + "epoch": 0.94, + "learning_rate": 2.3056129185075503e-05, + "loss": 0.8263, + "step": 13240 + }, + { + "epoch": 0.94, + "learning_rate": 2.3033516322798875e-05, + "loss": 0.821, + "step": 13250 + }, + { + "epoch": 0.94, + "learning_rate": 2.3010899490602673e-05, + "loss": 0.8181, + "step": 13260 + }, + { + "epoch": 0.94, + "learning_rate": 2.2988278718085223e-05, + "loss": 0.8123, + "step": 13270 + }, + { + "epoch": 0.94, + "learning_rate": 2.296565403485001e-05, + "loss": 0.8258, + "step": 13280 + }, + { + "epoch": 0.94, + "learning_rate": 2.2943025470505625e-05, + "loss": 0.8197, + "step": 13290 + }, + { + "epoch": 0.94, + "learning_rate": 2.2920393054665757e-05, + "loss": 0.8257, + "step": 13300 + }, + { + "epoch": 0.94, + "learning_rate": 2.2897756816949128e-05, + "loss": 0.822, + "step": 13310 + }, + { + "epoch": 0.94, + "learning_rate": 2.2875116786979454e-05, + "loss": 0.8286, + "step": 13320 + }, + { + "epoch": 0.94, + "learning_rate": 2.2852472994385416e-05, + "loss": 0.8185, + "step": 13330 + }, + { + "epoch": 0.94, + "learning_rate": 2.282982546880063e-05, + "loss": 0.8149, + "step": 13340 + }, + { + "epoch": 0.94, + "learning_rate": 2.280717423986359e-05, + "loss": 0.8205, + "step": 13350 + }, + { + "epoch": 0.94, + "learning_rate": 2.2784519337217637e-05, + "loss": 0.8162, + "step": 13360 + }, + { + "epoch": 0.94, + "learning_rate": 2.2761860790510907e-05, + "loss": 0.8217, + "step": 13370 + }, + { + "epoch": 0.95, + "learning_rate": 2.273919862939633e-05, + "loss": 0.8192, + "step": 13380 + }, + { + "epoch": 0.95, + "learning_rate": 2.2716532883531545e-05, + "loss": 0.8173, + "step": 13390 + }, + { + "epoch": 0.95, + "learning_rate": 2.2693863582578905e-05, + "loss": 0.8156, + "step": 13400 + }, + { + "epoch": 0.95, + "learning_rate": 2.2671190756205384e-05, + "loss": 0.8208, + "step": 13410 + }, + { + "epoch": 0.95, + "learning_rate": 2.2648514434082593e-05, + "loss": 0.82, + "step": 13420 + }, + { + "epoch": 0.95, + "learning_rate": 2.262583464588671e-05, + "loss": 0.8197, + "step": 13430 + }, + { + "epoch": 0.95, + "learning_rate": 2.260315142129846e-05, + "loss": 0.8213, + "step": 13440 + }, + { + "epoch": 0.95, + "learning_rate": 2.2580464790003046e-05, + "loss": 0.8208, + "step": 13450 + }, + { + "epoch": 0.95, + "learning_rate": 2.255777478169014e-05, + "loss": 0.8168, + "step": 13460 + }, + { + "epoch": 0.95, + "learning_rate": 2.2535081426053834e-05, + "loss": 0.825, + "step": 13470 + }, + { + "epoch": 0.95, + "learning_rate": 2.2512384752792605e-05, + "loss": 0.8188, + "step": 13480 + }, + { + "epoch": 0.95, + "learning_rate": 2.248968479160925e-05, + "loss": 0.8199, + "step": 13490 + }, + { + "epoch": 0.95, + "learning_rate": 2.24669815722109e-05, + "loss": 0.8231, + "step": 13500 + }, + { + "epoch": 0.95, + "learning_rate": 2.244427512430893e-05, + "loss": 0.8239, + "step": 13510 + }, + { + "epoch": 0.96, + "learning_rate": 2.2421565477618938e-05, + "loss": 0.8165, + "step": 13520 + }, + { + "epoch": 0.96, + "learning_rate": 2.2398852661860725e-05, + "loss": 0.8146, + "step": 13530 + }, + { + "epoch": 0.96, + "learning_rate": 2.2376136706758222e-05, + "loss": 0.8142, + "step": 13540 + }, + { + "epoch": 0.96, + "learning_rate": 2.2353417642039483e-05, + "loss": 0.8221, + "step": 13550 + }, + { + "epoch": 0.96, + "learning_rate": 2.2330695497436618e-05, + "loss": 0.8191, + "step": 13560 + }, + { + "epoch": 0.96, + "learning_rate": 2.2307970302685775e-05, + "loss": 0.8216, + "step": 13570 + }, + { + "epoch": 0.96, + "learning_rate": 2.2285242087527092e-05, + "loss": 0.8183, + "step": 13580 + }, + { + "epoch": 0.96, + "learning_rate": 2.2262510881704662e-05, + "loss": 0.8228, + "step": 13590 + }, + { + "epoch": 0.96, + "learning_rate": 2.2239776714966492e-05, + "loss": 0.8149, + "step": 13600 + }, + { + "epoch": 0.96, + "learning_rate": 2.221703961706446e-05, + "loss": 0.8148, + "step": 13610 + }, + { + "epoch": 0.96, + "learning_rate": 2.2194299617754274e-05, + "loss": 0.8123, + "step": 13620 + }, + { + "epoch": 0.96, + "learning_rate": 2.217155674679546e-05, + "loss": 0.814, + "step": 13630 + }, + { + "epoch": 0.96, + "learning_rate": 2.2148811033951283e-05, + "loss": 0.8214, + "step": 13640 + }, + { + "epoch": 0.96, + "learning_rate": 2.2126062508988736e-05, + "loss": 0.8275, + "step": 13650 + }, + { + "epoch": 0.96, + "learning_rate": 2.210331120167848e-05, + "loss": 0.8457, + "step": 13660 + }, + { + "epoch": 0.97, + "learning_rate": 2.2080557141794836e-05, + "loss": 0.8277, + "step": 13670 + }, + { + "epoch": 0.97, + "learning_rate": 2.2057800359115716e-05, + "loss": 0.8376, + "step": 13680 + }, + { + "epoch": 0.97, + "learning_rate": 2.2035040883422595e-05, + "loss": 0.9098, + "step": 13690 + }, + { + "epoch": 0.97, + "learning_rate": 2.2012278744500482e-05, + "loss": 0.8815, + "step": 13700 + }, + { + "epoch": 0.97, + "learning_rate": 2.1989513972137852e-05, + "loss": 0.8618, + "step": 13710 + }, + { + "epoch": 0.97, + "learning_rate": 2.1966746596126643e-05, + "loss": 0.8511, + "step": 13720 + }, + { + "epoch": 0.97, + "learning_rate": 2.1943976646262198e-05, + "loss": 0.847, + "step": 13730 + }, + { + "epoch": 0.97, + "learning_rate": 2.1921204152343233e-05, + "loss": 0.8395, + "step": 13740 + }, + { + "epoch": 0.97, + "learning_rate": 2.1898429144171763e-05, + "loss": 0.838, + "step": 13750 + }, + { + "epoch": 0.97, + "learning_rate": 2.187565165155314e-05, + "loss": 0.832, + "step": 13760 + }, + { + "epoch": 0.97, + "learning_rate": 2.185287170429593e-05, + "loss": 0.8328, + "step": 13770 + }, + { + "epoch": 0.97, + "learning_rate": 2.1830089332211936e-05, + "loss": 0.8363, + "step": 13780 + }, + { + "epoch": 0.97, + "learning_rate": 2.180730456511611e-05, + "loss": 0.8291, + "step": 13790 + }, + { + "epoch": 0.97, + "learning_rate": 2.1784517432826563e-05, + "loss": 0.8242, + "step": 13800 + }, + { + "epoch": 0.98, + "learning_rate": 2.1761727965164488e-05, + "loss": 0.8305, + "step": 13810 + }, + { + "epoch": 0.98, + "learning_rate": 2.1738936191954134e-05, + "loss": 0.8369, + "step": 13820 + }, + { + "epoch": 0.98, + "learning_rate": 2.1716142143022772e-05, + "loss": 0.8263, + "step": 13830 + }, + { + "epoch": 0.98, + "learning_rate": 2.1693345848200647e-05, + "loss": 0.8374, + "step": 13840 + }, + { + "epoch": 0.98, + "learning_rate": 2.1670547337320948e-05, + "loss": 0.8276, + "step": 13850 + }, + { + "epoch": 0.98, + "learning_rate": 2.1647746640219762e-05, + "loss": 0.8313, + "step": 13860 + }, + { + "epoch": 0.98, + "learning_rate": 2.162494378673603e-05, + "loss": 0.8276, + "step": 13870 + }, + { + "epoch": 0.98, + "learning_rate": 2.1602138806711524e-05, + "loss": 0.8181, + "step": 13880 + }, + { + "epoch": 0.98, + "learning_rate": 2.1579331729990795e-05, + "loss": 0.8233, + "step": 13890 + }, + { + "epoch": 0.98, + "learning_rate": 2.155652258642115e-05, + "loss": 0.8214, + "step": 13900 + }, + { + "epoch": 0.98, + "learning_rate": 2.1533711405852578e-05, + "loss": 0.8231, + "step": 13910 + }, + { + "epoch": 0.98, + "learning_rate": 2.151089821813775e-05, + "loss": 0.8134, + "step": 13920 + }, + { + "epoch": 0.98, + "learning_rate": 2.1488083053131956e-05, + "loss": 0.8192, + "step": 13930 + }, + { + "epoch": 0.98, + "learning_rate": 2.1465265940693084e-05, + "loss": 0.8182, + "step": 13940 + }, + { + "epoch": 0.99, + "learning_rate": 2.1442446910681557e-05, + "loss": 0.8205, + "step": 13950 + }, + { + "epoch": 0.99, + "learning_rate": 2.141962599296032e-05, + "loss": 0.8181, + "step": 13960 + }, + { + "epoch": 0.99, + "learning_rate": 2.1396803217394777e-05, + "loss": 0.8177, + "step": 13970 + }, + { + "epoch": 0.99, + "learning_rate": 2.137397861385278e-05, + "loss": 0.8257, + "step": 13980 + }, + { + "epoch": 0.99, + "learning_rate": 2.135115221220455e-05, + "loss": 0.8191, + "step": 13990 + }, + { + "epoch": 0.99, + "learning_rate": 2.1328324042322678e-05, + "loss": 0.8218, + "step": 14000 + }, + { + "epoch": 0.99, + "learning_rate": 2.1305494134082068e-05, + "loss": 0.814, + "step": 14010 + }, + { + "epoch": 0.99, + "learning_rate": 2.1282662517359885e-05, + "loss": 0.8173, + "step": 14020 + }, + { + "epoch": 0.99, + "learning_rate": 2.1259829222035554e-05, + "loss": 0.8182, + "step": 14030 + }, + { + "epoch": 0.99, + "learning_rate": 2.123699427799067e-05, + "loss": 0.8199, + "step": 14040 + }, + { + "epoch": 0.99, + "learning_rate": 2.121415771510902e-05, + "loss": 0.8215, + "step": 14050 + }, + { + "epoch": 0.99, + "learning_rate": 2.119131956327646e-05, + "loss": 0.8149, + "step": 14060 + }, + { + "epoch": 0.99, + "learning_rate": 2.1168479852380973e-05, + "loss": 0.8138, + "step": 14070 + }, + { + "epoch": 0.99, + "learning_rate": 2.114563861231256e-05, + "loss": 0.8089, + "step": 14080 + }, + { + "epoch": 1.0, + "learning_rate": 2.112279587296322e-05, + "loss": 0.8176, + "step": 14090 + }, + { + "epoch": 1.0, + "learning_rate": 2.1099951664226927e-05, + "loss": 0.8194, + "step": 14100 + }, + { + "epoch": 1.0, + "learning_rate": 2.1077106015999566e-05, + "loss": 0.8233, + "step": 14110 + }, + { + "epoch": 1.0, + "learning_rate": 2.1054258958178914e-05, + "loss": 0.823, + "step": 14120 + }, + { + "epoch": 1.0, + "learning_rate": 2.1031410520664597e-05, + "loss": 0.8173, + "step": 14130 + }, + { + "epoch": 1.0, + "learning_rate": 2.1008560733358027e-05, + "loss": 0.8159, + "step": 14140 + }, + { + "epoch": 1.0, + "learning_rate": 2.0985709626162404e-05, + "loss": 0.8173, + "step": 14150 + }, + { + "epoch": 1.0, + "learning_rate": 2.0962857228982636e-05, + "loss": 0.8057, + "step": 14160 + }, + { + "epoch": 1.0, + "learning_rate": 2.0940003571725346e-05, + "loss": 0.8023, + "step": 14170 + }, + { + "epoch": 1.0, + "learning_rate": 2.0917148684298773e-05, + "loss": 0.7996, + "step": 14180 + }, + { + "epoch": 1.0, + "learning_rate": 2.089429259661279e-05, + "loss": 0.7989, + "step": 14190 + }, + { + "epoch": 1.0, + "learning_rate": 2.0871435338578833e-05, + "loss": 0.8005, + "step": 14200 + }, + { + "epoch": 1.0, + "learning_rate": 2.084857694010987e-05, + "loss": 0.8064, + "step": 14210 + }, + { + "epoch": 1.0, + "learning_rate": 2.0825717431120362e-05, + "loss": 0.8033, + "step": 14220 + }, + { + "epoch": 1.01, + "learning_rate": 2.0802856841526217e-05, + "loss": 0.8011, + "step": 14230 + }, + { + "epoch": 1.01, + "learning_rate": 2.0779995201244773e-05, + "loss": 0.7976, + "step": 14240 + }, + { + "epoch": 1.01, + "learning_rate": 2.0757132540194722e-05, + "loss": 0.7957, + "step": 14250 + }, + { + "epoch": 1.01, + "learning_rate": 2.0734268888296105e-05, + "loss": 0.7972, + "step": 14260 + }, + { + "epoch": 1.01, + "learning_rate": 2.071140427547026e-05, + "loss": 0.8002, + "step": 14270 + }, + { + "epoch": 1.01, + "learning_rate": 2.068853873163979e-05, + "loss": 0.8021, + "step": 14280 + }, + { + "epoch": 1.01, + "learning_rate": 2.0665672286728484e-05, + "loss": 0.7892, + "step": 14290 + }, + { + "epoch": 1.01, + "learning_rate": 2.064280497066135e-05, + "loss": 0.799, + "step": 14300 + }, + { + "epoch": 1.01, + "learning_rate": 2.0619936813364506e-05, + "loss": 0.7956, + "step": 14310 + }, + { + "epoch": 1.01, + "learning_rate": 2.0597067844765202e-05, + "loss": 0.7962, + "step": 14320 + }, + { + "epoch": 1.01, + "learning_rate": 2.0574198094791713e-05, + "loss": 0.8008, + "step": 14330 + }, + { + "epoch": 1.01, + "learning_rate": 2.0551327593373357e-05, + "loss": 0.7955, + "step": 14340 + }, + { + "epoch": 1.01, + "learning_rate": 2.0528456370440445e-05, + "loss": 0.799, + "step": 14350 + }, + { + "epoch": 1.01, + "learning_rate": 2.0505584455924214e-05, + "loss": 0.7979, + "step": 14360 + }, + { + "epoch": 1.02, + "learning_rate": 2.0482711879756808e-05, + "loss": 0.7982, + "step": 14370 + }, + { + "epoch": 1.02, + "learning_rate": 2.0459838671871247e-05, + "loss": 0.8023, + "step": 14380 + }, + { + "epoch": 1.02, + "learning_rate": 2.0436964862201365e-05, + "loss": 0.7957, + "step": 14390 + }, + { + "epoch": 1.02, + "learning_rate": 2.04140904806818e-05, + "loss": 0.7962, + "step": 14400 + }, + { + "epoch": 1.02, + "learning_rate": 2.0391215557247933e-05, + "loss": 0.7968, + "step": 14410 + }, + { + "epoch": 1.02, + "learning_rate": 2.036834012183583e-05, + "loss": 0.7918, + "step": 14420 + }, + { + "epoch": 1.02, + "learning_rate": 2.0345464204382262e-05, + "loss": 0.7985, + "step": 14430 + }, + { + "epoch": 1.02, + "learning_rate": 2.032258783482462e-05, + "loss": 0.7925, + "step": 14440 + }, + { + "epoch": 1.02, + "learning_rate": 2.0299711043100867e-05, + "loss": 0.7933, + "step": 14450 + }, + { + "epoch": 1.02, + "learning_rate": 2.0276833859149553e-05, + "loss": 0.7961, + "step": 14460 + }, + { + "epoch": 1.02, + "learning_rate": 2.025395631290971e-05, + "loss": 0.7971, + "step": 14470 + }, + { + "epoch": 1.02, + "learning_rate": 2.023107843432086e-05, + "loss": 0.7971, + "step": 14480 + }, + { + "epoch": 1.02, + "learning_rate": 2.0208200253322957e-05, + "loss": 0.7985, + "step": 14490 + }, + { + "epoch": 1.02, + "learning_rate": 2.0185321799856345e-05, + "loss": 0.7994, + "step": 14500 + }, + { + "epoch": 1.02, + "learning_rate": 2.0162443103861746e-05, + "loss": 0.7939, + "step": 14510 + }, + { + "epoch": 1.03, + "learning_rate": 2.0139564195280164e-05, + "loss": 0.7873, + "step": 14520 + }, + { + "epoch": 1.03, + "learning_rate": 2.0116685104052908e-05, + "loss": 0.7951, + "step": 14530 + }, + { + "epoch": 1.03, + "learning_rate": 2.009380586012152e-05, + "loss": 0.7948, + "step": 14540 + }, + { + "epoch": 1.03, + "learning_rate": 2.007092649342775e-05, + "loss": 0.7902, + "step": 14550 + }, + { + "epoch": 1.03, + "learning_rate": 2.0048047033913475e-05, + "loss": 0.7946, + "step": 14560 + }, + { + "epoch": 1.03, + "learning_rate": 2.002516751152074e-05, + "loss": 0.7934, + "step": 14570 + }, + { + "epoch": 1.03, + "learning_rate": 2.000228795619164e-05, + "loss": 0.7947, + "step": 14580 + }, + { + "epoch": 1.03, + "learning_rate": 1.9979408397868325e-05, + "loss": 0.7976, + "step": 14590 + }, + { + "epoch": 1.03, + "learning_rate": 1.9956528866492944e-05, + "loss": 0.7934, + "step": 14600 + }, + { + "epoch": 1.03, + "learning_rate": 1.9933649392007616e-05, + "loss": 0.7982, + "step": 14610 + }, + { + "epoch": 1.03, + "learning_rate": 1.991077000435438e-05, + "loss": 0.7887, + "step": 14620 + }, + { + "epoch": 1.03, + "learning_rate": 1.988789073347517e-05, + "loss": 0.7873, + "step": 14630 + }, + { + "epoch": 1.03, + "learning_rate": 1.986501160931176e-05, + "loss": 0.7951, + "step": 14640 + }, + { + "epoch": 1.03, + "learning_rate": 1.984213266180574e-05, + "loss": 0.7933, + "step": 14650 + }, + { + "epoch": 1.04, + "learning_rate": 1.981925392089845e-05, + "loss": 0.7912, + "step": 14660 + }, + { + "epoch": 1.04, + "learning_rate": 1.979637541653097e-05, + "loss": 0.7907, + "step": 14670 + }, + { + "epoch": 1.04, + "learning_rate": 1.9773497178644085e-05, + "loss": 0.791, + "step": 14680 + }, + { + "epoch": 1.04, + "learning_rate": 1.9750619237178216e-05, + "loss": 0.7937, + "step": 14690 + }, + { + "epoch": 1.04, + "learning_rate": 1.9727741622073393e-05, + "loss": 0.7979, + "step": 14700 + }, + { + "epoch": 1.04, + "learning_rate": 1.9704864363269232e-05, + "loss": 0.7989, + "step": 14710 + }, + { + "epoch": 1.04, + "learning_rate": 1.9681987490704875e-05, + "loss": 0.7939, + "step": 14720 + }, + { + "epoch": 1.04, + "learning_rate": 1.9659111034318952e-05, + "loss": 0.7917, + "step": 14730 + }, + { + "epoch": 1.04, + "learning_rate": 1.9636235024049562e-05, + "loss": 0.7988, + "step": 14740 + }, + { + "epoch": 1.04, + "learning_rate": 1.961335948983421e-05, + "loss": 0.794, + "step": 14750 + }, + { + "epoch": 1.04, + "learning_rate": 1.959048446160978e-05, + "loss": 0.7931, + "step": 14760 + }, + { + "epoch": 1.04, + "learning_rate": 1.9567609969312497e-05, + "loss": 0.7941, + "step": 14770 + }, + { + "epoch": 1.04, + "learning_rate": 1.9544736042877886e-05, + "loss": 0.7937, + "step": 14780 + }, + { + "epoch": 1.04, + "learning_rate": 1.9521862712240728e-05, + "loss": 0.7994, + "step": 14790 + }, + { + "epoch": 1.05, + "learning_rate": 1.949899000733503e-05, + "loss": 0.7856, + "step": 14800 + }, + { + "epoch": 1.05, + "learning_rate": 1.947611795809396e-05, + "loss": 0.7982, + "step": 14810 + }, + { + "epoch": 1.05, + "learning_rate": 1.945324659444985e-05, + "loss": 0.7924, + "step": 14820 + }, + { + "epoch": 1.05, + "learning_rate": 1.9430375946334134e-05, + "loss": 0.7956, + "step": 14830 + }, + { + "epoch": 1.05, + "learning_rate": 1.9407506043677294e-05, + "loss": 0.7907, + "step": 14840 + }, + { + "epoch": 1.05, + "learning_rate": 1.938463691640885e-05, + "loss": 0.7919, + "step": 14850 + }, + { + "epoch": 1.05, + "learning_rate": 1.9361768594457305e-05, + "loss": 0.7909, + "step": 14860 + }, + { + "epoch": 1.05, + "learning_rate": 1.93389011077501e-05, + "loss": 0.7928, + "step": 14870 + }, + { + "epoch": 1.05, + "learning_rate": 1.9316034486213604e-05, + "loss": 0.7941, + "step": 14880 + }, + { + "epoch": 1.05, + "learning_rate": 1.9293168759773015e-05, + "loss": 0.7874, + "step": 14890 + }, + { + "epoch": 1.05, + "learning_rate": 1.9270303958352398e-05, + "loss": 0.7867, + "step": 14900 + }, + { + "epoch": 1.05, + "learning_rate": 1.9247440111874588e-05, + "loss": 0.7871, + "step": 14910 + }, + { + "epoch": 1.05, + "learning_rate": 1.922457725026118e-05, + "loss": 0.7918, + "step": 14920 + }, + { + "epoch": 1.05, + "learning_rate": 1.920171540343247e-05, + "loss": 0.7964, + "step": 14930 + }, + { + "epoch": 1.06, + "learning_rate": 1.9178854601307433e-05, + "loss": 0.7904, + "step": 14940 + }, + { + "epoch": 1.06, + "learning_rate": 1.915599487380369e-05, + "loss": 0.7914, + "step": 14950 + }, + { + "epoch": 1.06, + "learning_rate": 1.913313625083741e-05, + "loss": 0.7967, + "step": 14960 + }, + { + "epoch": 1.06, + "learning_rate": 1.9110278762323366e-05, + "loss": 0.8, + "step": 14970 + }, + { + "epoch": 1.06, + "learning_rate": 1.9087422438174822e-05, + "loss": 0.7965, + "step": 14980 + }, + { + "epoch": 1.06, + "learning_rate": 1.9064567308303527e-05, + "loss": 0.7912, + "step": 14990 + }, + { + "epoch": 1.06, + "learning_rate": 1.9041713402619662e-05, + "loss": 0.7936, + "step": 15000 + }, + { + "epoch": 1.06, + "learning_rate": 1.901886075103181e-05, + "loss": 0.791, + "step": 15010 + }, + { + "epoch": 1.06, + "learning_rate": 1.899600938344691e-05, + "loss": 0.7937, + "step": 15020 + }, + { + "epoch": 1.06, + "learning_rate": 1.8973159329770226e-05, + "loss": 0.797, + "step": 15030 + }, + { + "epoch": 1.06, + "learning_rate": 1.8950310619905282e-05, + "loss": 0.7926, + "step": 15040 + }, + { + "epoch": 1.06, + "learning_rate": 1.8927463283753872e-05, + "loss": 0.7952, + "step": 15050 + }, + { + "epoch": 1.06, + "learning_rate": 1.890461735121597e-05, + "loss": 0.7869, + "step": 15060 + }, + { + "epoch": 1.06, + "learning_rate": 1.888177285218973e-05, + "loss": 0.7847, + "step": 15070 + }, + { + "epoch": 1.07, + "learning_rate": 1.885892981657142e-05, + "loss": 0.7882, + "step": 15080 + }, + { + "epoch": 1.07, + "learning_rate": 1.8836088274255395e-05, + "loss": 0.7913, + "step": 15090 + }, + { + "epoch": 1.07, + "learning_rate": 1.8813248255134055e-05, + "loss": 0.7918, + "step": 15100 + }, + { + "epoch": 1.07, + "learning_rate": 1.8790409789097815e-05, + "loss": 0.7948, + "step": 15110 + }, + { + "epoch": 1.07, + "learning_rate": 1.8767572906035036e-05, + "loss": 0.7938, + "step": 15120 + }, + { + "epoch": 1.07, + "learning_rate": 1.8744737635832036e-05, + "loss": 0.7946, + "step": 15130 + }, + { + "epoch": 1.07, + "learning_rate": 1.8721904008372994e-05, + "loss": 0.7965, + "step": 15140 + }, + { + "epoch": 1.07, + "learning_rate": 1.8699072053539962e-05, + "loss": 0.7981, + "step": 15150 + }, + { + "epoch": 1.07, + "learning_rate": 1.8676241801212786e-05, + "loss": 0.8019, + "step": 15160 + }, + { + "epoch": 1.07, + "learning_rate": 1.86534132812691e-05, + "loss": 0.7994, + "step": 15170 + }, + { + "epoch": 1.07, + "learning_rate": 1.863058652358426e-05, + "loss": 0.7903, + "step": 15180 + }, + { + "epoch": 1.07, + "learning_rate": 1.8607761558031325e-05, + "loss": 0.7847, + "step": 15190 + }, + { + "epoch": 1.07, + "learning_rate": 1.8584938414480983e-05, + "loss": 0.7934, + "step": 15200 + }, + { + "epoch": 1.07, + "learning_rate": 1.8562117122801576e-05, + "loss": 0.8028, + "step": 15210 + }, + { + "epoch": 1.08, + "learning_rate": 1.853929771285899e-05, + "loss": 0.7888, + "step": 15220 + }, + { + "epoch": 1.08, + "learning_rate": 1.8516480214516674e-05, + "loss": 0.7946, + "step": 15230 + }, + { + "epoch": 1.08, + "learning_rate": 1.8493664657635554e-05, + "loss": 0.7962, + "step": 15240 + }, + { + "epoch": 1.08, + "learning_rate": 1.8470851072074026e-05, + "loss": 0.7919, + "step": 15250 + }, + { + "epoch": 1.08, + "learning_rate": 1.8448039487687907e-05, + "loss": 0.7944, + "step": 15260 + }, + { + "epoch": 1.08, + "learning_rate": 1.8425229934330386e-05, + "loss": 0.7953, + "step": 15270 + }, + { + "epoch": 1.08, + "learning_rate": 1.8402422441852005e-05, + "loss": 0.7964, + "step": 15280 + }, + { + "epoch": 1.08, + "learning_rate": 1.8379617040100602e-05, + "loss": 0.7941, + "step": 15290 + }, + { + "epoch": 1.08, + "learning_rate": 1.8356813758921282e-05, + "loss": 0.7925, + "step": 15300 + }, + { + "epoch": 1.08, + "learning_rate": 1.8334012628156378e-05, + "loss": 0.7921, + "step": 15310 + }, + { + "epoch": 1.08, + "learning_rate": 1.83112136776454e-05, + "loss": 0.7871, + "step": 15320 + }, + { + "epoch": 1.08, + "learning_rate": 1.8288416937225015e-05, + "loss": 0.7935, + "step": 15330 + }, + { + "epoch": 1.08, + "learning_rate": 1.826562243672899e-05, + "loss": 0.798, + "step": 15340 + }, + { + "epoch": 1.08, + "learning_rate": 1.8242830205988157e-05, + "loss": 0.7912, + "step": 15350 + }, + { + "epoch": 1.08, + "learning_rate": 1.8220040274830385e-05, + "loss": 0.7991, + "step": 15360 + }, + { + "epoch": 1.09, + "learning_rate": 1.819725267308054e-05, + "loss": 0.7856, + "step": 15370 + }, + { + "epoch": 1.09, + "learning_rate": 1.8174467430560423e-05, + "loss": 0.8025, + "step": 15380 + }, + { + "epoch": 1.09, + "learning_rate": 1.8151684577088762e-05, + "loss": 0.8004, + "step": 15390 + }, + { + "epoch": 1.09, + "learning_rate": 1.812890414248115e-05, + "loss": 0.7877, + "step": 15400 + }, + { + "epoch": 1.09, + "learning_rate": 1.8106126156550016e-05, + "loss": 0.7864, + "step": 15410 + }, + { + "epoch": 1.09, + "learning_rate": 1.8083350649104597e-05, + "loss": 0.7816, + "step": 15420 + }, + { + "epoch": 1.09, + "learning_rate": 1.8060577649950856e-05, + "loss": 0.7897, + "step": 15430 + }, + { + "epoch": 1.09, + "learning_rate": 1.80378071888915e-05, + "loss": 0.7888, + "step": 15440 + }, + { + "epoch": 1.09, + "learning_rate": 1.801503929572591e-05, + "loss": 0.7932, + "step": 15450 + }, + { + "epoch": 1.09, + "learning_rate": 1.7992274000250098e-05, + "loss": 0.7967, + "step": 15460 + }, + { + "epoch": 1.09, + "learning_rate": 1.7969511332256688e-05, + "loss": 0.7956, + "step": 15470 + }, + { + "epoch": 1.09, + "learning_rate": 1.7946751321534857e-05, + "loss": 0.7927, + "step": 15480 + }, + { + "epoch": 1.09, + "learning_rate": 1.7923993997870312e-05, + "loss": 0.7871, + "step": 15490 + }, + { + "epoch": 1.09, + "learning_rate": 1.7901239391045226e-05, + "loss": 0.7951, + "step": 15500 + }, + { + "epoch": 1.1, + "learning_rate": 1.7878487530838234e-05, + "loss": 0.7905, + "step": 15510 + }, + { + "epoch": 1.1, + "learning_rate": 1.7855738447024372e-05, + "loss": 0.7808, + "step": 15520 + }, + { + "epoch": 1.1, + "learning_rate": 1.783299216937504e-05, + "loss": 0.7829, + "step": 15530 + }, + { + "epoch": 1.1, + "learning_rate": 1.781024872765797e-05, + "loss": 0.7944, + "step": 15540 + }, + { + "epoch": 1.1, + "learning_rate": 1.778750815163718e-05, + "loss": 0.7938, + "step": 15550 + }, + { + "epoch": 1.1, + "learning_rate": 1.7764770471072936e-05, + "loss": 0.7893, + "step": 15560 + }, + { + "epoch": 1.1, + "learning_rate": 1.7742035715721725e-05, + "loss": 0.7819, + "step": 15570 + }, + { + "epoch": 1.1, + "learning_rate": 1.771930391533618e-05, + "loss": 0.7923, + "step": 15580 + }, + { + "epoch": 1.1, + "learning_rate": 1.7696575099665096e-05, + "loss": 0.7904, + "step": 15590 + }, + { + "epoch": 1.1, + "learning_rate": 1.7673849298453347e-05, + "loss": 0.7904, + "step": 15600 + }, + { + "epoch": 1.1, + "learning_rate": 1.7651126541441866e-05, + "loss": 0.7931, + "step": 15610 + }, + { + "epoch": 1.1, + "learning_rate": 1.76284068583676e-05, + "loss": 0.7949, + "step": 15620 + }, + { + "epoch": 1.1, + "learning_rate": 1.7605690278963473e-05, + "loss": 0.7868, + "step": 15630 + }, + { + "epoch": 1.1, + "learning_rate": 1.7582976832958355e-05, + "loss": 0.7861, + "step": 15640 + }, + { + "epoch": 1.11, + "learning_rate": 1.7560266550077015e-05, + "loss": 0.7889, + "step": 15650 + }, + { + "epoch": 1.11, + "learning_rate": 1.7537559460040054e-05, + "loss": 0.7905, + "step": 15660 + }, + { + "epoch": 1.11, + "learning_rate": 1.7514855592563932e-05, + "loss": 0.7918, + "step": 15670 + }, + { + "epoch": 1.11, + "learning_rate": 1.7492154977360875e-05, + "loss": 0.7877, + "step": 15680 + }, + { + "epoch": 1.11, + "learning_rate": 1.746945764413885e-05, + "loss": 0.7881, + "step": 15690 + }, + { + "epoch": 1.11, + "learning_rate": 1.744676362260154e-05, + "loss": 0.7921, + "step": 15700 + }, + { + "epoch": 1.11, + "learning_rate": 1.7424072942448286e-05, + "loss": 0.7937, + "step": 15710 + }, + { + "epoch": 1.11, + "learning_rate": 1.7401385633374065e-05, + "loss": 0.7783, + "step": 15720 + }, + { + "epoch": 1.11, + "learning_rate": 1.7378701725069418e-05, + "loss": 0.7909, + "step": 15730 + }, + { + "epoch": 1.11, + "learning_rate": 1.7356021247220462e-05, + "loss": 0.7827, + "step": 15740 + }, + { + "epoch": 1.11, + "learning_rate": 1.7333344229508814e-05, + "loss": 0.7882, + "step": 15750 + }, + { + "epoch": 1.11, + "learning_rate": 1.7310670701611564e-05, + "loss": 0.7846, + "step": 15760 + }, + { + "epoch": 1.11, + "learning_rate": 1.7288000693201235e-05, + "loss": 0.7869, + "step": 15770 + }, + { + "epoch": 1.11, + "learning_rate": 1.726533423394574e-05, + "loss": 0.7937, + "step": 15780 + }, + { + "epoch": 1.12, + "learning_rate": 1.724267135350836e-05, + "loss": 0.7895, + "step": 15790 + }, + { + "epoch": 1.12, + "learning_rate": 1.7220012081547688e-05, + "loss": 0.7871, + "step": 15800 + }, + { + "epoch": 1.12, + "learning_rate": 1.719735644771757e-05, + "loss": 0.7858, + "step": 15810 + }, + { + "epoch": 1.12, + "learning_rate": 1.7174704481667123e-05, + "loss": 0.7838, + "step": 15820 + }, + { + "epoch": 1.12, + "learning_rate": 1.715205621304065e-05, + "loss": 0.7846, + "step": 15830 + }, + { + "epoch": 1.12, + "learning_rate": 1.712941167147762e-05, + "loss": 0.7829, + "step": 15840 + }, + { + "epoch": 1.12, + "learning_rate": 1.7106770886612616e-05, + "loss": 0.7894, + "step": 15850 + }, + { + "epoch": 1.12, + "learning_rate": 1.7084133888075317e-05, + "loss": 0.7889, + "step": 15860 + }, + { + "epoch": 1.12, + "learning_rate": 1.7061500705490438e-05, + "loss": 0.7868, + "step": 15870 + }, + { + "epoch": 1.12, + "learning_rate": 1.7038871368477707e-05, + "loss": 0.7835, + "step": 15880 + }, + { + "epoch": 1.12, + "learning_rate": 1.7016245906651806e-05, + "loss": 0.7901, + "step": 15890 + }, + { + "epoch": 1.12, + "learning_rate": 1.6993624349622365e-05, + "loss": 0.7892, + "step": 15900 + }, + { + "epoch": 1.12, + "learning_rate": 1.6971006726993878e-05, + "loss": 0.7878, + "step": 15910 + }, + { + "epoch": 1.12, + "learning_rate": 1.694839306836572e-05, + "loss": 0.7886, + "step": 15920 + }, + { + "epoch": 1.13, + "learning_rate": 1.692578340333205e-05, + "loss": 0.7834, + "step": 15930 + }, + { + "epoch": 1.13, + "learning_rate": 1.690317776148183e-05, + "loss": 0.7907, + "step": 15940 + }, + { + "epoch": 1.13, + "learning_rate": 1.6880576172398733e-05, + "loss": 0.7897, + "step": 15950 + }, + { + "epoch": 1.13, + "learning_rate": 1.6857978665661137e-05, + "loss": 0.7907, + "step": 15960 + }, + { + "epoch": 1.13, + "learning_rate": 1.6835385270842075e-05, + "loss": 0.7827, + "step": 15970 + }, + { + "epoch": 1.13, + "learning_rate": 1.6812796017509203e-05, + "loss": 0.7916, + "step": 15980 + }, + { + "epoch": 1.13, + "learning_rate": 1.6790210935224752e-05, + "loss": 0.7851, + "step": 15990 + }, + { + "epoch": 1.13, + "learning_rate": 1.676763005354551e-05, + "loss": 0.7883, + "step": 16000 + }, + { + "epoch": 1.13, + "learning_rate": 1.6745053402022736e-05, + "loss": 0.7907, + "step": 16010 + }, + { + "epoch": 1.13, + "learning_rate": 1.6722481010202182e-05, + "loss": 0.7848, + "step": 16020 + }, + { + "epoch": 1.13, + "learning_rate": 1.6699912907624018e-05, + "loss": 0.7879, + "step": 16030 + }, + { + "epoch": 1.13, + "learning_rate": 1.667734912382279e-05, + "loss": 0.7878, + "step": 16040 + }, + { + "epoch": 1.13, + "learning_rate": 1.665478968832741e-05, + "loss": 0.7869, + "step": 16050 + }, + { + "epoch": 1.13, + "learning_rate": 1.663223463066108e-05, + "loss": 0.7861, + "step": 16060 + }, + { + "epoch": 1.14, + "learning_rate": 1.6609683980341288e-05, + "loss": 0.7858, + "step": 16070 + }, + { + "epoch": 1.14, + "learning_rate": 1.6587137766879748e-05, + "loss": 0.7858, + "step": 16080 + }, + { + "epoch": 1.14, + "learning_rate": 1.6564596019782373e-05, + "loss": 0.795, + "step": 16090 + }, + { + "epoch": 1.14, + "learning_rate": 1.6542058768549216e-05, + "loss": 0.7886, + "step": 16100 + }, + { + "epoch": 1.14, + "learning_rate": 1.6519526042674466e-05, + "loss": 0.7834, + "step": 16110 + }, + { + "epoch": 1.14, + "learning_rate": 1.649699787164637e-05, + "loss": 0.7832, + "step": 16120 + }, + { + "epoch": 1.14, + "learning_rate": 1.6474474284947232e-05, + "loss": 0.7863, + "step": 16130 + }, + { + "epoch": 1.14, + "learning_rate": 1.6451955312053346e-05, + "loss": 0.7867, + "step": 16140 + }, + { + "epoch": 1.14, + "learning_rate": 1.642944098243497e-05, + "loss": 0.7882, + "step": 16150 + }, + { + "epoch": 1.14, + "learning_rate": 1.640693132555629e-05, + "loss": 0.7855, + "step": 16160 + }, + { + "epoch": 1.14, + "learning_rate": 1.638442637087537e-05, + "loss": 0.7836, + "step": 16170 + }, + { + "epoch": 1.14, + "learning_rate": 1.6361926147844137e-05, + "loss": 0.7834, + "step": 16180 + }, + { + "epoch": 1.14, + "learning_rate": 1.6339430685908287e-05, + "loss": 0.7902, + "step": 16190 + }, + { + "epoch": 1.14, + "learning_rate": 1.6316940014507325e-05, + "loss": 0.7874, + "step": 16200 + }, + { + "epoch": 1.15, + "learning_rate": 1.629445416307447e-05, + "loss": 0.7877, + "step": 16210 + }, + { + "epoch": 1.15, + "learning_rate": 1.6271973161036636e-05, + "loss": 0.7844, + "step": 16220 + }, + { + "epoch": 1.15, + "learning_rate": 1.624949703781439e-05, + "loss": 0.7867, + "step": 16230 + }, + { + "epoch": 1.15, + "learning_rate": 1.622702582282191e-05, + "loss": 0.7911, + "step": 16240 + }, + { + "epoch": 1.15, + "learning_rate": 1.6204559545466963e-05, + "loss": 0.7912, + "step": 16250 + }, + { + "epoch": 1.15, + "learning_rate": 1.6182098235150847e-05, + "loss": 0.7861, + "step": 16260 + }, + { + "epoch": 1.15, + "learning_rate": 1.6159641921268347e-05, + "loss": 0.786, + "step": 16270 + }, + { + "epoch": 1.15, + "learning_rate": 1.613719063320772e-05, + "loss": 0.7801, + "step": 16280 + }, + { + "epoch": 1.15, + "learning_rate": 1.611474440035066e-05, + "loss": 0.7904, + "step": 16290 + }, + { + "epoch": 1.15, + "learning_rate": 1.6092303252072224e-05, + "loss": 0.781, + "step": 16300 + }, + { + "epoch": 1.15, + "learning_rate": 1.6069867217740824e-05, + "loss": 0.791, + "step": 16310 + }, + { + "epoch": 1.15, + "learning_rate": 1.604743632671818e-05, + "loss": 0.7813, + "step": 16320 + }, + { + "epoch": 1.15, + "learning_rate": 1.6025010608359277e-05, + "loss": 0.7837, + "step": 16330 + }, + { + "epoch": 1.15, + "learning_rate": 1.6002590092012343e-05, + "loss": 0.7894, + "step": 16340 + }, + { + "epoch": 1.15, + "learning_rate": 1.598017480701877e-05, + "loss": 0.7827, + "step": 16350 + }, + { + "epoch": 1.16, + "learning_rate": 1.595776478271313e-05, + "loss": 0.7891, + "step": 16360 + }, + { + "epoch": 1.16, + "learning_rate": 1.593536004842311e-05, + "loss": 0.7884, + "step": 16370 + }, + { + "epoch": 1.16, + "learning_rate": 1.5912960633469455e-05, + "loss": 0.7927, + "step": 16380 + }, + { + "epoch": 1.16, + "learning_rate": 1.5890566567165967e-05, + "loss": 0.7881, + "step": 16390 + }, + { + "epoch": 1.16, + "learning_rate": 1.5868177878819436e-05, + "loss": 0.7984, + "step": 16400 + }, + { + "epoch": 1.16, + "learning_rate": 1.5845794597729636e-05, + "loss": 0.8095, + "step": 16410 + }, + { + "epoch": 1.16, + "learning_rate": 1.5823416753189224e-05, + "loss": 0.7979, + "step": 16420 + }, + { + "epoch": 1.16, + "learning_rate": 1.5801044374483776e-05, + "loss": 0.7975, + "step": 16430 + }, + { + "epoch": 1.16, + "learning_rate": 1.5778677490891706e-05, + "loss": 0.7938, + "step": 16440 + }, + { + "epoch": 1.16, + "learning_rate": 1.575631613168424e-05, + "loss": 0.7905, + "step": 16450 + }, + { + "epoch": 1.16, + "learning_rate": 1.5733960326125363e-05, + "loss": 0.7916, + "step": 16460 + }, + { + "epoch": 1.16, + "learning_rate": 1.57116101034718e-05, + "loss": 0.7958, + "step": 16470 + }, + { + "epoch": 1.16, + "learning_rate": 1.5689265492972975e-05, + "loss": 0.7927, + "step": 16480 + }, + { + "epoch": 1.16, + "learning_rate": 1.5666926523870964e-05, + "loss": 0.7879, + "step": 16490 + }, + { + "epoch": 1.17, + "learning_rate": 1.5644593225400445e-05, + "loss": 0.7854, + "step": 16500 + }, + { + "epoch": 1.17, + "learning_rate": 1.562226562678869e-05, + "loss": 0.7952, + "step": 16510 + }, + { + "epoch": 1.17, + "learning_rate": 1.559994375725551e-05, + "loss": 0.7925, + "step": 16520 + }, + { + "epoch": 1.17, + "learning_rate": 1.5577627646013223e-05, + "loss": 0.7793, + "step": 16530 + }, + { + "epoch": 1.17, + "learning_rate": 1.55553173222666e-05, + "loss": 0.7753, + "step": 16540 + }, + { + "epoch": 1.17, + "learning_rate": 1.5533012815212848e-05, + "loss": 0.7837, + "step": 16550 + }, + { + "epoch": 1.17, + "learning_rate": 1.5510714154041553e-05, + "loss": 0.7857, + "step": 16560 + }, + { + "epoch": 1.17, + "learning_rate": 1.5488421367934666e-05, + "loss": 0.7915, + "step": 16570 + }, + { + "epoch": 1.17, + "learning_rate": 1.5466134486066425e-05, + "loss": 0.7869, + "step": 16580 + }, + { + "epoch": 1.17, + "learning_rate": 1.5443853537603356e-05, + "loss": 0.789, + "step": 16590 + }, + { + "epoch": 1.17, + "learning_rate": 1.5421578551704222e-05, + "loss": 0.7842, + "step": 16600 + }, + { + "epoch": 1.17, + "learning_rate": 1.539930955751998e-05, + "loss": 0.7865, + "step": 16610 + }, + { + "epoch": 1.17, + "learning_rate": 1.537704658419375e-05, + "loss": 0.7831, + "step": 16620 + }, + { + "epoch": 1.17, + "learning_rate": 1.5354789660860758e-05, + "loss": 0.7936, + "step": 16630 + }, + { + "epoch": 1.18, + "learning_rate": 1.5332538816648327e-05, + "loss": 0.7854, + "step": 16640 + }, + { + "epoch": 1.18, + "learning_rate": 1.5310294080675828e-05, + "loss": 0.7841, + "step": 16650 + }, + { + "epoch": 1.18, + "learning_rate": 1.528805548205462e-05, + "loss": 0.7887, + "step": 16660 + }, + { + "epoch": 1.18, + "learning_rate": 1.5265823049888037e-05, + "loss": 0.7822, + "step": 16670 + }, + { + "epoch": 1.18, + "learning_rate": 1.5243596813271349e-05, + "loss": 0.7826, + "step": 16680 + }, + { + "epoch": 1.18, + "learning_rate": 1.5221376801291719e-05, + "loss": 0.7818, + "step": 16690 + }, + { + "epoch": 1.18, + "learning_rate": 1.5199163043028158e-05, + "loss": 0.7868, + "step": 16700 + }, + { + "epoch": 1.18, + "learning_rate": 1.5176955567551495e-05, + "loss": 0.788, + "step": 16710 + }, + { + "epoch": 1.18, + "learning_rate": 1.515475440392434e-05, + "loss": 0.7819, + "step": 16720 + }, + { + "epoch": 1.18, + "learning_rate": 1.5132559581201031e-05, + "loss": 0.7826, + "step": 16730 + }, + { + "epoch": 1.18, + "learning_rate": 1.5110371128427623e-05, + "loss": 0.7797, + "step": 16740 + }, + { + "epoch": 1.18, + "learning_rate": 1.5088189074641826e-05, + "loss": 0.7831, + "step": 16750 + }, + { + "epoch": 1.18, + "learning_rate": 1.5066013448872981e-05, + "loss": 0.7848, + "step": 16760 + }, + { + "epoch": 1.18, + "learning_rate": 1.5043844280142005e-05, + "loss": 0.777, + "step": 16770 + }, + { + "epoch": 1.19, + "learning_rate": 1.502168159746138e-05, + "loss": 0.786, + "step": 16780 + }, + { + "epoch": 1.19, + "learning_rate": 1.499952542983509e-05, + "loss": 0.7905, + "step": 16790 + }, + { + "epoch": 1.19, + "learning_rate": 1.4977375806258599e-05, + "loss": 0.7827, + "step": 16800 + }, + { + "epoch": 1.19, + "learning_rate": 1.4955232755718795e-05, + "loss": 0.7797, + "step": 16810 + }, + { + "epoch": 1.19, + "learning_rate": 1.4933096307193986e-05, + "loss": 0.7855, + "step": 16820 + }, + { + "epoch": 1.19, + "learning_rate": 1.4910966489653814e-05, + "loss": 0.7775, + "step": 16830 + }, + { + "epoch": 1.19, + "learning_rate": 1.4888843332059267e-05, + "loss": 0.7747, + "step": 16840 + }, + { + "epoch": 1.19, + "learning_rate": 1.4866726863362595e-05, + "loss": 0.7908, + "step": 16850 + }, + { + "epoch": 1.19, + "learning_rate": 1.4844617112507317e-05, + "loss": 0.7838, + "step": 16860 + }, + { + "epoch": 1.19, + "learning_rate": 1.482251410842814e-05, + "loss": 0.7815, + "step": 16870 + }, + { + "epoch": 1.19, + "learning_rate": 1.4800417880050955e-05, + "loss": 0.777, + "step": 16880 + }, + { + "epoch": 1.19, + "learning_rate": 1.4778328456292776e-05, + "loss": 0.7814, + "step": 16890 + }, + { + "epoch": 1.19, + "learning_rate": 1.475624586606172e-05, + "loss": 0.7774, + "step": 16900 + }, + { + "epoch": 1.19, + "learning_rate": 1.4734170138256958e-05, + "loss": 0.776, + "step": 16910 + }, + { + "epoch": 1.2, + "learning_rate": 1.4712101301768681e-05, + "loss": 0.7724, + "step": 16920 + }, + { + "epoch": 1.2, + "learning_rate": 1.469003938547806e-05, + "loss": 0.7892, + "step": 16930 + }, + { + "epoch": 1.2, + "learning_rate": 1.4667984418257211e-05, + "loss": 0.7826, + "step": 16940 + }, + { + "epoch": 1.2, + "learning_rate": 1.4645936428969165e-05, + "loss": 0.7769, + "step": 16950 + }, + { + "epoch": 1.2, + "learning_rate": 1.4623895446467789e-05, + "loss": 0.7726, + "step": 16960 + }, + { + "epoch": 1.2, + "learning_rate": 1.4601861499597818e-05, + "loss": 0.7739, + "step": 16970 + }, + { + "epoch": 1.2, + "learning_rate": 1.4579834617194762e-05, + "loss": 0.7842, + "step": 16980 + }, + { + "epoch": 1.2, + "learning_rate": 1.4557814828084884e-05, + "loss": 0.78, + "step": 16990 + }, + { + "epoch": 1.2, + "learning_rate": 1.4535802161085175e-05, + "loss": 0.7809, + "step": 17000 + }, + { + "epoch": 1.2, + "learning_rate": 1.4513796645003293e-05, + "loss": 0.7807, + "step": 17010 + }, + { + "epoch": 1.2, + "learning_rate": 1.4491798308637544e-05, + "loss": 0.7799, + "step": 17020 + }, + { + "epoch": 1.2, + "learning_rate": 1.4469807180776849e-05, + "loss": 0.7815, + "step": 17030 + }, + { + "epoch": 1.2, + "learning_rate": 1.4447823290200664e-05, + "loss": 0.7719, + "step": 17040 + }, + { + "epoch": 1.2, + "learning_rate": 1.4425846665679001e-05, + "loss": 0.7802, + "step": 17050 + }, + { + "epoch": 1.21, + "learning_rate": 1.4403877335972359e-05, + "loss": 0.7795, + "step": 17060 + }, + { + "epoch": 1.21, + "learning_rate": 1.4381915329831682e-05, + "loss": 0.7861, + "step": 17070 + }, + { + "epoch": 1.21, + "learning_rate": 1.4359960675998337e-05, + "loss": 0.7819, + "step": 17080 + }, + { + "epoch": 1.21, + "learning_rate": 1.4338013403204067e-05, + "loss": 0.7751, + "step": 17090 + }, + { + "epoch": 1.21, + "learning_rate": 1.4316073540170953e-05, + "loss": 0.788, + "step": 17100 + }, + { + "epoch": 1.21, + "learning_rate": 1.4294141115611393e-05, + "loss": 0.7822, + "step": 17110 + }, + { + "epoch": 1.21, + "learning_rate": 1.4272216158228017e-05, + "loss": 0.7749, + "step": 17120 + }, + { + "epoch": 1.21, + "learning_rate": 1.4250298696713716e-05, + "loss": 0.7748, + "step": 17130 + }, + { + "epoch": 1.21, + "learning_rate": 1.4228388759751557e-05, + "loss": 0.7839, + "step": 17140 + }, + { + "epoch": 1.21, + "learning_rate": 1.4206486376014765e-05, + "loss": 0.7822, + "step": 17150 + }, + { + "epoch": 1.21, + "learning_rate": 1.4184591574166677e-05, + "loss": 0.787, + "step": 17160 + }, + { + "epoch": 1.21, + "learning_rate": 1.4162704382860707e-05, + "loss": 0.7797, + "step": 17170 + }, + { + "epoch": 1.21, + "learning_rate": 1.4140824830740317e-05, + "loss": 0.7831, + "step": 17180 + }, + { + "epoch": 1.21, + "learning_rate": 1.411895294643895e-05, + "loss": 0.7822, + "step": 17190 + }, + { + "epoch": 1.21, + "learning_rate": 1.4097088758580037e-05, + "loss": 0.7835, + "step": 17200 + }, + { + "epoch": 1.22, + "learning_rate": 1.4075232295776932e-05, + "loss": 0.7766, + "step": 17210 + }, + { + "epoch": 1.22, + "learning_rate": 1.4053383586632871e-05, + "loss": 0.7819, + "step": 17220 + }, + { + "epoch": 1.22, + "learning_rate": 1.4031542659740955e-05, + "loss": 0.7788, + "step": 17230 + }, + { + "epoch": 1.22, + "learning_rate": 1.400970954368409e-05, + "loss": 0.7751, + "step": 17240 + }, + { + "epoch": 1.22, + "learning_rate": 1.3987884267034961e-05, + "loss": 0.7889, + "step": 17250 + }, + { + "epoch": 1.22, + "learning_rate": 1.396606685835601e-05, + "loss": 0.7736, + "step": 17260 + }, + { + "epoch": 1.22, + "learning_rate": 1.3944257346199347e-05, + "loss": 0.7837, + "step": 17270 + }, + { + "epoch": 1.22, + "learning_rate": 1.3922455759106783e-05, + "loss": 0.7826, + "step": 17280 + }, + { + "epoch": 1.22, + "learning_rate": 1.3900662125609738e-05, + "loss": 0.7753, + "step": 17290 + }, + { + "epoch": 1.22, + "learning_rate": 1.3878876474229239e-05, + "loss": 0.7858, + "step": 17300 + }, + { + "epoch": 1.22, + "learning_rate": 1.3857098833475849e-05, + "loss": 0.7784, + "step": 17310 + }, + { + "epoch": 1.22, + "learning_rate": 1.383532923184966e-05, + "loss": 0.7766, + "step": 17320 + }, + { + "epoch": 1.22, + "learning_rate": 1.3813567697840237e-05, + "loss": 0.7798, + "step": 17330 + }, + { + "epoch": 1.22, + "learning_rate": 1.37918142599266e-05, + "loss": 0.7743, + "step": 17340 + }, + { + "epoch": 1.23, + "learning_rate": 1.3770068946577147e-05, + "loss": 0.7714, + "step": 17350 + }, + { + "epoch": 1.23, + "learning_rate": 1.3748331786249665e-05, + "loss": 0.7757, + "step": 17360 + }, + { + "epoch": 1.23, + "learning_rate": 1.3726602807391267e-05, + "loss": 0.7759, + "step": 17370 + }, + { + "epoch": 1.23, + "learning_rate": 1.3704882038438361e-05, + "loss": 0.7728, + "step": 17380 + }, + { + "epoch": 1.23, + "learning_rate": 1.3683169507816603e-05, + "loss": 0.7767, + "step": 17390 + }, + { + "epoch": 1.23, + "learning_rate": 1.3661465243940875e-05, + "loss": 0.7778, + "step": 17400 + }, + { + "epoch": 1.23, + "learning_rate": 1.3639769275215238e-05, + "loss": 0.7761, + "step": 17410 + }, + { + "epoch": 1.23, + "learning_rate": 1.3618081630032895e-05, + "loss": 0.7779, + "step": 17420 + }, + { + "epoch": 1.23, + "learning_rate": 1.3596402336776164e-05, + "loss": 0.7819, + "step": 17430 + }, + { + "epoch": 1.23, + "learning_rate": 1.3574731423816419e-05, + "loss": 0.7729, + "step": 17440 + }, + { + "epoch": 1.23, + "learning_rate": 1.3553068919514076e-05, + "loss": 0.7782, + "step": 17450 + }, + { + "epoch": 1.23, + "learning_rate": 1.353141485221855e-05, + "loss": 0.7793, + "step": 17460 + }, + { + "epoch": 1.23, + "learning_rate": 1.3509769250268208e-05, + "loss": 0.779, + "step": 17470 + }, + { + "epoch": 1.23, + "learning_rate": 1.348813214199034e-05, + "loss": 0.7814, + "step": 17480 + }, + { + "epoch": 1.24, + "learning_rate": 1.3466503555701126e-05, + "loss": 0.7801, + "step": 17490 + }, + { + "epoch": 1.24, + "learning_rate": 1.3444883519705583e-05, + "loss": 0.7762, + "step": 17500 + }, + { + "epoch": 1.24, + "learning_rate": 1.3423272062297543e-05, + "loss": 0.7824, + "step": 17510 + }, + { + "epoch": 1.24, + "learning_rate": 1.3401669211759615e-05, + "loss": 0.7857, + "step": 17520 + }, + { + "epoch": 1.24, + "learning_rate": 1.3380074996363148e-05, + "loss": 0.7791, + "step": 17530 + }, + { + "epoch": 1.24, + "learning_rate": 1.3358489444368173e-05, + "loss": 0.7753, + "step": 17540 + }, + { + "epoch": 1.24, + "learning_rate": 1.3336912584023395e-05, + "loss": 0.776, + "step": 17550 + }, + { + "epoch": 1.24, + "learning_rate": 1.3315344443566152e-05, + "loss": 0.7805, + "step": 17560 + }, + { + "epoch": 1.24, + "learning_rate": 1.329378505122236e-05, + "loss": 0.779, + "step": 17570 + }, + { + "epoch": 1.24, + "learning_rate": 1.327223443520648e-05, + "loss": 0.7764, + "step": 17580 + }, + { + "epoch": 1.24, + "learning_rate": 1.3250692623721504e-05, + "loss": 0.7822, + "step": 17590 + }, + { + "epoch": 1.24, + "learning_rate": 1.322915964495889e-05, + "loss": 0.7789, + "step": 17600 + }, + { + "epoch": 1.24, + "learning_rate": 1.3207635527098543e-05, + "loss": 0.7817, + "step": 17610 + }, + { + "epoch": 1.24, + "learning_rate": 1.3186120298308767e-05, + "loss": 0.7808, + "step": 17620 + }, + { + "epoch": 1.25, + "learning_rate": 1.316461398674624e-05, + "loss": 0.7765, + "step": 17630 + }, + { + "epoch": 1.25, + "learning_rate": 1.3143116620555963e-05, + "loss": 0.7795, + "step": 17640 + }, + { + "epoch": 1.25, + "learning_rate": 1.3121628227871227e-05, + "loss": 0.7727, + "step": 17650 + }, + { + "epoch": 1.25, + "learning_rate": 1.3100148836813591e-05, + "loss": 0.7779, + "step": 17660 + }, + { + "epoch": 1.25, + "learning_rate": 1.3078678475492823e-05, + "loss": 0.7786, + "step": 17670 + }, + { + "epoch": 1.25, + "learning_rate": 1.3057217172006887e-05, + "loss": 0.7838, + "step": 17680 + }, + { + "epoch": 1.25, + "learning_rate": 1.3035764954441876e-05, + "loss": 0.778, + "step": 17690 + }, + { + "epoch": 1.25, + "learning_rate": 1.301432185087201e-05, + "loss": 0.7826, + "step": 17700 + }, + { + "epoch": 1.25, + "learning_rate": 1.2992887889359565e-05, + "loss": 0.7774, + "step": 17710 + }, + { + "epoch": 1.25, + "learning_rate": 1.2971463097954874e-05, + "loss": 0.7824, + "step": 17720 + }, + { + "epoch": 1.25, + "learning_rate": 1.295004750469624e-05, + "loss": 0.7718, + "step": 17730 + }, + { + "epoch": 1.25, + "learning_rate": 1.2928641137609952e-05, + "loss": 0.776, + "step": 17740 + }, + { + "epoch": 1.25, + "learning_rate": 1.2907244024710215e-05, + "loss": 0.783, + "step": 17750 + }, + { + "epoch": 1.25, + "learning_rate": 1.2885856193999128e-05, + "loss": 0.788, + "step": 17760 + }, + { + "epoch": 1.26, + "learning_rate": 1.286447767346664e-05, + "loss": 0.7777, + "step": 17770 + }, + { + "epoch": 1.26, + "learning_rate": 1.2843108491090517e-05, + "loss": 0.7823, + "step": 17780 + }, + { + "epoch": 1.26, + "learning_rate": 1.2821748674836303e-05, + "loss": 0.7774, + "step": 17790 + }, + { + "epoch": 1.26, + "learning_rate": 1.2800398252657292e-05, + "loss": 0.781, + "step": 17800 + }, + { + "epoch": 1.26, + "learning_rate": 1.277905725249446e-05, + "loss": 0.7709, + "step": 17810 + }, + { + "epoch": 1.26, + "learning_rate": 1.275772570227648e-05, + "loss": 0.774, + "step": 17820 + }, + { + "epoch": 1.26, + "learning_rate": 1.273640362991965e-05, + "loss": 0.7757, + "step": 17830 + }, + { + "epoch": 1.26, + "learning_rate": 1.2715091063327854e-05, + "loss": 0.7785, + "step": 17840 + }, + { + "epoch": 1.26, + "learning_rate": 1.2693788030392553e-05, + "loss": 0.7691, + "step": 17850 + }, + { + "epoch": 1.26, + "learning_rate": 1.2672494558992719e-05, + "loss": 0.7802, + "step": 17860 + }, + { + "epoch": 1.26, + "learning_rate": 1.2651210676994821e-05, + "loss": 0.7814, + "step": 17870 + }, + { + "epoch": 1.26, + "learning_rate": 1.262993641225276e-05, + "loss": 0.7712, + "step": 17880 + }, + { + "epoch": 1.26, + "learning_rate": 1.2608671792607866e-05, + "loss": 0.7732, + "step": 17890 + }, + { + "epoch": 1.26, + "learning_rate": 1.2587416845888844e-05, + "loss": 0.7754, + "step": 17900 + }, + { + "epoch": 1.27, + "learning_rate": 1.2566171599911748e-05, + "loss": 0.7761, + "step": 17910 + }, + { + "epoch": 1.27, + "learning_rate": 1.2544936082479917e-05, + "loss": 0.7728, + "step": 17920 + }, + { + "epoch": 1.27, + "learning_rate": 1.252371032138398e-05, + "loss": 0.7741, + "step": 17930 + }, + { + "epoch": 1.27, + "learning_rate": 1.2502494344401783e-05, + "loss": 0.7775, + "step": 17940 + }, + { + "epoch": 1.27, + "learning_rate": 1.2481288179298378e-05, + "loss": 0.7684, + "step": 17950 + }, + { + "epoch": 1.27, + "learning_rate": 1.2460091853825962e-05, + "loss": 0.7746, + "step": 17960 + }, + { + "epoch": 1.27, + "learning_rate": 1.2438905395723867e-05, + "loss": 0.7686, + "step": 17970 + }, + { + "epoch": 1.27, + "learning_rate": 1.241772883271851e-05, + "loss": 0.7773, + "step": 17980 + }, + { + "epoch": 1.27, + "learning_rate": 1.2396562192523356e-05, + "loss": 0.769, + "step": 17990 + }, + { + "epoch": 1.27, + "learning_rate": 1.2375405502838889e-05, + "loss": 0.7747, + "step": 18000 + }, + { + "epoch": 1.27, + "learning_rate": 1.2354258791352562e-05, + "loss": 0.7804, + "step": 18010 + }, + { + "epoch": 1.27, + "learning_rate": 1.233312208573878e-05, + "loss": 0.774, + "step": 18020 + }, + { + "epoch": 1.27, + "learning_rate": 1.2311995413658853e-05, + "loss": 0.7788, + "step": 18030 + }, + { + "epoch": 1.27, + "learning_rate": 1.2290878802760942e-05, + "loss": 0.775, + "step": 18040 + }, + { + "epoch": 1.27, + "learning_rate": 1.2269772280680058e-05, + "loss": 0.7803, + "step": 18050 + }, + { + "epoch": 1.28, + "learning_rate": 1.2248675875038015e-05, + "loss": 0.7619, + "step": 18060 + }, + { + "epoch": 1.28, + "learning_rate": 1.2227589613443367e-05, + "loss": 0.7816, + "step": 18070 + }, + { + "epoch": 1.28, + "learning_rate": 1.2206513523491413e-05, + "loss": 0.7729, + "step": 18080 + }, + { + "epoch": 1.28, + "learning_rate": 1.2185447632764124e-05, + "loss": 0.7765, + "step": 18090 + }, + { + "epoch": 1.28, + "learning_rate": 1.2164391968830138e-05, + "loss": 0.7675, + "step": 18100 + }, + { + "epoch": 1.28, + "learning_rate": 1.2143346559244705e-05, + "loss": 0.7817, + "step": 18110 + }, + { + "epoch": 1.28, + "learning_rate": 1.212231143154964e-05, + "loss": 0.7745, + "step": 18120 + }, + { + "epoch": 1.28, + "learning_rate": 1.2101286613273322e-05, + "loss": 0.7725, + "step": 18130 + }, + { + "epoch": 1.28, + "learning_rate": 1.2080272131930628e-05, + "loss": 0.7739, + "step": 18140 + }, + { + "epoch": 1.28, + "learning_rate": 1.2059268015022916e-05, + "loss": 0.7725, + "step": 18150 + }, + { + "epoch": 1.28, + "learning_rate": 1.2038274290037971e-05, + "loss": 0.763, + "step": 18160 + }, + { + "epoch": 1.28, + "learning_rate": 1.2017290984449979e-05, + "loss": 0.7796, + "step": 18170 + }, + { + "epoch": 1.28, + "learning_rate": 1.19963181257195e-05, + "loss": 0.7732, + "step": 18180 + }, + { + "epoch": 1.28, + "learning_rate": 1.1975355741293412e-05, + "loss": 0.7786, + "step": 18190 + }, + { + "epoch": 1.29, + "learning_rate": 1.1954403858604885e-05, + "loss": 0.7692, + "step": 18200 + }, + { + "epoch": 1.29, + "learning_rate": 1.193346250507335e-05, + "loss": 0.773, + "step": 18210 + }, + { + "epoch": 1.29, + "learning_rate": 1.191253170810446e-05, + "loss": 0.7784, + "step": 18220 + }, + { + "epoch": 1.29, + "learning_rate": 1.1891611495090051e-05, + "loss": 0.7644, + "step": 18230 + }, + { + "epoch": 1.29, + "learning_rate": 1.1870701893408105e-05, + "loss": 0.778, + "step": 18240 + }, + { + "epoch": 1.29, + "learning_rate": 1.1849802930422726e-05, + "loss": 0.771, + "step": 18250 + }, + { + "epoch": 1.29, + "learning_rate": 1.1828914633484087e-05, + "loss": 0.7774, + "step": 18260 + }, + { + "epoch": 1.29, + "learning_rate": 1.1808037029928397e-05, + "loss": 0.77, + "step": 18270 + }, + { + "epoch": 1.29, + "learning_rate": 1.1787170147077884e-05, + "loss": 0.769, + "step": 18280 + }, + { + "epoch": 1.29, + "learning_rate": 1.1766314012240745e-05, + "loss": 0.7719, + "step": 18290 + }, + { + "epoch": 1.29, + "learning_rate": 1.1745468652711094e-05, + "loss": 0.7754, + "step": 18300 + }, + { + "epoch": 1.29, + "learning_rate": 1.1724634095768965e-05, + "loss": 0.7764, + "step": 18310 + }, + { + "epoch": 1.29, + "learning_rate": 1.1703810368680244e-05, + "loss": 0.7751, + "step": 18320 + }, + { + "epoch": 1.29, + "learning_rate": 1.1682997498696645e-05, + "loss": 0.78, + "step": 18330 + }, + { + "epoch": 1.3, + "learning_rate": 1.1662195513055679e-05, + "loss": 0.7641, + "step": 18340 + }, + { + "epoch": 1.3, + "learning_rate": 1.1641404438980604e-05, + "loss": 0.7689, + "step": 18350 + }, + { + "epoch": 1.3, + "learning_rate": 1.1620624303680393e-05, + "loss": 0.7726, + "step": 18360 + }, + { + "epoch": 1.3, + "learning_rate": 1.1599855134349731e-05, + "loss": 0.7788, + "step": 18370 + }, + { + "epoch": 1.3, + "learning_rate": 1.1579096958168914e-05, + "loss": 0.7699, + "step": 18380 + }, + { + "epoch": 1.3, + "learning_rate": 1.1558349802303895e-05, + "loss": 0.7722, + "step": 18390 + }, + { + "epoch": 1.3, + "learning_rate": 1.1537613693906158e-05, + "loss": 0.7759, + "step": 18400 + }, + { + "epoch": 1.3, + "learning_rate": 1.151688866011278e-05, + "loss": 0.7749, + "step": 18410 + }, + { + "epoch": 1.3, + "learning_rate": 1.1496174728046281e-05, + "loss": 0.7713, + "step": 18420 + }, + { + "epoch": 1.3, + "learning_rate": 1.1475471924814717e-05, + "loss": 0.7802, + "step": 18430 + }, + { + "epoch": 1.3, + "learning_rate": 1.1454780277511537e-05, + "loss": 0.7752, + "step": 18440 + }, + { + "epoch": 1.3, + "learning_rate": 1.1434099813215616e-05, + "loss": 0.7708, + "step": 18450 + }, + { + "epoch": 1.3, + "learning_rate": 1.1413430558991169e-05, + "loss": 0.7765, + "step": 18460 + }, + { + "epoch": 1.3, + "learning_rate": 1.1392772541887772e-05, + "loss": 0.769, + "step": 18470 + }, + { + "epoch": 1.31, + "learning_rate": 1.1372125788940257e-05, + "loss": 0.7721, + "step": 18480 + }, + { + "epoch": 1.31, + "learning_rate": 1.1351490327168765e-05, + "loss": 0.7872, + "step": 18490 + }, + { + "epoch": 1.31, + "learning_rate": 1.1330866183578596e-05, + "loss": 0.7731, + "step": 18500 + }, + { + "epoch": 1.31, + "learning_rate": 1.1310253385160296e-05, + "loss": 0.7751, + "step": 18510 + }, + { + "epoch": 1.31, + "learning_rate": 1.1289651958889527e-05, + "loss": 0.7734, + "step": 18520 + }, + { + "epoch": 1.31, + "learning_rate": 1.1269061931727094e-05, + "loss": 0.7737, + "step": 18530 + }, + { + "epoch": 1.31, + "learning_rate": 1.1248483330618858e-05, + "loss": 0.779, + "step": 18540 + }, + { + "epoch": 1.31, + "learning_rate": 1.122791618249576e-05, + "loss": 0.7709, + "step": 18550 + }, + { + "epoch": 1.31, + "learning_rate": 1.1207360514273714e-05, + "loss": 0.7696, + "step": 18560 + }, + { + "epoch": 1.31, + "learning_rate": 1.1186816352853644e-05, + "loss": 0.7815, + "step": 18570 + }, + { + "epoch": 1.31, + "learning_rate": 1.1166283725121398e-05, + "loss": 0.7693, + "step": 18580 + }, + { + "epoch": 1.31, + "learning_rate": 1.114576265794773e-05, + "loss": 0.7704, + "step": 18590 + }, + { + "epoch": 1.31, + "learning_rate": 1.1125253178188263e-05, + "loss": 0.7757, + "step": 18600 + }, + { + "epoch": 1.31, + "learning_rate": 1.1104755312683476e-05, + "loss": 0.7717, + "step": 18610 + }, + { + "epoch": 1.32, + "learning_rate": 1.1084269088258614e-05, + "loss": 0.7692, + "step": 18620 + }, + { + "epoch": 1.32, + "learning_rate": 1.1063794531723729e-05, + "loss": 0.7643, + "step": 18630 + }, + { + "epoch": 1.32, + "learning_rate": 1.1043331669873566e-05, + "loss": 0.7724, + "step": 18640 + }, + { + "epoch": 1.32, + "learning_rate": 1.1022880529487581e-05, + "loss": 0.7716, + "step": 18650 + }, + { + "epoch": 1.32, + "learning_rate": 1.1002441137329898e-05, + "loss": 0.7751, + "step": 18660 + }, + { + "epoch": 1.32, + "learning_rate": 1.0982013520149248e-05, + "loss": 0.7748, + "step": 18670 + }, + { + "epoch": 1.32, + "learning_rate": 1.0961597704678974e-05, + "loss": 0.7717, + "step": 18680 + }, + { + "epoch": 1.32, + "learning_rate": 1.094119371763696e-05, + "loss": 0.7685, + "step": 18690 + }, + { + "epoch": 1.32, + "learning_rate": 1.0920801585725597e-05, + "loss": 0.7703, + "step": 18700 + }, + { + "epoch": 1.32, + "learning_rate": 1.0900421335631802e-05, + "loss": 0.7715, + "step": 18710 + }, + { + "epoch": 1.32, + "learning_rate": 1.0880052994026906e-05, + "loss": 0.762, + "step": 18720 + }, + { + "epoch": 1.32, + "learning_rate": 1.0859696587566657e-05, + "loss": 0.7681, + "step": 18730 + }, + { + "epoch": 1.32, + "learning_rate": 1.0839352142891214e-05, + "loss": 0.771, + "step": 18740 + }, + { + "epoch": 1.32, + "learning_rate": 1.0819019686625046e-05, + "loss": 0.7667, + "step": 18750 + }, + { + "epoch": 1.33, + "learning_rate": 1.0798699245376959e-05, + "loss": 0.7657, + "step": 18760 + }, + { + "epoch": 1.33, + "learning_rate": 1.0778390845740017e-05, + "loss": 0.7642, + "step": 18770 + }, + { + "epoch": 1.33, + "learning_rate": 1.0758094514291541e-05, + "loss": 0.7644, + "step": 18780 + }, + { + "epoch": 1.33, + "learning_rate": 1.0737810277593046e-05, + "loss": 0.7765, + "step": 18790 + }, + { + "epoch": 1.33, + "learning_rate": 1.0717538162190227e-05, + "loss": 0.7701, + "step": 18800 + }, + { + "epoch": 1.33, + "learning_rate": 1.0697278194612899e-05, + "loss": 0.767, + "step": 18810 + }, + { + "epoch": 1.33, + "learning_rate": 1.0677030401375012e-05, + "loss": 0.7637, + "step": 18820 + }, + { + "epoch": 1.33, + "learning_rate": 1.065679480897455e-05, + "loss": 0.7754, + "step": 18830 + }, + { + "epoch": 1.33, + "learning_rate": 1.0636571443893555e-05, + "loss": 0.7745, + "step": 18840 + }, + { + "epoch": 1.33, + "learning_rate": 1.0616360332598044e-05, + "loss": 0.7705, + "step": 18850 + }, + { + "epoch": 1.33, + "learning_rate": 1.0596161501538027e-05, + "loss": 0.7735, + "step": 18860 + }, + { + "epoch": 1.33, + "learning_rate": 1.0575974977147419e-05, + "loss": 0.7734, + "step": 18870 + }, + { + "epoch": 1.33, + "learning_rate": 1.055580078584403e-05, + "loss": 0.7665, + "step": 18880 + }, + { + "epoch": 1.33, + "learning_rate": 1.0535638954029536e-05, + "loss": 0.7688, + "step": 18890 + }, + { + "epoch": 1.34, + "learning_rate": 1.0515489508089453e-05, + "loss": 0.7694, + "step": 18900 + }, + { + "epoch": 1.34, + "learning_rate": 1.0495352474393057e-05, + "loss": 0.7779, + "step": 18910 + }, + { + "epoch": 1.34, + "learning_rate": 1.0475227879293413e-05, + "loss": 0.7756, + "step": 18920 + }, + { + "epoch": 1.34, + "learning_rate": 1.0455115749127276e-05, + "loss": 0.7666, + "step": 18930 + }, + { + "epoch": 1.34, + "learning_rate": 1.0435016110215122e-05, + "loss": 0.7694, + "step": 18940 + }, + { + "epoch": 1.34, + "learning_rate": 1.0414928988861051e-05, + "loss": 0.7677, + "step": 18950 + }, + { + "epoch": 1.34, + "learning_rate": 1.0394854411352791e-05, + "loss": 0.7609, + "step": 18960 + }, + { + "epoch": 1.34, + "learning_rate": 1.0374792403961652e-05, + "loss": 0.7709, + "step": 18970 + }, + { + "epoch": 1.34, + "learning_rate": 1.035474299294251e-05, + "loss": 0.7764, + "step": 18980 + }, + { + "epoch": 1.34, + "learning_rate": 1.033470620453373e-05, + "loss": 0.7632, + "step": 18990 + }, + { + "epoch": 1.34, + "learning_rate": 1.0314682064957182e-05, + "loss": 0.7778, + "step": 19000 + }, + { + "epoch": 1.34, + "learning_rate": 1.0294670600418164e-05, + "loss": 0.7681, + "step": 19010 + }, + { + "epoch": 1.34, + "learning_rate": 1.0274671837105403e-05, + "loss": 0.7676, + "step": 19020 + }, + { + "epoch": 1.34, + "learning_rate": 1.0254685801190993e-05, + "loss": 0.7673, + "step": 19030 + }, + { + "epoch": 1.34, + "learning_rate": 1.023471251883037e-05, + "loss": 0.7622, + "step": 19040 + }, + { + "epoch": 1.35, + "learning_rate": 1.0214752016162281e-05, + "loss": 0.7703, + "step": 19050 + }, + { + "epoch": 1.35, + "learning_rate": 1.0194804319308762e-05, + "loss": 0.7665, + "step": 19060 + }, + { + "epoch": 1.35, + "learning_rate": 1.017486945437507e-05, + "loss": 0.7621, + "step": 19070 + }, + { + "epoch": 1.35, + "learning_rate": 1.0154947447449686e-05, + "loss": 0.7698, + "step": 19080 + }, + { + "epoch": 1.35, + "learning_rate": 1.0135038324604248e-05, + "loss": 0.7778, + "step": 19090 + }, + { + "epoch": 1.35, + "learning_rate": 1.011514211189356e-05, + "loss": 0.7713, + "step": 19100 + }, + { + "epoch": 1.35, + "learning_rate": 1.0095258835355482e-05, + "loss": 0.767, + "step": 19110 + }, + { + "epoch": 1.35, + "learning_rate": 1.0075388521010998e-05, + "loss": 0.7692, + "step": 19120 + }, + { + "epoch": 1.35, + "learning_rate": 1.0055531194864092e-05, + "loss": 0.7625, + "step": 19130 + }, + { + "epoch": 1.35, + "learning_rate": 1.0035686882901778e-05, + "loss": 0.7691, + "step": 19140 + }, + { + "epoch": 1.35, + "learning_rate": 1.0015855611094007e-05, + "loss": 0.7696, + "step": 19150 + }, + { + "epoch": 1.35, + "learning_rate": 9.996037405393702e-06, + "loss": 0.7806, + "step": 19160 + }, + { + "epoch": 1.35, + "learning_rate": 9.976232291736645e-06, + "loss": 0.7691, + "step": 19170 + }, + { + "epoch": 1.35, + "learning_rate": 9.956440296041531e-06, + "loss": 0.7702, + "step": 19180 + }, + { + "epoch": 1.36, + "learning_rate": 9.936661444209832e-06, + "loss": 0.7712, + "step": 19190 + }, + { + "epoch": 1.36, + "learning_rate": 9.916895762125873e-06, + "loss": 0.7694, + "step": 19200 + }, + { + "epoch": 1.36, + "learning_rate": 9.897143275656706e-06, + "loss": 0.7669, + "step": 19210 + }, + { + "epoch": 1.36, + "learning_rate": 9.877404010652143e-06, + "loss": 0.7648, + "step": 19220 + }, + { + "epoch": 1.36, + "learning_rate": 9.857677992944663e-06, + "loss": 0.7595, + "step": 19230 + }, + { + "epoch": 1.36, + "learning_rate": 9.837965248349439e-06, + "loss": 0.7695, + "step": 19240 + }, + { + "epoch": 1.36, + "learning_rate": 9.818265802664245e-06, + "loss": 0.76, + "step": 19250 + }, + { + "epoch": 1.36, + "learning_rate": 9.798579681669484e-06, + "loss": 0.7691, + "step": 19260 + }, + { + "epoch": 1.36, + "learning_rate": 9.778906911128078e-06, + "loss": 0.7775, + "step": 19270 + }, + { + "epoch": 1.36, + "learning_rate": 9.75924751678552e-06, + "loss": 0.7691, + "step": 19280 + }, + { + "epoch": 1.36, + "learning_rate": 9.739601524369765e-06, + "loss": 0.7622, + "step": 19290 + }, + { + "epoch": 1.36, + "learning_rate": 9.719968959591257e-06, + "loss": 0.76, + "step": 19300 + }, + { + "epoch": 1.36, + "learning_rate": 9.70034984814284e-06, + "loss": 0.759, + "step": 19310 + }, + { + "epoch": 1.36, + "learning_rate": 9.68074421569978e-06, + "loss": 0.7698, + "step": 19320 + }, + { + "epoch": 1.37, + "learning_rate": 9.661152087919682e-06, + "loss": 0.7735, + "step": 19330 + }, + { + "epoch": 1.37, + "learning_rate": 9.641573490442484e-06, + "loss": 0.7696, + "step": 19340 + }, + { + "epoch": 1.37, + "learning_rate": 9.62200844889041e-06, + "loss": 0.7689, + "step": 19350 + }, + { + "epoch": 1.37, + "learning_rate": 9.602456988867965e-06, + "loss": 0.7662, + "step": 19360 + }, + { + "epoch": 1.37, + "learning_rate": 9.582919135961852e-06, + "loss": 0.7684, + "step": 19370 + }, + { + "epoch": 1.37, + "learning_rate": 9.563394915740996e-06, + "loss": 0.7652, + "step": 19380 + }, + { + "epoch": 1.37, + "learning_rate": 9.54388435375645e-06, + "loss": 0.769, + "step": 19390 + }, + { + "epoch": 1.37, + "learning_rate": 9.524387475541423e-06, + "loss": 0.7684, + "step": 19400 + }, + { + "epoch": 1.37, + "learning_rate": 9.504904306611196e-06, + "loss": 0.7692, + "step": 19410 + }, + { + "epoch": 1.37, + "learning_rate": 9.485434872463104e-06, + "loss": 0.7589, + "step": 19420 + }, + { + "epoch": 1.37, + "learning_rate": 9.465979198576535e-06, + "loss": 0.7709, + "step": 19430 + }, + { + "epoch": 1.37, + "learning_rate": 9.446537310412845e-06, + "loss": 0.7696, + "step": 19440 + }, + { + "epoch": 1.37, + "learning_rate": 9.427109233415348e-06, + "loss": 0.7601, + "step": 19450 + }, + { + "epoch": 1.37, + "learning_rate": 9.407694993009304e-06, + "loss": 0.7661, + "step": 19460 + }, + { + "epoch": 1.38, + "learning_rate": 9.388294614601843e-06, + "loss": 0.7671, + "step": 19470 + }, + { + "epoch": 1.38, + "learning_rate": 9.368908123581974e-06, + "loss": 0.7629, + "step": 19480 + }, + { + "epoch": 1.38, + "learning_rate": 9.349535545320516e-06, + "loss": 0.7619, + "step": 19490 + }, + { + "epoch": 1.38, + "learning_rate": 9.330176905170077e-06, + "loss": 0.7654, + "step": 19500 + }, + { + "epoch": 1.38, + "learning_rate": 9.31083222846505e-06, + "loss": 0.7666, + "step": 19510 + }, + { + "epoch": 1.38, + "learning_rate": 9.291501540521524e-06, + "loss": 0.7641, + "step": 19520 + }, + { + "epoch": 1.38, + "learning_rate": 9.272184866637307e-06, + "loss": 0.7625, + "step": 19530 + }, + { + "epoch": 1.38, + "learning_rate": 9.252882232091847e-06, + "loss": 0.7728, + "step": 19540 + }, + { + "epoch": 1.38, + "learning_rate": 9.23359366214622e-06, + "loss": 0.7611, + "step": 19550 + }, + { + "epoch": 1.38, + "learning_rate": 9.21431918204312e-06, + "loss": 0.7709, + "step": 19560 + }, + { + "epoch": 1.38, + "learning_rate": 9.195058817006772e-06, + "loss": 0.7546, + "step": 19570 + }, + { + "epoch": 1.38, + "learning_rate": 9.175812592242939e-06, + "loss": 0.7669, + "step": 19580 + }, + { + "epoch": 1.38, + "learning_rate": 9.156580532938895e-06, + "loss": 0.7693, + "step": 19590 + }, + { + "epoch": 1.38, + "learning_rate": 9.137362664263343e-06, + "loss": 0.7698, + "step": 19600 + }, + { + "epoch": 1.39, + "learning_rate": 9.118159011366452e-06, + "loss": 0.7722, + "step": 19610 + }, + { + "epoch": 1.39, + "learning_rate": 9.098969599379754e-06, + "loss": 0.7673, + "step": 19620 + }, + { + "epoch": 1.39, + "learning_rate": 9.079794453416174e-06, + "loss": 0.7636, + "step": 19630 + }, + { + "epoch": 1.39, + "learning_rate": 9.060633598569942e-06, + "loss": 0.7657, + "step": 19640 + }, + { + "epoch": 1.39, + "learning_rate": 9.041487059916596e-06, + "loss": 0.7651, + "step": 19650 + }, + { + "epoch": 1.39, + "learning_rate": 9.022354862512935e-06, + "loss": 0.7616, + "step": 19660 + }, + { + "epoch": 1.39, + "learning_rate": 9.003237031397002e-06, + "loss": 0.7649, + "step": 19670 + }, + { + "epoch": 1.39, + "learning_rate": 8.984133591588022e-06, + "loss": 0.7676, + "step": 19680 + }, + { + "epoch": 1.39, + "learning_rate": 8.965044568086405e-06, + "loss": 0.7708, + "step": 19690 + }, + { + "epoch": 1.39, + "learning_rate": 8.94596998587367e-06, + "loss": 0.7642, + "step": 19700 + }, + { + "epoch": 1.39, + "learning_rate": 8.926909869912463e-06, + "loss": 0.7674, + "step": 19710 + }, + { + "epoch": 1.39, + "learning_rate": 8.907864245146485e-06, + "loss": 0.7593, + "step": 19720 + }, + { + "epoch": 1.39, + "learning_rate": 8.888833136500468e-06, + "loss": 0.7691, + "step": 19730 + }, + { + "epoch": 1.39, + "learning_rate": 8.869816568880144e-06, + "loss": 0.7614, + "step": 19740 + }, + { + "epoch": 1.4, + "learning_rate": 8.850814567172245e-06, + "loss": 0.7645, + "step": 19750 + }, + { + "epoch": 1.4, + "learning_rate": 8.831827156244403e-06, + "loss": 0.7625, + "step": 19760 + }, + { + "epoch": 1.4, + "learning_rate": 8.812854360945185e-06, + "loss": 0.7614, + "step": 19770 + }, + { + "epoch": 1.4, + "learning_rate": 8.793896206104002e-06, + "loss": 0.7694, + "step": 19780 + }, + { + "epoch": 1.4, + "learning_rate": 8.77495271653114e-06, + "loss": 0.7626, + "step": 19790 + }, + { + "epoch": 1.4, + "learning_rate": 8.756023917017662e-06, + "loss": 0.7673, + "step": 19800 + }, + { + "epoch": 1.4, + "learning_rate": 8.737109832335419e-06, + "loss": 0.763, + "step": 19810 + }, + { + "epoch": 1.4, + "learning_rate": 8.718210487237e-06, + "loss": 0.7682, + "step": 19820 + }, + { + "epoch": 1.4, + "learning_rate": 8.69932590645572e-06, + "loss": 0.7612, + "step": 19830 + }, + { + "epoch": 1.4, + "learning_rate": 8.680456114705546e-06, + "loss": 0.7552, + "step": 19840 + }, + { + "epoch": 1.4, + "learning_rate": 8.661601136681124e-06, + "loss": 0.7688, + "step": 19850 + }, + { + "epoch": 1.4, + "learning_rate": 8.642760997057675e-06, + "loss": 0.7632, + "step": 19860 + }, + { + "epoch": 1.4, + "learning_rate": 8.623935720491046e-06, + "loss": 0.7629, + "step": 19870 + }, + { + "epoch": 1.4, + "learning_rate": 8.605125331617578e-06, + "loss": 0.7661, + "step": 19880 + }, + { + "epoch": 1.4, + "learning_rate": 8.586329855054184e-06, + "loss": 0.7627, + "step": 19890 + }, + { + "epoch": 1.41, + "learning_rate": 8.567549315398216e-06, + "loss": 0.763, + "step": 19900 + }, + { + "epoch": 1.41, + "learning_rate": 8.548783737227518e-06, + "loss": 0.7632, + "step": 19910 + }, + { + "epoch": 1.41, + "learning_rate": 8.530033145100318e-06, + "loss": 0.7639, + "step": 19920 + }, + { + "epoch": 1.41, + "learning_rate": 8.511297563555263e-06, + "loss": 0.7601, + "step": 19930 + }, + { + "epoch": 1.41, + "learning_rate": 8.492577017111327e-06, + "loss": 0.7529, + "step": 19940 + }, + { + "epoch": 1.41, + "learning_rate": 8.473871530267846e-06, + "loss": 0.7673, + "step": 19950 + }, + { + "epoch": 1.41, + "learning_rate": 8.455181127504391e-06, + "loss": 0.7625, + "step": 19960 + }, + { + "epoch": 1.41, + "learning_rate": 8.43650583328085e-06, + "loss": 0.764, + "step": 19970 + }, + { + "epoch": 1.41, + "learning_rate": 8.417845672037301e-06, + "loss": 0.7681, + "step": 19980 + }, + { + "epoch": 1.41, + "learning_rate": 8.399200668194045e-06, + "loss": 0.7608, + "step": 19990 + }, + { + "epoch": 1.41, + "learning_rate": 8.380570846151517e-06, + "loss": 0.7686, + "step": 20000 + }, + { + "epoch": 1.41, + "learning_rate": 8.361956230290313e-06, + "loss": 0.7665, + "step": 20010 + }, + { + "epoch": 1.41, + "learning_rate": 8.343356844971105e-06, + "loss": 0.7683, + "step": 20020 + }, + { + "epoch": 1.41, + "learning_rate": 8.324772714534662e-06, + "loss": 0.7626, + "step": 20030 + }, + { + "epoch": 1.42, + "learning_rate": 8.306203863301743e-06, + "loss": 0.7639, + "step": 20040 + }, + { + "epoch": 1.42, + "learning_rate": 8.28765031557316e-06, + "loss": 0.7715, + "step": 20050 + }, + { + "epoch": 1.42, + "learning_rate": 8.269112095629662e-06, + "loss": 0.7636, + "step": 20060 + }, + { + "epoch": 1.42, + "learning_rate": 8.250589227731967e-06, + "loss": 0.7688, + "step": 20070 + }, + { + "epoch": 1.42, + "learning_rate": 8.232081736120676e-06, + "loss": 0.7656, + "step": 20080 + }, + { + "epoch": 1.42, + "learning_rate": 8.213589645016291e-06, + "loss": 0.7715, + "step": 20090 + }, + { + "epoch": 1.42, + "learning_rate": 8.195112978619145e-06, + "loss": 0.7648, + "step": 20100 + }, + { + "epoch": 1.42, + "learning_rate": 8.176651761109381e-06, + "loss": 0.7658, + "step": 20110 + }, + { + "epoch": 1.42, + "learning_rate": 8.15820601664693e-06, + "loss": 0.7699, + "step": 20120 + }, + { + "epoch": 1.42, + "learning_rate": 8.13977576937149e-06, + "loss": 0.7604, + "step": 20130 + }, + { + "epoch": 1.42, + "learning_rate": 8.121361043402442e-06, + "loss": 0.7697, + "step": 20140 + }, + { + "epoch": 1.42, + "learning_rate": 8.102961862838899e-06, + "loss": 0.7677, + "step": 20150 + }, + { + "epoch": 1.42, + "learning_rate": 8.084578251759583e-06, + "loss": 0.7661, + "step": 20160 + }, + { + "epoch": 1.42, + "learning_rate": 8.066210234222882e-06, + "loss": 0.7636, + "step": 20170 + }, + { + "epoch": 1.43, + "learning_rate": 8.047857834266755e-06, + "loss": 0.7559, + "step": 20180 + }, + { + "epoch": 1.43, + "learning_rate": 8.029521075908713e-06, + "loss": 0.7571, + "step": 20190 + }, + { + "epoch": 1.43, + "learning_rate": 8.011199983145827e-06, + "loss": 0.7604, + "step": 20200 + }, + { + "epoch": 1.43, + "learning_rate": 7.992894579954644e-06, + "loss": 0.7662, + "step": 20210 + }, + { + "epoch": 1.43, + "learning_rate": 7.974604890291175e-06, + "loss": 0.7659, + "step": 20220 + }, + { + "epoch": 1.43, + "learning_rate": 7.956330938090892e-06, + "loss": 0.7634, + "step": 20230 + }, + { + "epoch": 1.43, + "learning_rate": 7.938072747268644e-06, + "loss": 0.7661, + "step": 20240 + }, + { + "epoch": 1.43, + "learning_rate": 7.919830341718673e-06, + "loss": 0.7678, + "step": 20250 + }, + { + "epoch": 1.43, + "learning_rate": 7.901603745314552e-06, + "loss": 0.7629, + "step": 20260 + }, + { + "epoch": 1.43, + "learning_rate": 7.88339298190916e-06, + "loss": 0.763, + "step": 20270 + }, + { + "epoch": 1.43, + "learning_rate": 7.865198075334682e-06, + "loss": 0.7637, + "step": 20280 + }, + { + "epoch": 1.43, + "learning_rate": 7.84701904940251e-06, + "loss": 0.758, + "step": 20290 + }, + { + "epoch": 1.43, + "learning_rate": 7.828855927903298e-06, + "loss": 0.7609, + "step": 20300 + }, + { + "epoch": 1.43, + "learning_rate": 7.810708734606854e-06, + "loss": 0.7663, + "step": 20310 + }, + { + "epoch": 1.44, + "learning_rate": 7.792577493262143e-06, + "loss": 0.7644, + "step": 20320 + }, + { + "epoch": 1.44, + "learning_rate": 7.774462227597278e-06, + "loss": 0.7542, + "step": 20330 + }, + { + "epoch": 1.44, + "learning_rate": 7.756362961319442e-06, + "loss": 0.7616, + "step": 20340 + }, + { + "epoch": 1.44, + "learning_rate": 7.738279718114878e-06, + "loss": 0.7644, + "step": 20350 + }, + { + "epoch": 1.44, + "learning_rate": 7.720212521648885e-06, + "loss": 0.7574, + "step": 20360 + }, + { + "epoch": 1.44, + "learning_rate": 7.70216139556573e-06, + "loss": 0.7637, + "step": 20370 + }, + { + "epoch": 1.44, + "learning_rate": 7.68412636348868e-06, + "loss": 0.7657, + "step": 20380 + }, + { + "epoch": 1.44, + "learning_rate": 7.666107449019909e-06, + "loss": 0.7583, + "step": 20390 + }, + { + "epoch": 1.44, + "learning_rate": 7.648104675740527e-06, + "loss": 0.7612, + "step": 20400 + }, + { + "epoch": 1.44, + "learning_rate": 7.630118067210498e-06, + "loss": 0.7544, + "step": 20410 + }, + { + "epoch": 1.44, + "learning_rate": 7.612147646968646e-06, + "loss": 0.7626, + "step": 20420 + }, + { + "epoch": 1.44, + "learning_rate": 7.594193438532596e-06, + "loss": 0.7672, + "step": 20430 + }, + { + "epoch": 1.44, + "learning_rate": 7.576255465398772e-06, + "loss": 0.756, + "step": 20440 + }, + { + "epoch": 1.44, + "learning_rate": 7.558333751042335e-06, + "loss": 0.7611, + "step": 20450 + }, + { + "epoch": 1.45, + "learning_rate": 7.540428318917192e-06, + "loss": 0.7595, + "step": 20460 + }, + { + "epoch": 1.45, + "learning_rate": 7.522539192455909e-06, + "loss": 0.7578, + "step": 20470 + }, + { + "epoch": 1.45, + "learning_rate": 7.504666395069749e-06, + "loss": 0.7634, + "step": 20480 + }, + { + "epoch": 1.45, + "learning_rate": 7.486809950148575e-06, + "loss": 0.7602, + "step": 20490 + }, + { + "epoch": 1.45, + "learning_rate": 7.468969881060868e-06, + "loss": 0.7594, + "step": 20500 + }, + { + "epoch": 1.45, + "learning_rate": 7.451146211153659e-06, + "loss": 0.7609, + "step": 20510 + }, + { + "epoch": 1.45, + "learning_rate": 7.43333896375255e-06, + "loss": 0.7702, + "step": 20520 + }, + { + "epoch": 1.45, + "learning_rate": 7.415548162161617e-06, + "loss": 0.7633, + "step": 20530 + }, + { + "epoch": 1.45, + "learning_rate": 7.39777382966344e-06, + "loss": 0.7584, + "step": 20540 + }, + { + "epoch": 1.45, + "learning_rate": 7.38001598951902e-06, + "loss": 0.7572, + "step": 20550 + }, + { + "epoch": 1.45, + "learning_rate": 7.362274664967814e-06, + "loss": 0.7625, + "step": 20560 + }, + { + "epoch": 1.45, + "learning_rate": 7.3445498792276095e-06, + "loss": 0.7584, + "step": 20570 + }, + { + "epoch": 1.45, + "learning_rate": 7.326841655494605e-06, + "loss": 0.7545, + "step": 20580 + }, + { + "epoch": 1.45, + "learning_rate": 7.309150016943282e-06, + "loss": 0.7573, + "step": 20590 + }, + { + "epoch": 1.46, + "learning_rate": 7.2914749867264545e-06, + "loss": 0.7566, + "step": 20600 + }, + { + "epoch": 1.46, + "learning_rate": 7.273816587975167e-06, + "loss": 0.7626, + "step": 20610 + }, + { + "epoch": 1.46, + "learning_rate": 7.256174843798727e-06, + "loss": 0.7629, + "step": 20620 + }, + { + "epoch": 1.46, + "learning_rate": 7.238549777284618e-06, + "loss": 0.7531, + "step": 20630 + }, + { + "epoch": 1.46, + "learning_rate": 7.220941411498539e-06, + "loss": 0.7574, + "step": 20640 + }, + { + "epoch": 1.46, + "learning_rate": 7.203349769484278e-06, + "loss": 0.7552, + "step": 20650 + }, + { + "epoch": 1.46, + "learning_rate": 7.185774874263785e-06, + "loss": 0.7556, + "step": 20660 + }, + { + "epoch": 1.46, + "learning_rate": 7.16821674883706e-06, + "loss": 0.755, + "step": 20670 + }, + { + "epoch": 1.46, + "learning_rate": 7.1506754161821915e-06, + "loss": 0.7573, + "step": 20680 + }, + { + "epoch": 1.46, + "learning_rate": 7.1331508992552504e-06, + "loss": 0.7631, + "step": 20690 + }, + { + "epoch": 1.46, + "learning_rate": 7.1156432209903405e-06, + "loss": 0.7516, + "step": 20700 + }, + { + "epoch": 1.46, + "learning_rate": 7.098152404299496e-06, + "loss": 0.7592, + "step": 20710 + }, + { + "epoch": 1.46, + "learning_rate": 7.080678472072719e-06, + "loss": 0.7655, + "step": 20720 + }, + { + "epoch": 1.46, + "learning_rate": 7.06322144717787e-06, + "loss": 0.7659, + "step": 20730 + }, + { + "epoch": 1.47, + "learning_rate": 7.045781352460728e-06, + "loss": 0.7642, + "step": 20740 + }, + { + "epoch": 1.47, + "learning_rate": 7.028358210744881e-06, + "loss": 0.7553, + "step": 20750 + }, + { + "epoch": 1.47, + "learning_rate": 7.010952044831765e-06, + "loss": 0.7536, + "step": 20760 + }, + { + "epoch": 1.47, + "learning_rate": 6.993562877500562e-06, + "loss": 0.7636, + "step": 20770 + }, + { + "epoch": 1.47, + "learning_rate": 6.976190731508243e-06, + "loss": 0.7579, + "step": 20780 + }, + { + "epoch": 1.47, + "learning_rate": 6.958835629589482e-06, + "loss": 0.7663, + "step": 20790 + }, + { + "epoch": 1.47, + "learning_rate": 6.941497594456654e-06, + "loss": 0.7531, + "step": 20800 + }, + { + "epoch": 1.47, + "learning_rate": 6.924176648799789e-06, + "loss": 0.761, + "step": 20810 + }, + { + "epoch": 1.47, + "learning_rate": 6.906872815286578e-06, + "loss": 0.755, + "step": 20820 + }, + { + "epoch": 1.47, + "learning_rate": 6.889586116562288e-06, + "loss": 0.7614, + "step": 20830 + }, + { + "epoch": 1.47, + "learning_rate": 6.872316575249785e-06, + "loss": 0.754, + "step": 20840 + }, + { + "epoch": 1.47, + "learning_rate": 6.855064213949461e-06, + "loss": 0.7567, + "step": 20850 + }, + { + "epoch": 1.47, + "learning_rate": 6.837829055239249e-06, + "loss": 0.762, + "step": 20860 + }, + { + "epoch": 1.47, + "learning_rate": 6.820611121674548e-06, + "loss": 0.7569, + "step": 20870 + }, + { + "epoch": 1.47, + "learning_rate": 6.80341043578822e-06, + "loss": 0.7598, + "step": 20880 + }, + { + "epoch": 1.48, + "learning_rate": 6.786227020090554e-06, + "loss": 0.762, + "step": 20890 + }, + { + "epoch": 1.48, + "learning_rate": 6.769060897069255e-06, + "loss": 0.7578, + "step": 20900 + }, + { + "epoch": 1.48, + "learning_rate": 6.75191208918937e-06, + "loss": 0.7582, + "step": 20910 + }, + { + "epoch": 1.48, + "learning_rate": 6.734780618893308e-06, + "loss": 0.7628, + "step": 20920 + }, + { + "epoch": 1.48, + "learning_rate": 6.7176665086007735e-06, + "loss": 0.7613, + "step": 20930 + }, + { + "epoch": 1.48, + "learning_rate": 6.700569780708766e-06, + "loss": 0.7644, + "step": 20940 + }, + { + "epoch": 1.48, + "learning_rate": 6.683490457591528e-06, + "loss": 0.7628, + "step": 20950 + }, + { + "epoch": 1.48, + "learning_rate": 6.666428561600515e-06, + "loss": 0.7623, + "step": 20960 + }, + { + "epoch": 1.48, + "learning_rate": 6.649384115064405e-06, + "loss": 0.7609, + "step": 20970 + }, + { + "epoch": 1.48, + "learning_rate": 6.632357140289012e-06, + "loss": 0.7645, + "step": 20980 + }, + { + "epoch": 1.48, + "learning_rate": 6.6153476595572894e-06, + "loss": 0.7595, + "step": 20990 + }, + { + "epoch": 1.48, + "learning_rate": 6.598355695129317e-06, + "loss": 0.7635, + "step": 21000 + }, + { + "epoch": 1.48, + "learning_rate": 6.581381269242222e-06, + "loss": 0.7535, + "step": 21010 + }, + { + "epoch": 1.48, + "learning_rate": 6.564424404110206e-06, + "loss": 0.7519, + "step": 21020 + }, + { + "epoch": 1.49, + "learning_rate": 6.547485121924473e-06, + "loss": 0.7651, + "step": 21030 + }, + { + "epoch": 1.49, + "learning_rate": 6.530563444853211e-06, + "loss": 0.7534, + "step": 21040 + }, + { + "epoch": 1.49, + "learning_rate": 6.5136593950415895e-06, + "loss": 0.7519, + "step": 21050 + }, + { + "epoch": 1.49, + "learning_rate": 6.4967729946116885e-06, + "loss": 0.7578, + "step": 21060 + }, + { + "epoch": 1.49, + "learning_rate": 6.479904265662509e-06, + "loss": 0.7548, + "step": 21070 + }, + { + "epoch": 1.49, + "learning_rate": 6.463053230269911e-06, + "loss": 0.7487, + "step": 21080 + }, + { + "epoch": 1.49, + "learning_rate": 6.4462199104866e-06, + "loss": 0.7577, + "step": 21090 + }, + { + "epoch": 1.49, + "learning_rate": 6.42940432834211e-06, + "loss": 0.7521, + "step": 21100 + }, + { + "epoch": 1.49, + "learning_rate": 6.412606505842751e-06, + "loss": 0.7612, + "step": 21110 + }, + { + "epoch": 1.49, + "learning_rate": 6.395826464971586e-06, + "loss": 0.7577, + "step": 21120 + }, + { + "epoch": 1.49, + "learning_rate": 6.379064227688427e-06, + "loss": 0.7522, + "step": 21130 + }, + { + "epoch": 1.49, + "learning_rate": 6.362319815929765e-06, + "loss": 0.758, + "step": 21140 + }, + { + "epoch": 1.49, + "learning_rate": 6.345593251608784e-06, + "loss": 0.759, + "step": 21150 + }, + { + "epoch": 1.49, + "learning_rate": 6.328884556615289e-06, + "loss": 0.7535, + "step": 21160 + }, + { + "epoch": 1.5, + "learning_rate": 6.31219375281572e-06, + "loss": 0.7531, + "step": 21170 + }, + { + "epoch": 1.5, + "learning_rate": 6.295520862053093e-06, + "loss": 0.753, + "step": 21180 + }, + { + "epoch": 1.5, + "learning_rate": 6.278865906146978e-06, + "loss": 0.7554, + "step": 21190 + }, + { + "epoch": 1.5, + "learning_rate": 6.262228906893475e-06, + "loss": 0.7584, + "step": 21200 + }, + { + "epoch": 1.5, + "learning_rate": 6.2456098860652e-06, + "loss": 0.7546, + "step": 21210 + }, + { + "epoch": 1.5, + "learning_rate": 6.2290088654112144e-06, + "loss": 0.7531, + "step": 21220 + }, + { + "epoch": 1.5, + "learning_rate": 6.212425866657051e-06, + "loss": 0.7481, + "step": 21230 + }, + { + "epoch": 1.5, + "learning_rate": 6.19586091150463e-06, + "loss": 0.7578, + "step": 21240 + }, + { + "epoch": 1.5, + "learning_rate": 6.179314021632286e-06, + "loss": 0.7568, + "step": 21250 + }, + { + "epoch": 1.5, + "learning_rate": 6.162785218694693e-06, + "loss": 0.7618, + "step": 21260 + }, + { + "epoch": 1.5, + "learning_rate": 6.146274524322857e-06, + "loss": 0.7497, + "step": 21270 + }, + { + "epoch": 1.5, + "learning_rate": 6.1297819601240835e-06, + "loss": 0.7547, + "step": 21280 + }, + { + "epoch": 1.5, + "learning_rate": 6.113307547681971e-06, + "loss": 0.7524, + "step": 21290 + }, + { + "epoch": 1.5, + "learning_rate": 6.09685130855633e-06, + "loss": 0.7544, + "step": 21300 + }, + { + "epoch": 1.51, + "learning_rate": 6.080413264283225e-06, + "loss": 0.7559, + "step": 21310 + }, + { + "epoch": 1.51, + "learning_rate": 6.063993436374873e-06, + "loss": 0.7596, + "step": 21320 + }, + { + "epoch": 1.51, + "learning_rate": 6.0475918463196895e-06, + "loss": 0.755, + "step": 21330 + }, + { + "epoch": 1.51, + "learning_rate": 6.031208515582176e-06, + "loss": 0.7565, + "step": 21340 + }, + { + "epoch": 1.51, + "learning_rate": 6.014843465602984e-06, + "loss": 0.7614, + "step": 21350 + }, + { + "epoch": 1.51, + "learning_rate": 5.998496717798807e-06, + "loss": 0.7482, + "step": 21360 + }, + { + "epoch": 1.51, + "learning_rate": 5.982168293562416e-06, + "loss": 0.7538, + "step": 21370 + }, + { + "epoch": 1.51, + "learning_rate": 5.96585821426257e-06, + "loss": 0.7568, + "step": 21380 + }, + { + "epoch": 1.51, + "learning_rate": 5.949566501244053e-06, + "loss": 0.7567, + "step": 21390 + }, + { + "epoch": 1.51, + "learning_rate": 5.933293175827586e-06, + "loss": 0.7613, + "step": 21400 + }, + { + "epoch": 1.51, + "learning_rate": 5.917038259309853e-06, + "loss": 0.7526, + "step": 21410 + }, + { + "epoch": 1.51, + "learning_rate": 5.900801772963409e-06, + "loss": 0.755, + "step": 21420 + }, + { + "epoch": 1.51, + "learning_rate": 5.884583738036733e-06, + "loss": 0.7552, + "step": 21430 + }, + { + "epoch": 1.51, + "learning_rate": 5.8683841757541205e-06, + "loss": 0.7619, + "step": 21440 + }, + { + "epoch": 1.52, + "learning_rate": 5.852203107315721e-06, + "loss": 0.759, + "step": 21450 + }, + { + "epoch": 1.52, + "learning_rate": 5.836040553897455e-06, + "loss": 0.7568, + "step": 21460 + }, + { + "epoch": 1.52, + "learning_rate": 5.8198965366510415e-06, + "loss": 0.7597, + "step": 21470 + }, + { + "epoch": 1.52, + "learning_rate": 5.8037710767039106e-06, + "loss": 0.7506, + "step": 21480 + }, + { + "epoch": 1.52, + "learning_rate": 5.787664195159239e-06, + "loss": 0.7603, + "step": 21490 + }, + { + "epoch": 1.52, + "learning_rate": 5.771575913095853e-06, + "loss": 0.7516, + "step": 21500 + }, + { + "epoch": 1.52, + "learning_rate": 5.75550625156827e-06, + "loss": 0.753, + "step": 21510 + }, + { + "epoch": 1.52, + "learning_rate": 5.739455231606621e-06, + "loss": 0.7581, + "step": 21520 + }, + { + "epoch": 1.52, + "learning_rate": 5.723422874216656e-06, + "loss": 0.7536, + "step": 21530 + }, + { + "epoch": 1.52, + "learning_rate": 5.707409200379681e-06, + "loss": 0.7588, + "step": 21540 + }, + { + "epoch": 1.52, + "learning_rate": 5.691414231052577e-06, + "loss": 0.7596, + "step": 21550 + }, + { + "epoch": 1.52, + "learning_rate": 5.67543798716772e-06, + "loss": 0.7553, + "step": 21560 + }, + { + "epoch": 1.52, + "learning_rate": 5.659480489633e-06, + "loss": 0.759, + "step": 21570 + }, + { + "epoch": 1.52, + "learning_rate": 5.643541759331756e-06, + "loss": 0.7557, + "step": 21580 + }, + { + "epoch": 1.53, + "learning_rate": 5.627621817122793e-06, + "loss": 0.7551, + "step": 21590 + }, + { + "epoch": 1.53, + "learning_rate": 5.611720683840296e-06, + "loss": 0.7595, + "step": 21600 + }, + { + "epoch": 1.53, + "learning_rate": 5.595838380293865e-06, + "loss": 0.7495, + "step": 21610 + }, + { + "epoch": 1.53, + "learning_rate": 5.579974927268434e-06, + "loss": 0.7483, + "step": 21620 + }, + { + "epoch": 1.53, + "learning_rate": 5.56413034552429e-06, + "loss": 0.7447, + "step": 21630 + }, + { + "epoch": 1.53, + "learning_rate": 5.548304655797003e-06, + "loss": 0.7487, + "step": 21640 + }, + { + "epoch": 1.53, + "learning_rate": 5.532497878797429e-06, + "loss": 0.7528, + "step": 21650 + }, + { + "epoch": 1.53, + "learning_rate": 5.516710035211665e-06, + "loss": 0.7585, + "step": 21660 + }, + { + "epoch": 1.53, + "learning_rate": 5.500941145701048e-06, + "loss": 0.7533, + "step": 21670 + }, + { + "epoch": 1.53, + "learning_rate": 5.485191230902089e-06, + "loss": 0.7564, + "step": 21680 + }, + { + "epoch": 1.53, + "learning_rate": 5.469460311426489e-06, + "loss": 0.7536, + "step": 21690 + }, + { + "epoch": 1.53, + "learning_rate": 5.453748407861066e-06, + "loss": 0.7473, + "step": 21700 + }, + { + "epoch": 1.53, + "learning_rate": 5.438055540767772e-06, + "loss": 0.7541, + "step": 21710 + }, + { + "epoch": 1.53, + "learning_rate": 5.422381730683639e-06, + "loss": 0.7522, + "step": 21720 + }, + { + "epoch": 1.53, + "learning_rate": 5.406726998120751e-06, + "loss": 0.7535, + "step": 21730 + }, + { + "epoch": 1.54, + "learning_rate": 5.391091363566241e-06, + "loss": 0.7538, + "step": 21740 + }, + { + "epoch": 1.54, + "learning_rate": 5.375474847482241e-06, + "loss": 0.7602, + "step": 21750 + }, + { + "epoch": 1.54, + "learning_rate": 5.359877470305852e-06, + "loss": 0.7511, + "step": 21760 + }, + { + "epoch": 1.54, + "learning_rate": 5.344299252449156e-06, + "loss": 0.7533, + "step": 21770 + }, + { + "epoch": 1.54, + "learning_rate": 5.328740214299129e-06, + "loss": 0.7595, + "step": 21780 + }, + { + "epoch": 1.54, + "learning_rate": 5.3132003762176755e-06, + "loss": 0.7527, + "step": 21790 + }, + { + "epoch": 1.54, + "learning_rate": 5.297679758541554e-06, + "loss": 0.7514, + "step": 21800 + }, + { + "epoch": 1.54, + "learning_rate": 5.28217838158237e-06, + "loss": 0.7525, + "step": 21810 + }, + { + "epoch": 1.54, + "learning_rate": 5.2666962656265674e-06, + "loss": 0.7539, + "step": 21820 + }, + { + "epoch": 1.54, + "learning_rate": 5.251233430935358e-06, + "loss": 0.7488, + "step": 21830 + }, + { + "epoch": 1.54, + "learning_rate": 5.235789897744743e-06, + "loss": 0.759, + "step": 21840 + }, + { + "epoch": 1.54, + "learning_rate": 5.220365686265452e-06, + "loss": 0.7564, + "step": 21850 + }, + { + "epoch": 1.54, + "learning_rate": 5.204960816682922e-06, + "loss": 0.7494, + "step": 21860 + }, + { + "epoch": 1.54, + "learning_rate": 5.1895753091572995e-06, + "loss": 0.7569, + "step": 21870 + }, + { + "epoch": 1.55, + "learning_rate": 5.174209183823373e-06, + "loss": 0.7549, + "step": 21880 + }, + { + "epoch": 1.55, + "learning_rate": 5.1588624607905634e-06, + "loss": 0.7524, + "step": 21890 + }, + { + "epoch": 1.55, + "learning_rate": 5.143535160142923e-06, + "loss": 0.7531, + "step": 21900 + }, + { + "epoch": 1.55, + "learning_rate": 5.128227301939059e-06, + "loss": 0.7551, + "step": 21910 + }, + { + "epoch": 1.55, + "learning_rate": 5.1129389062121615e-06, + "loss": 0.755, + "step": 21920 + }, + { + "epoch": 1.55, + "learning_rate": 5.097669992969918e-06, + "loss": 0.754, + "step": 21930 + }, + { + "epoch": 1.55, + "learning_rate": 5.082420582194556e-06, + "loss": 0.7501, + "step": 21940 + }, + { + "epoch": 1.55, + "learning_rate": 5.067190693842752e-06, + "loss": 0.7556, + "step": 21950 + }, + { + "epoch": 1.55, + "learning_rate": 5.051980347845644e-06, + "loss": 0.7575, + "step": 21960 + }, + { + "epoch": 1.55, + "learning_rate": 5.0367895641087925e-06, + "loss": 0.7593, + "step": 21970 + }, + { + "epoch": 1.55, + "learning_rate": 5.0216183625121685e-06, + "loss": 0.7473, + "step": 21980 + }, + { + "epoch": 1.55, + "learning_rate": 5.006466762910096e-06, + "loss": 0.7568, + "step": 21990 + }, + { + "epoch": 1.55, + "learning_rate": 4.99133478513127e-06, + "loss": 0.7501, + "step": 22000 + }, + { + "epoch": 1.55, + "learning_rate": 4.9762224489786805e-06, + "loss": 0.7537, + "step": 22010 + }, + { + "epoch": 1.56, + "learning_rate": 4.961129774229645e-06, + "loss": 0.7477, + "step": 22020 + }, + { + "epoch": 1.56, + "learning_rate": 4.94605678063571e-06, + "loss": 0.7489, + "step": 22030 + }, + { + "epoch": 1.56, + "learning_rate": 4.931003487922703e-06, + "loss": 0.7607, + "step": 22040 + }, + { + "epoch": 1.56, + "learning_rate": 4.915969915790644e-06, + "loss": 0.7579, + "step": 22050 + }, + { + "epoch": 1.56, + "learning_rate": 4.900956083913764e-06, + "loss": 0.7555, + "step": 22060 + }, + { + "epoch": 1.56, + "learning_rate": 4.885962011940439e-06, + "loss": 0.7547, + "step": 22070 + }, + { + "epoch": 1.56, + "learning_rate": 4.870987719493212e-06, + "loss": 0.7514, + "step": 22080 + }, + { + "epoch": 1.56, + "learning_rate": 4.85603322616871e-06, + "loss": 0.7536, + "step": 22090 + }, + { + "epoch": 1.56, + "learning_rate": 4.841098551537686e-06, + "loss": 0.7564, + "step": 22100 + }, + { + "epoch": 1.56, + "learning_rate": 4.826183715144912e-06, + "loss": 0.7546, + "step": 22110 + }, + { + "epoch": 1.56, + "learning_rate": 4.8112887365092385e-06, + "loss": 0.7494, + "step": 22120 + }, + { + "epoch": 1.56, + "learning_rate": 4.7964136351235024e-06, + "loss": 0.7507, + "step": 22130 + }, + { + "epoch": 1.56, + "learning_rate": 4.781558430454544e-06, + "loss": 0.7543, + "step": 22140 + }, + { + "epoch": 1.56, + "learning_rate": 4.7667231419431505e-06, + "loss": 0.7511, + "step": 22150 + }, + { + "epoch": 1.57, + "learning_rate": 4.75190778900406e-06, + "loss": 0.7466, + "step": 22160 + }, + { + "epoch": 1.57, + "learning_rate": 4.737112391025906e-06, + "loss": 0.7514, + "step": 22170 + }, + { + "epoch": 1.57, + "learning_rate": 4.722336967371233e-06, + "loss": 0.747, + "step": 22180 + }, + { + "epoch": 1.57, + "learning_rate": 4.7075815373764e-06, + "loss": 0.7498, + "step": 22190 + }, + { + "epoch": 1.57, + "learning_rate": 4.692846120351646e-06, + "loss": 0.7574, + "step": 22200 + }, + { + "epoch": 1.57, + "learning_rate": 4.678130735580995e-06, + "loss": 0.7525, + "step": 22210 + }, + { + "epoch": 1.57, + "learning_rate": 4.663435402322265e-06, + "loss": 0.744, + "step": 22220 + }, + { + "epoch": 1.57, + "learning_rate": 4.648760139807025e-06, + "loss": 0.747, + "step": 22230 + }, + { + "epoch": 1.57, + "learning_rate": 4.634104967240587e-06, + "loss": 0.7524, + "step": 22240 + }, + { + "epoch": 1.57, + "learning_rate": 4.619469903801965e-06, + "loss": 0.7509, + "step": 22250 + }, + { + "epoch": 1.57, + "learning_rate": 4.604854968643857e-06, + "loss": 0.7456, + "step": 22260 + }, + { + "epoch": 1.57, + "learning_rate": 4.590260180892614e-06, + "loss": 0.7488, + "step": 22270 + }, + { + "epoch": 1.57, + "learning_rate": 4.575685559648238e-06, + "loss": 0.7551, + "step": 22280 + }, + { + "epoch": 1.57, + "learning_rate": 4.561131123984315e-06, + "loss": 0.7491, + "step": 22290 + }, + { + "epoch": 1.58, + "learning_rate": 4.546596892948043e-06, + "loss": 0.7442, + "step": 22300 + }, + { + "epoch": 1.58, + "learning_rate": 4.5320828855601495e-06, + "loss": 0.7534, + "step": 22310 + }, + { + "epoch": 1.58, + "learning_rate": 4.51758912081492e-06, + "loss": 0.7554, + "step": 22320 + }, + { + "epoch": 1.58, + "learning_rate": 4.503115617680134e-06, + "loss": 0.7552, + "step": 22330 + }, + { + "epoch": 1.58, + "learning_rate": 4.488662395097056e-06, + "loss": 0.7464, + "step": 22340 + }, + { + "epoch": 1.58, + "learning_rate": 4.4742294719804095e-06, + "loss": 0.7507, + "step": 22350 + }, + { + "epoch": 1.58, + "learning_rate": 4.4598168672183655e-06, + "loss": 0.7532, + "step": 22360 + }, + { + "epoch": 1.58, + "learning_rate": 4.445424599672481e-06, + "loss": 0.7538, + "step": 22370 + }, + { + "epoch": 1.58, + "learning_rate": 4.431052688177724e-06, + "loss": 0.7575, + "step": 22380 + }, + { + "epoch": 1.58, + "learning_rate": 4.416701151542402e-06, + "loss": 0.7544, + "step": 22390 + }, + { + "epoch": 1.58, + "learning_rate": 4.4023700085481735e-06, + "loss": 0.7555, + "step": 22400 + }, + { + "epoch": 1.58, + "learning_rate": 4.3880592779499965e-06, + "loss": 0.7504, + "step": 22410 + }, + { + "epoch": 1.58, + "learning_rate": 4.373768978476123e-06, + "loss": 0.7582, + "step": 22420 + }, + { + "epoch": 1.58, + "learning_rate": 4.359499128828055e-06, + "loss": 0.7472, + "step": 22430 + }, + { + "epoch": 1.59, + "learning_rate": 4.345249747680553e-06, + "loss": 0.7517, + "step": 22440 + }, + { + "epoch": 1.59, + "learning_rate": 4.331020853681571e-06, + "loss": 0.7523, + "step": 22450 + }, + { + "epoch": 1.59, + "learning_rate": 4.316812465452267e-06, + "loss": 0.75, + "step": 22460 + }, + { + "epoch": 1.59, + "learning_rate": 4.302624601586946e-06, + "loss": 0.7565, + "step": 22470 + }, + { + "epoch": 1.59, + "learning_rate": 4.288457280653077e-06, + "loss": 0.7487, + "step": 22480 + }, + { + "epoch": 1.59, + "learning_rate": 4.274310521191225e-06, + "loss": 0.7505, + "step": 22490 + }, + { + "epoch": 1.59, + "learning_rate": 4.260184341715042e-06, + "loss": 0.7546, + "step": 22500 + }, + { + "epoch": 1.59, + "learning_rate": 4.246078760711274e-06, + "loss": 0.7602, + "step": 22510 + }, + { + "epoch": 1.59, + "learning_rate": 4.231993796639686e-06, + "loss": 0.7505, + "step": 22520 + }, + { + "epoch": 1.59, + "learning_rate": 4.217929467933064e-06, + "loss": 0.7526, + "step": 22530 + }, + { + "epoch": 1.59, + "learning_rate": 4.203885792997207e-06, + "loss": 0.7463, + "step": 22540 + }, + { + "epoch": 1.59, + "learning_rate": 4.1898627902108615e-06, + "loss": 0.7544, + "step": 22550 + }, + { + "epoch": 1.59, + "learning_rate": 4.175860477925739e-06, + "loss": 0.7487, + "step": 22560 + }, + { + "epoch": 1.59, + "learning_rate": 4.1618788744664654e-06, + "loss": 0.7534, + "step": 22570 + }, + { + "epoch": 1.59, + "learning_rate": 4.147917998130555e-06, + "loss": 0.752, + "step": 22580 + }, + { + "epoch": 1.6, + "learning_rate": 4.133977867188423e-06, + "loss": 0.7478, + "step": 22590 + }, + { + "epoch": 1.6, + "learning_rate": 4.120058499883308e-06, + "loss": 0.7511, + "step": 22600 + }, + { + "epoch": 1.6, + "learning_rate": 4.106159914431298e-06, + "loss": 0.7515, + "step": 22610 + }, + { + "epoch": 1.6, + "learning_rate": 4.092282129021268e-06, + "loss": 0.752, + "step": 22620 + }, + { + "epoch": 1.6, + "learning_rate": 4.078425161814874e-06, + "loss": 0.7467, + "step": 22630 + }, + { + "epoch": 1.6, + "learning_rate": 4.06458903094654e-06, + "loss": 0.7492, + "step": 22640 + }, + { + "epoch": 1.6, + "learning_rate": 4.050773754523407e-06, + "loss": 0.7421, + "step": 22650 + }, + { + "epoch": 1.6, + "learning_rate": 4.03697935062533e-06, + "loss": 0.7538, + "step": 22660 + }, + { + "epoch": 1.6, + "learning_rate": 4.023205837304855e-06, + "loss": 0.7488, + "step": 22670 + }, + { + "epoch": 1.6, + "learning_rate": 4.009453232587175e-06, + "loss": 0.7499, + "step": 22680 + }, + { + "epoch": 1.6, + "learning_rate": 3.995721554470135e-06, + "loss": 0.7542, + "step": 22690 + }, + { + "epoch": 1.6, + "learning_rate": 3.982010820924178e-06, + "loss": 0.7567, + "step": 22700 + }, + { + "epoch": 1.6, + "learning_rate": 3.968321049892356e-06, + "loss": 0.7429, + "step": 22710 + }, + { + "epoch": 1.6, + "learning_rate": 3.954652259290272e-06, + "loss": 0.752, + "step": 22720 + }, + { + "epoch": 1.61, + "learning_rate": 3.941004467006075e-06, + "loss": 0.7506, + "step": 22730 + }, + { + "epoch": 1.61, + "learning_rate": 3.927377690900436e-06, + "loss": 0.7478, + "step": 22740 + }, + { + "epoch": 1.61, + "learning_rate": 3.913771948806529e-06, + "loss": 0.7505, + "step": 22750 + }, + { + "epoch": 1.61, + "learning_rate": 3.900187258529986e-06, + "loss": 0.7535, + "step": 22760 + }, + { + "epoch": 1.61, + "learning_rate": 3.886623637848908e-06, + "loss": 0.7531, + "step": 22770 + }, + { + "epoch": 1.61, + "learning_rate": 3.8730811045138e-06, + "loss": 0.7439, + "step": 22780 + }, + { + "epoch": 1.61, + "learning_rate": 3.8595596762476e-06, + "loss": 0.7466, + "step": 22790 + }, + { + "epoch": 1.61, + "learning_rate": 3.846059370745585e-06, + "loss": 0.745, + "step": 22800 + }, + { + "epoch": 1.61, + "learning_rate": 3.832580205675431e-06, + "loss": 0.7564, + "step": 22810 + }, + { + "epoch": 1.61, + "learning_rate": 3.819122198677119e-06, + "loss": 0.7542, + "step": 22820 + }, + { + "epoch": 1.61, + "learning_rate": 3.805685367362957e-06, + "loss": 0.7505, + "step": 22830 + }, + { + "epoch": 1.61, + "learning_rate": 3.792269729317528e-06, + "loss": 0.7534, + "step": 22840 + }, + { + "epoch": 1.61, + "learning_rate": 3.7788753020976975e-06, + "loss": 0.7504, + "step": 22850 + }, + { + "epoch": 1.61, + "learning_rate": 3.7655021032325477e-06, + "loss": 0.7537, + "step": 22860 + }, + { + "epoch": 1.62, + "learning_rate": 3.7521501502234105e-06, + "loss": 0.7492, + "step": 22870 + }, + { + "epoch": 1.62, + "learning_rate": 3.7388194605437765e-06, + "loss": 0.755, + "step": 22880 + }, + { + "epoch": 1.62, + "learning_rate": 3.725510051639345e-06, + "loss": 0.7457, + "step": 22890 + }, + { + "epoch": 1.62, + "learning_rate": 3.712221940927938e-06, + "loss": 0.7531, + "step": 22900 + }, + { + "epoch": 1.62, + "learning_rate": 3.698955145799528e-06, + "loss": 0.7467, + "step": 22910 + }, + { + "epoch": 1.62, + "learning_rate": 3.6857096836161676e-06, + "loss": 0.7501, + "step": 22920 + }, + { + "epoch": 1.62, + "learning_rate": 3.6724855717120187e-06, + "loss": 0.744, + "step": 22930 + }, + { + "epoch": 1.62, + "learning_rate": 3.6592828273932756e-06, + "loss": 0.7464, + "step": 22940 + }, + { + "epoch": 1.62, + "learning_rate": 3.646101467938199e-06, + "loss": 0.7469, + "step": 22950 + }, + { + "epoch": 1.62, + "learning_rate": 3.632941510597021e-06, + "loss": 0.7519, + "step": 22960 + }, + { + "epoch": 1.62, + "learning_rate": 3.61980297259201e-06, + "loss": 0.7477, + "step": 22970 + }, + { + "epoch": 1.62, + "learning_rate": 3.6066858711173723e-06, + "loss": 0.7529, + "step": 22980 + }, + { + "epoch": 1.62, + "learning_rate": 3.593590223339283e-06, + "loss": 0.7442, + "step": 22990 + }, + { + "epoch": 1.62, + "learning_rate": 3.5805160463958145e-06, + "loss": 0.7517, + "step": 23000 + }, + { + "epoch": 1.63, + "learning_rate": 3.567463357396972e-06, + "loss": 0.7467, + "step": 23010 + }, + { + "epoch": 1.63, + "learning_rate": 3.5544321734246133e-06, + "loss": 0.7475, + "step": 23020 + }, + { + "epoch": 1.63, + "learning_rate": 3.541422511532466e-06, + "loss": 0.7469, + "step": 23030 + }, + { + "epoch": 1.63, + "learning_rate": 3.5284343887460846e-06, + "loss": 0.752, + "step": 23040 + }, + { + "epoch": 1.63, + "learning_rate": 3.5154678220628503e-06, + "loss": 0.7487, + "step": 23050 + }, + { + "epoch": 1.63, + "learning_rate": 3.502522828451913e-06, + "loss": 0.752, + "step": 23060 + }, + { + "epoch": 1.63, + "learning_rate": 3.4895994248542107e-06, + "loss": 0.7544, + "step": 23070 + }, + { + "epoch": 1.63, + "learning_rate": 3.4766976281824106e-06, + "loss": 0.7512, + "step": 23080 + }, + { + "epoch": 1.63, + "learning_rate": 3.4638174553209146e-06, + "loss": 0.7582, + "step": 23090 + }, + { + "epoch": 1.63, + "learning_rate": 3.4509589231258223e-06, + "loss": 0.7454, + "step": 23100 + }, + { + "epoch": 1.63, + "learning_rate": 3.438122048424908e-06, + "loss": 0.7476, + "step": 23110 + }, + { + "epoch": 1.63, + "learning_rate": 3.425306848017602e-06, + "loss": 0.7484, + "step": 23120 + }, + { + "epoch": 1.63, + "learning_rate": 3.4125133386749856e-06, + "loss": 0.7507, + "step": 23130 + }, + { + "epoch": 1.63, + "learning_rate": 3.3997415371397313e-06, + "loss": 0.7465, + "step": 23140 + }, + { + "epoch": 1.64, + "learning_rate": 3.3869914601261235e-06, + "loss": 0.7483, + "step": 23150 + }, + { + "epoch": 1.64, + "learning_rate": 3.3742631243199964e-06, + "loss": 0.744, + "step": 23160 + }, + { + "epoch": 1.64, + "learning_rate": 3.3615565463787523e-06, + "loss": 0.743, + "step": 23170 + }, + { + "epoch": 1.64, + "learning_rate": 3.3488717429313034e-06, + "loss": 0.744, + "step": 23180 + }, + { + "epoch": 1.64, + "learning_rate": 3.336208730578072e-06, + "loss": 0.7441, + "step": 23190 + }, + { + "epoch": 1.64, + "learning_rate": 3.323567525890954e-06, + "loss": 0.7523, + "step": 23200 + }, + { + "epoch": 1.64, + "learning_rate": 3.3109481454133262e-06, + "loss": 0.7553, + "step": 23210 + }, + { + "epoch": 1.64, + "learning_rate": 3.2983506056599834e-06, + "loss": 0.7473, + "step": 23220 + }, + { + "epoch": 1.64, + "learning_rate": 3.285774923117151e-06, + "loss": 0.7498, + "step": 23230 + }, + { + "epoch": 1.64, + "learning_rate": 3.273221114242442e-06, + "loss": 0.753, + "step": 23240 + }, + { + "epoch": 1.64, + "learning_rate": 3.2606891954648524e-06, + "loss": 0.7515, + "step": 23250 + }, + { + "epoch": 1.64, + "learning_rate": 3.248179183184723e-06, + "loss": 0.7489, + "step": 23260 + }, + { + "epoch": 1.64, + "learning_rate": 3.2356910937737273e-06, + "loss": 0.7497, + "step": 23270 + }, + { + "epoch": 1.64, + "learning_rate": 3.223224943574845e-06, + "loss": 0.7461, + "step": 23280 + }, + { + "epoch": 1.65, + "learning_rate": 3.21078074890236e-06, + "loss": 0.7511, + "step": 23290 + }, + { + "epoch": 1.65, + "learning_rate": 3.1983585260418026e-06, + "loss": 0.7473, + "step": 23300 + }, + { + "epoch": 1.65, + "learning_rate": 3.1859582912499663e-06, + "loss": 0.742, + "step": 23310 + }, + { + "epoch": 1.65, + "learning_rate": 3.173580060754857e-06, + "loss": 0.7606, + "step": 23320 + }, + { + "epoch": 1.65, + "learning_rate": 3.1612238507556925e-06, + "loss": 0.7481, + "step": 23330 + }, + { + "epoch": 1.65, + "learning_rate": 3.1488896774228682e-06, + "loss": 0.7473, + "step": 23340 + }, + { + "epoch": 1.65, + "learning_rate": 3.136577556897933e-06, + "loss": 0.753, + "step": 23350 + }, + { + "epoch": 1.65, + "learning_rate": 3.124287505293595e-06, + "loss": 0.7524, + "step": 23360 + }, + { + "epoch": 1.65, + "learning_rate": 3.112019538693665e-06, + "loss": 0.7407, + "step": 23370 + }, + { + "epoch": 1.65, + "learning_rate": 3.0997736731530504e-06, + "loss": 0.7406, + "step": 23380 + }, + { + "epoch": 1.65, + "learning_rate": 3.087549924697748e-06, + "loss": 0.7502, + "step": 23390 + }, + { + "epoch": 1.65, + "learning_rate": 3.0753483093247993e-06, + "loss": 0.7511, + "step": 23400 + }, + { + "epoch": 1.65, + "learning_rate": 3.0631688430022886e-06, + "loss": 0.7491, + "step": 23410 + }, + { + "epoch": 1.65, + "learning_rate": 3.051011541669309e-06, + "loss": 0.7446, + "step": 23420 + }, + { + "epoch": 1.66, + "learning_rate": 3.038876421235939e-06, + "loss": 0.7499, + "step": 23430 + }, + { + "epoch": 1.66, + "learning_rate": 3.0267634975832517e-06, + "loss": 0.7489, + "step": 23440 + }, + { + "epoch": 1.66, + "learning_rate": 3.014672786563244e-06, + "loss": 0.7445, + "step": 23450 + }, + { + "epoch": 1.66, + "learning_rate": 3.0026043039988707e-06, + "loss": 0.7436, + "step": 23460 + }, + { + "epoch": 1.66, + "learning_rate": 2.9905580656839728e-06, + "loss": 0.748, + "step": 23470 + }, + { + "epoch": 1.66, + "learning_rate": 2.9785340873832956e-06, + "loss": 0.7476, + "step": 23480 + }, + { + "epoch": 1.66, + "learning_rate": 2.966532384832441e-06, + "loss": 0.7489, + "step": 23490 + }, + { + "epoch": 1.66, + "learning_rate": 2.954552973737874e-06, + "loss": 0.7518, + "step": 23500 + }, + { + "epoch": 1.66, + "learning_rate": 2.942595869776874e-06, + "loss": 0.7474, + "step": 23510 + }, + { + "epoch": 1.66, + "learning_rate": 2.9306610885975394e-06, + "loss": 0.7465, + "step": 23520 + }, + { + "epoch": 1.66, + "learning_rate": 2.9187486458187385e-06, + "loss": 0.7451, + "step": 23530 + }, + { + "epoch": 1.66, + "learning_rate": 2.9068585570301256e-06, + "loss": 0.7474, + "step": 23540 + }, + { + "epoch": 1.66, + "learning_rate": 2.894990837792082e-06, + "loss": 0.7497, + "step": 23550 + }, + { + "epoch": 1.66, + "learning_rate": 2.8831455036357338e-06, + "loss": 0.7509, + "step": 23560 + }, + { + "epoch": 1.66, + "learning_rate": 2.8713225700628844e-06, + "loss": 0.7473, + "step": 23570 + }, + { + "epoch": 1.67, + "learning_rate": 2.859522052546051e-06, + "loss": 0.7461, + "step": 23580 + }, + { + "epoch": 1.67, + "learning_rate": 2.8477439665283956e-06, + "loss": 0.7453, + "step": 23590 + }, + { + "epoch": 1.67, + "learning_rate": 2.83598832742374e-06, + "loss": 0.7396, + "step": 23600 + }, + { + "epoch": 1.67, + "learning_rate": 2.8242551506165108e-06, + "loss": 0.7526, + "step": 23610 + }, + { + "epoch": 1.67, + "learning_rate": 2.8125444514617606e-06, + "loss": 0.7451, + "step": 23620 + }, + { + "epoch": 1.67, + "learning_rate": 2.8008562452851042e-06, + "loss": 0.7437, + "step": 23630 + }, + { + "epoch": 1.67, + "learning_rate": 2.789190547382743e-06, + "loss": 0.7421, + "step": 23640 + }, + { + "epoch": 1.67, + "learning_rate": 2.7775473730213942e-06, + "loss": 0.7453, + "step": 23650 + }, + { + "epoch": 1.67, + "learning_rate": 2.7659267374383247e-06, + "loss": 0.7381, + "step": 23660 + }, + { + "epoch": 1.67, + "learning_rate": 2.7543286558412874e-06, + "loss": 0.7502, + "step": 23670 + }, + { + "epoch": 1.67, + "learning_rate": 2.7427531434085365e-06, + "loss": 0.7403, + "step": 23680 + }, + { + "epoch": 1.67, + "learning_rate": 2.7312002152887673e-06, + "loss": 0.7411, + "step": 23690 + }, + { + "epoch": 1.67, + "learning_rate": 2.7196698866011437e-06, + "loss": 0.7453, + "step": 23700 + }, + { + "epoch": 1.67, + "learning_rate": 2.708162172435238e-06, + "loss": 0.7438, + "step": 23710 + }, + { + "epoch": 1.68, + "learning_rate": 2.6966770878510317e-06, + "loss": 0.7451, + "step": 23720 + }, + { + "epoch": 1.68, + "learning_rate": 2.6852146478788886e-06, + "loss": 0.7396, + "step": 23730 + }, + { + "epoch": 1.68, + "learning_rate": 2.673774867519545e-06, + "loss": 0.745, + "step": 23740 + }, + { + "epoch": 1.68, + "learning_rate": 2.662357761744072e-06, + "loss": 0.7442, + "step": 23750 + }, + { + "epoch": 1.68, + "learning_rate": 2.650963345493882e-06, + "loss": 0.7468, + "step": 23760 + }, + { + "epoch": 1.68, + "learning_rate": 2.6395916336806784e-06, + "loss": 0.752, + "step": 23770 + }, + { + "epoch": 1.68, + "learning_rate": 2.6282426411864625e-06, + "loss": 0.7467, + "step": 23780 + }, + { + "epoch": 1.68, + "learning_rate": 2.616916382863499e-06, + "loss": 0.7407, + "step": 23790 + }, + { + "epoch": 1.68, + "learning_rate": 2.605612873534298e-06, + "loss": 0.7452, + "step": 23800 + }, + { + "epoch": 1.68, + "learning_rate": 2.594332127991599e-06, + "loss": 0.7442, + "step": 23810 + }, + { + "epoch": 1.68, + "learning_rate": 2.5830741609983577e-06, + "loss": 0.7419, + "step": 23820 + }, + { + "epoch": 1.68, + "learning_rate": 2.5718389872877114e-06, + "loss": 0.7486, + "step": 23830 + }, + { + "epoch": 1.68, + "learning_rate": 2.5606266215629783e-06, + "loss": 0.7488, + "step": 23840 + }, + { + "epoch": 1.68, + "learning_rate": 2.5494370784976095e-06, + "loss": 0.7454, + "step": 23850 + }, + { + "epoch": 1.69, + "learning_rate": 2.5382703727352144e-06, + "loss": 0.7504, + "step": 23860 + }, + { + "epoch": 1.69, + "learning_rate": 2.5271265188894957e-06, + "loss": 0.7443, + "step": 23870 + }, + { + "epoch": 1.69, + "learning_rate": 2.5160055315442542e-06, + "loss": 0.749, + "step": 23880 + }, + { + "epoch": 1.69, + "learning_rate": 2.5049074252533667e-06, + "loss": 0.7426, + "step": 23890 + }, + { + "epoch": 1.69, + "learning_rate": 2.49383221454077e-06, + "loss": 0.7457, + "step": 23900 + }, + { + "epoch": 1.69, + "learning_rate": 2.482779913900433e-06, + "loss": 0.7461, + "step": 23910 + }, + { + "epoch": 1.69, + "learning_rate": 2.4717505377963467e-06, + "loss": 0.7436, + "step": 23920 + }, + { + "epoch": 1.69, + "learning_rate": 2.460744100662491e-06, + "loss": 0.7469, + "step": 23930 + }, + { + "epoch": 1.69, + "learning_rate": 2.4497606169028474e-06, + "loss": 0.7501, + "step": 23940 + }, + { + "epoch": 1.69, + "learning_rate": 2.4388001008913296e-06, + "loss": 0.7466, + "step": 23950 + }, + { + "epoch": 1.69, + "learning_rate": 2.427862566971817e-06, + "loss": 0.7483, + "step": 23960 + }, + { + "epoch": 1.69, + "learning_rate": 2.416948029458097e-06, + "loss": 0.7462, + "step": 23970 + }, + { + "epoch": 1.69, + "learning_rate": 2.40605650263388e-06, + "loss": 0.7428, + "step": 23980 + }, + { + "epoch": 1.69, + "learning_rate": 2.3951880007527417e-06, + "loss": 0.7382, + "step": 23990 + }, + { + "epoch": 1.7, + "learning_rate": 2.3843425380381446e-06, + "loss": 0.7405, + "step": 24000 + }, + { + "epoch": 1.7, + "learning_rate": 2.373520128683382e-06, + "loss": 0.7453, + "step": 24010 + }, + { + "epoch": 1.7, + "learning_rate": 2.3627207868515956e-06, + "loss": 0.745, + "step": 24020 + }, + { + "epoch": 1.7, + "learning_rate": 2.3519445266757267e-06, + "loss": 0.7444, + "step": 24030 + }, + { + "epoch": 1.7, + "learning_rate": 2.3411913622585127e-06, + "loss": 0.7512, + "step": 24040 + }, + { + "epoch": 1.7, + "learning_rate": 2.3304613076724623e-06, + "loss": 0.7424, + "step": 24050 + }, + { + "epoch": 1.7, + "learning_rate": 2.319754376959853e-06, + "loss": 0.7418, + "step": 24060 + }, + { + "epoch": 1.7, + "learning_rate": 2.3090705841326844e-06, + "loss": 0.7506, + "step": 24070 + }, + { + "epoch": 1.7, + "learning_rate": 2.2984099431726927e-06, + "loss": 0.7404, + "step": 24080 + }, + { + "epoch": 1.7, + "learning_rate": 2.2877724680312996e-06, + "loss": 0.7451, + "step": 24090 + }, + { + "epoch": 1.7, + "learning_rate": 2.277158172629621e-06, + "loss": 0.7411, + "step": 24100 + }, + { + "epoch": 1.7, + "learning_rate": 2.2665670708584364e-06, + "loss": 0.7466, + "step": 24110 + }, + { + "epoch": 1.7, + "learning_rate": 2.2559991765781606e-06, + "loss": 0.7421, + "step": 24120 + }, + { + "epoch": 1.7, + "learning_rate": 2.2454545036188556e-06, + "loss": 0.744, + "step": 24130 + }, + { + "epoch": 1.71, + "learning_rate": 2.2349330657801804e-06, + "loss": 0.742, + "step": 24140 + }, + { + "epoch": 1.71, + "learning_rate": 2.224434876831387e-06, + "loss": 0.7427, + "step": 24150 + }, + { + "epoch": 1.71, + "learning_rate": 2.2139599505113153e-06, + "loss": 0.7437, + "step": 24160 + }, + { + "epoch": 1.71, + "learning_rate": 2.203508300528341e-06, + "loss": 0.7487, + "step": 24170 + }, + { + "epoch": 1.71, + "learning_rate": 2.1930799405604007e-06, + "loss": 0.7491, + "step": 24180 + }, + { + "epoch": 1.71, + "learning_rate": 2.182674884254934e-06, + "loss": 0.7418, + "step": 24190 + }, + { + "epoch": 1.71, + "learning_rate": 2.1722931452288877e-06, + "loss": 0.735, + "step": 24200 + }, + { + "epoch": 1.71, + "learning_rate": 2.1619347370687004e-06, + "loss": 0.7446, + "step": 24210 + }, + { + "epoch": 1.71, + "learning_rate": 2.151599673330269e-06, + "loss": 0.7482, + "step": 24220 + }, + { + "epoch": 1.71, + "learning_rate": 2.14128796753895e-06, + "loss": 0.7475, + "step": 24230 + }, + { + "epoch": 1.71, + "learning_rate": 2.1309996331895235e-06, + "loss": 0.7507, + "step": 24240 + }, + { + "epoch": 1.71, + "learning_rate": 2.120734683746184e-06, + "loss": 0.7439, + "step": 24250 + }, + { + "epoch": 1.71, + "learning_rate": 2.1104931326425194e-06, + "loss": 0.7481, + "step": 24260 + }, + { + "epoch": 1.71, + "learning_rate": 2.1002749932815146e-06, + "loss": 0.7429, + "step": 24270 + }, + { + "epoch": 1.72, + "learning_rate": 2.0900802790354914e-06, + "loss": 0.7458, + "step": 24280 + }, + { + "epoch": 1.72, + "learning_rate": 2.0799090032461346e-06, + "loss": 0.752, + "step": 24290 + }, + { + "epoch": 1.72, + "learning_rate": 2.069761179224441e-06, + "loss": 0.7465, + "step": 24300 + }, + { + "epoch": 1.72, + "learning_rate": 2.05963682025073e-06, + "loss": 0.7504, + "step": 24310 + }, + { + "epoch": 1.72, + "learning_rate": 2.0495359395746005e-06, + "loss": 0.7482, + "step": 24320 + }, + { + "epoch": 1.72, + "learning_rate": 2.039458550414941e-06, + "loss": 0.7421, + "step": 24330 + }, + { + "epoch": 1.72, + "learning_rate": 2.029404665959871e-06, + "loss": 0.7472, + "step": 24340 + }, + { + "epoch": 1.72, + "learning_rate": 2.0193742993667807e-06, + "loss": 0.7486, + "step": 24350 + }, + { + "epoch": 1.72, + "learning_rate": 2.009367463762255e-06, + "loss": 0.7429, + "step": 24360 + }, + { + "epoch": 1.72, + "learning_rate": 1.999384172242109e-06, + "loss": 0.7438, + "step": 24370 + }, + { + "epoch": 1.72, + "learning_rate": 1.9894244378713233e-06, + "loss": 0.7448, + "step": 24380 + }, + { + "epoch": 1.72, + "learning_rate": 1.9794882736840714e-06, + "loss": 0.7365, + "step": 24390 + }, + { + "epoch": 1.72, + "learning_rate": 1.9695756926836586e-06, + "loss": 0.7396, + "step": 24400 + }, + { + "epoch": 1.72, + "learning_rate": 1.959686707842554e-06, + "loss": 0.7328, + "step": 24410 + }, + { + "epoch": 1.72, + "learning_rate": 1.949821332102311e-06, + "loss": 0.7504, + "step": 24420 + }, + { + "epoch": 1.73, + "learning_rate": 1.9399795783736185e-06, + "loss": 0.742, + "step": 24430 + }, + { + "epoch": 1.73, + "learning_rate": 1.930161459536235e-06, + "loss": 0.7398, + "step": 24440 + }, + { + "epoch": 1.73, + "learning_rate": 1.9203669884389998e-06, + "loss": 0.7429, + "step": 24450 + }, + { + "epoch": 1.73, + "learning_rate": 1.910596177899786e-06, + "loss": 0.7433, + "step": 24460 + }, + { + "epoch": 1.73, + "learning_rate": 1.9008490407055258e-06, + "loss": 0.7433, + "step": 24470 + }, + { + "epoch": 1.73, + "learning_rate": 1.8911255896121528e-06, + "loss": 0.7407, + "step": 24480 + }, + { + "epoch": 1.73, + "learning_rate": 1.881425837344608e-06, + "loss": 0.7443, + "step": 24490 + }, + { + "epoch": 1.73, + "learning_rate": 1.87174979659682e-06, + "loss": 0.7432, + "step": 24500 + }, + { + "epoch": 1.73, + "learning_rate": 1.862097480031686e-06, + "loss": 0.7397, + "step": 24510 + }, + { + "epoch": 1.73, + "learning_rate": 1.8524689002810547e-06, + "loss": 0.747, + "step": 24520 + }, + { + "epoch": 1.73, + "learning_rate": 1.8428640699457135e-06, + "loss": 0.7472, + "step": 24530 + }, + { + "epoch": 1.73, + "learning_rate": 1.8332830015953606e-06, + "loss": 0.7493, + "step": 24540 + }, + { + "epoch": 1.73, + "learning_rate": 1.823725707768611e-06, + "loss": 0.7451, + "step": 24550 + }, + { + "epoch": 1.73, + "learning_rate": 1.8141922009729573e-06, + "loss": 0.7415, + "step": 24560 + }, + { + "epoch": 1.74, + "learning_rate": 1.8046824936847617e-06, + "loss": 0.7415, + "step": 24570 + }, + { + "epoch": 1.74, + "learning_rate": 1.7951965983492403e-06, + "loss": 0.7422, + "step": 24580 + }, + { + "epoch": 1.74, + "learning_rate": 1.7857345273804538e-06, + "loss": 0.7478, + "step": 24590 + }, + { + "epoch": 1.74, + "learning_rate": 1.776296293161277e-06, + "loss": 0.7417, + "step": 24600 + }, + { + "epoch": 1.74, + "learning_rate": 1.7668819080433962e-06, + "loss": 0.7413, + "step": 24610 + }, + { + "epoch": 1.74, + "learning_rate": 1.7574913843472763e-06, + "loss": 0.746, + "step": 24620 + }, + { + "epoch": 1.74, + "learning_rate": 1.7481247343621688e-06, + "loss": 0.7476, + "step": 24630 + }, + { + "epoch": 1.74, + "learning_rate": 1.738781970346073e-06, + "loss": 0.7466, + "step": 24640 + }, + { + "epoch": 1.74, + "learning_rate": 1.7294631045257283e-06, + "loss": 0.7496, + "step": 24650 + }, + { + "epoch": 1.74, + "learning_rate": 1.7201681490966016e-06, + "loss": 0.7462, + "step": 24660 + }, + { + "epoch": 1.74, + "learning_rate": 1.7108971162228716e-06, + "loss": 0.7426, + "step": 24670 + }, + { + "epoch": 1.74, + "learning_rate": 1.701650018037404e-06, + "loss": 0.7436, + "step": 24680 + }, + { + "epoch": 1.74, + "learning_rate": 1.6924268666417498e-06, + "loss": 0.7473, + "step": 24690 + }, + { + "epoch": 1.74, + "learning_rate": 1.683227674106107e-06, + "loss": 0.7454, + "step": 24700 + }, + { + "epoch": 1.75, + "learning_rate": 1.6740524524693413e-06, + "loss": 0.7382, + "step": 24710 + }, + { + "epoch": 1.75, + "learning_rate": 1.6649012137389164e-06, + "loss": 0.7424, + "step": 24720 + }, + { + "epoch": 1.75, + "learning_rate": 1.6557739698909436e-06, + "loss": 0.739, + "step": 24730 + }, + { + "epoch": 1.75, + "learning_rate": 1.6466707328701059e-06, + "loss": 0.7401, + "step": 24740 + }, + { + "epoch": 1.75, + "learning_rate": 1.6375915145896871e-06, + "loss": 0.7431, + "step": 24750 + }, + { + "epoch": 1.75, + "learning_rate": 1.6285363269315247e-06, + "loss": 0.7389, + "step": 24760 + }, + { + "epoch": 1.75, + "learning_rate": 1.6195051817460217e-06, + "loss": 0.7445, + "step": 24770 + }, + { + "epoch": 1.75, + "learning_rate": 1.6104980908521017e-06, + "loss": 0.744, + "step": 24780 + }, + { + "epoch": 1.75, + "learning_rate": 1.6015150660372225e-06, + "loss": 0.7454, + "step": 24790 + }, + { + "epoch": 1.75, + "learning_rate": 1.592556119057338e-06, + "loss": 0.7467, + "step": 24800 + }, + { + "epoch": 1.75, + "learning_rate": 1.5836212616368918e-06, + "loss": 0.751, + "step": 24810 + }, + { + "epoch": 1.75, + "learning_rate": 1.574710505468804e-06, + "loss": 0.7426, + "step": 24820 + }, + { + "epoch": 1.75, + "learning_rate": 1.5658238622144595e-06, + "loss": 0.7419, + "step": 24830 + }, + { + "epoch": 1.75, + "learning_rate": 1.5569613435036756e-06, + "loss": 0.7385, + "step": 24840 + }, + { + "epoch": 1.76, + "learning_rate": 1.5481229609347103e-06, + "loss": 0.7417, + "step": 24850 + }, + { + "epoch": 1.76, + "learning_rate": 1.5393087260742222e-06, + "loss": 0.7505, + "step": 24860 + }, + { + "epoch": 1.76, + "learning_rate": 1.5305186504572823e-06, + "loss": 0.7433, + "step": 24870 + }, + { + "epoch": 1.76, + "learning_rate": 1.5217527455873326e-06, + "loss": 0.7432, + "step": 24880 + }, + { + "epoch": 1.76, + "learning_rate": 1.5130110229361884e-06, + "loss": 0.7461, + "step": 24890 + }, + { + "epoch": 1.76, + "learning_rate": 1.504293493944018e-06, + "loss": 0.7399, + "step": 24900 + }, + { + "epoch": 1.76, + "learning_rate": 1.495600170019329e-06, + "loss": 0.744, + "step": 24910 + }, + { + "epoch": 1.76, + "learning_rate": 1.4869310625389433e-06, + "loss": 0.744, + "step": 24920 + }, + { + "epoch": 1.76, + "learning_rate": 1.47828618284801e-06, + "loss": 0.7471, + "step": 24930 + }, + { + "epoch": 1.76, + "learning_rate": 1.4696655422599525e-06, + "loss": 0.7407, + "step": 24940 + }, + { + "epoch": 1.76, + "learning_rate": 1.461069152056478e-06, + "loss": 0.7421, + "step": 24950 + }, + { + "epoch": 1.76, + "learning_rate": 1.4524970234875667e-06, + "loss": 0.7397, + "step": 24960 + }, + { + "epoch": 1.76, + "learning_rate": 1.4439491677714323e-06, + "loss": 0.7438, + "step": 24970 + }, + { + "epoch": 1.76, + "learning_rate": 1.435425596094544e-06, + "loss": 0.7448, + "step": 24980 + }, + { + "epoch": 1.77, + "learning_rate": 1.4269263196115657e-06, + "loss": 0.7468, + "step": 24990 + }, + { + "epoch": 1.77, + "learning_rate": 1.4184513494453888e-06, + "loss": 0.7461, + "step": 25000 + }, + { + "epoch": 1.77, + "learning_rate": 1.410000696687084e-06, + "loss": 0.7434, + "step": 25010 + }, + { + "epoch": 1.77, + "learning_rate": 1.4015743723958975e-06, + "loss": 0.7494, + "step": 25020 + }, + { + "epoch": 1.77, + "learning_rate": 1.3931723875992376e-06, + "loss": 0.7451, + "step": 25030 + }, + { + "epoch": 1.77, + "learning_rate": 1.3847947532926687e-06, + "loss": 0.7415, + "step": 25040 + }, + { + "epoch": 1.77, + "learning_rate": 1.3764414804398763e-06, + "loss": 0.7453, + "step": 25050 + }, + { + "epoch": 1.77, + "learning_rate": 1.3681125799726736e-06, + "loss": 0.7397, + "step": 25060 + }, + { + "epoch": 1.77, + "learning_rate": 1.3598080627909682e-06, + "loss": 0.7449, + "step": 25070 + }, + { + "epoch": 1.77, + "learning_rate": 1.351527939762769e-06, + "loss": 0.7433, + "step": 25080 + }, + { + "epoch": 1.77, + "learning_rate": 1.3432722217241501e-06, + "loss": 0.7437, + "step": 25090 + }, + { + "epoch": 1.77, + "learning_rate": 1.3350409194792625e-06, + "loss": 0.7451, + "step": 25100 + }, + { + "epoch": 1.77, + "learning_rate": 1.326834043800278e-06, + "loss": 0.7513, + "step": 25110 + }, + { + "epoch": 1.77, + "learning_rate": 1.31865160542743e-06, + "loss": 0.7445, + "step": 25120 + }, + { + "epoch": 1.78, + "learning_rate": 1.310493615068953e-06, + "loss": 0.7422, + "step": 25130 + }, + { + "epoch": 1.78, + "learning_rate": 1.3023600834010974e-06, + "loss": 0.7436, + "step": 25140 + }, + { + "epoch": 1.78, + "learning_rate": 1.2942510210680935e-06, + "loss": 0.738, + "step": 25150 + }, + { + "epoch": 1.78, + "learning_rate": 1.2861664386821638e-06, + "loss": 0.7458, + "step": 25160 + }, + { + "epoch": 1.78, + "learning_rate": 1.2781063468234823e-06, + "loss": 0.7391, + "step": 25170 + }, + { + "epoch": 1.78, + "learning_rate": 1.270070756040176e-06, + "loss": 0.7378, + "step": 25180 + }, + { + "epoch": 1.78, + "learning_rate": 1.262059676848304e-06, + "loss": 0.738, + "step": 25190 + }, + { + "epoch": 1.78, + "learning_rate": 1.2540731197318578e-06, + "loss": 0.7463, + "step": 25200 + }, + { + "epoch": 1.78, + "learning_rate": 1.2461110951427258e-06, + "loss": 0.7358, + "step": 25210 + }, + { + "epoch": 1.78, + "learning_rate": 1.2381736135007038e-06, + "loss": 0.7372, + "step": 25220 + }, + { + "epoch": 1.78, + "learning_rate": 1.2302606851934518e-06, + "loss": 0.7424, + "step": 25230 + }, + { + "epoch": 1.78, + "learning_rate": 1.2223723205765125e-06, + "loss": 0.7378, + "step": 25240 + }, + { + "epoch": 1.78, + "learning_rate": 1.214508529973275e-06, + "loss": 0.7418, + "step": 25250 + }, + { + "epoch": 1.78, + "learning_rate": 1.2066693236749671e-06, + "loss": 0.7413, + "step": 25260 + }, + { + "epoch": 1.78, + "learning_rate": 1.1988547119406468e-06, + "loss": 0.7428, + "step": 25270 + }, + { + "epoch": 1.79, + "learning_rate": 1.1910647049971913e-06, + "loss": 0.7375, + "step": 25280 + }, + { + "epoch": 1.79, + "learning_rate": 1.1832993130392612e-06, + "loss": 0.7435, + "step": 25290 + }, + { + "epoch": 1.79, + "learning_rate": 1.175558546229325e-06, + "loss": 0.7422, + "step": 25300 + }, + { + "epoch": 1.79, + "learning_rate": 1.1678424146976064e-06, + "loss": 0.7391, + "step": 25310 + }, + { + "epoch": 1.79, + "learning_rate": 1.1601509285421009e-06, + "loss": 0.7358, + "step": 25320 + }, + { + "epoch": 1.79, + "learning_rate": 1.1524840978285456e-06, + "loss": 0.7418, + "step": 25330 + }, + { + "epoch": 1.79, + "learning_rate": 1.1448419325904102e-06, + "loss": 0.7392, + "step": 25340 + }, + { + "epoch": 1.79, + "learning_rate": 1.1372244428288904e-06, + "loss": 0.7473, + "step": 25350 + }, + { + "epoch": 1.79, + "learning_rate": 1.1296316385128847e-06, + "loss": 0.7468, + "step": 25360 + }, + { + "epoch": 1.79, + "learning_rate": 1.1220635295789872e-06, + "loss": 0.7424, + "step": 25370 + }, + { + "epoch": 1.79, + "learning_rate": 1.1145201259314797e-06, + "loss": 0.7498, + "step": 25380 + }, + { + "epoch": 1.79, + "learning_rate": 1.1070014374422988e-06, + "loss": 0.7421, + "step": 25390 + }, + { + "epoch": 1.79, + "learning_rate": 1.099507473951056e-06, + "loss": 0.7492, + "step": 25400 + }, + { + "epoch": 1.79, + "learning_rate": 1.0920382452649814e-06, + "loss": 0.746, + "step": 25410 + }, + { + "epoch": 1.8, + "learning_rate": 1.0845937611589564e-06, + "loss": 0.7377, + "step": 25420 + }, + { + "epoch": 1.8, + "learning_rate": 1.0771740313754674e-06, + "loss": 0.7419, + "step": 25430 + }, + { + "epoch": 1.8, + "learning_rate": 1.0697790656246121e-06, + "loss": 0.7442, + "step": 25440 + }, + { + "epoch": 1.8, + "learning_rate": 1.0624088735840755e-06, + "loss": 0.7429, + "step": 25450 + }, + { + "epoch": 1.8, + "learning_rate": 1.055063464899122e-06, + "loss": 0.737, + "step": 25460 + }, + { + "epoch": 1.8, + "learning_rate": 1.0477428491825848e-06, + "loss": 0.7399, + "step": 25470 + }, + { + "epoch": 1.8, + "learning_rate": 1.0404470360148533e-06, + "loss": 0.7374, + "step": 25480 + }, + { + "epoch": 1.8, + "learning_rate": 1.0331760349438435e-06, + "loss": 0.7455, + "step": 25490 + }, + { + "epoch": 1.8, + "learning_rate": 1.0259298554850195e-06, + "loss": 0.7437, + "step": 25500 + }, + { + "epoch": 1.8, + "learning_rate": 1.0187085071213465e-06, + "loss": 0.7442, + "step": 25510 + }, + { + "epoch": 1.8, + "learning_rate": 1.0115119993033096e-06, + "loss": 0.744, + "step": 25520 + }, + { + "epoch": 1.8, + "learning_rate": 1.0043403414488662e-06, + "loss": 0.7413, + "step": 25530 + }, + { + "epoch": 1.8, + "learning_rate": 9.971935429434709e-07, + "loss": 0.7419, + "step": 25540 + }, + { + "epoch": 1.8, + "learning_rate": 9.90071613140029e-07, + "loss": 0.7504, + "step": 25550 + }, + { + "epoch": 1.81, + "learning_rate": 9.82974561358918e-07, + "loss": 0.7532, + "step": 25560 + }, + { + "epoch": 1.81, + "learning_rate": 9.7590239688794e-07, + "loss": 0.7443, + "step": 25570 + }, + { + "epoch": 1.81, + "learning_rate": 9.688551289823422e-07, + "loss": 0.7488, + "step": 25580 + }, + { + "epoch": 1.81, + "learning_rate": 9.618327668647765e-07, + "loss": 0.7493, + "step": 25590 + }, + { + "epoch": 1.81, + "learning_rate": 9.548353197253135e-07, + "loss": 0.7431, + "step": 25600 + }, + { + "epoch": 1.81, + "learning_rate": 9.478627967214104e-07, + "loss": 0.7412, + "step": 25610 + }, + { + "epoch": 1.81, + "learning_rate": 9.409152069779104e-07, + "loss": 0.7416, + "step": 25620 + }, + { + "epoch": 1.81, + "learning_rate": 9.339925595870225e-07, + "loss": 0.743, + "step": 25630 + }, + { + "epoch": 1.81, + "learning_rate": 9.270948636083221e-07, + "loss": 0.7482, + "step": 25640 + }, + { + "epoch": 1.81, + "learning_rate": 9.202221280687196e-07, + "loss": 0.7479, + "step": 25650 + }, + { + "epoch": 1.81, + "learning_rate": 9.133743619624669e-07, + "loss": 0.74, + "step": 25660 + }, + { + "epoch": 1.81, + "learning_rate": 9.065515742511421e-07, + "loss": 0.7431, + "step": 25670 + }, + { + "epoch": 1.81, + "learning_rate": 8.997537738636275e-07, + "loss": 0.7466, + "step": 25680 + }, + { + "epoch": 1.81, + "learning_rate": 8.929809696961044e-07, + "loss": 0.7428, + "step": 25690 + }, + { + "epoch": 1.82, + "learning_rate": 8.862331706120497e-07, + "loss": 0.7492, + "step": 25700 + }, + { + "epoch": 1.82, + "learning_rate": 8.795103854422127e-07, + "loss": 0.7444, + "step": 25710 + }, + { + "epoch": 1.82, + "learning_rate": 8.728126229846001e-07, + "loss": 0.7422, + "step": 25720 + }, + { + "epoch": 1.82, + "learning_rate": 8.66139892004485e-07, + "loss": 0.7455, + "step": 25730 + }, + { + "epoch": 1.82, + "learning_rate": 8.594922012343709e-07, + "loss": 0.7393, + "step": 25740 + }, + { + "epoch": 1.82, + "learning_rate": 8.528695593739988e-07, + "loss": 0.7382, + "step": 25750 + }, + { + "epoch": 1.82, + "learning_rate": 8.462719750903225e-07, + "loss": 0.7391, + "step": 25760 + }, + { + "epoch": 1.82, + "learning_rate": 8.396994570175132e-07, + "loss": 0.7367, + "step": 25770 + }, + { + "epoch": 1.82, + "learning_rate": 8.331520137569238e-07, + "loss": 0.7451, + "step": 25780 + }, + { + "epoch": 1.82, + "learning_rate": 8.266296538771046e-07, + "loss": 0.7361, + "step": 25790 + }, + { + "epoch": 1.82, + "learning_rate": 8.201323859137722e-07, + "loss": 0.7364, + "step": 25800 + }, + { + "epoch": 1.82, + "learning_rate": 8.136602183698094e-07, + "loss": 0.7475, + "step": 25810 + }, + { + "epoch": 1.82, + "learning_rate": 8.072131597152499e-07, + "loss": 0.7416, + "step": 25820 + }, + { + "epoch": 1.82, + "learning_rate": 8.007912183872712e-07, + "loss": 0.7433, + "step": 25830 + }, + { + "epoch": 1.83, + "learning_rate": 7.943944027901684e-07, + "loss": 0.7406, + "step": 25840 + }, + { + "epoch": 1.83, + "learning_rate": 7.880227212953717e-07, + "loss": 0.7435, + "step": 25850 + }, + { + "epoch": 1.83, + "learning_rate": 7.81676182241402e-07, + "loss": 0.7407, + "step": 25860 + }, + { + "epoch": 1.83, + "learning_rate": 7.753547939338912e-07, + "loss": 0.7368, + "step": 25870 + }, + { + "epoch": 1.83, + "learning_rate": 7.690585646455439e-07, + "loss": 0.7374, + "step": 25880 + }, + { + "epoch": 1.83, + "learning_rate": 7.627875026161514e-07, + "loss": 0.7376, + "step": 25890 + }, + { + "epoch": 1.83, + "learning_rate": 7.565416160525596e-07, + "loss": 0.7456, + "step": 25900 + }, + { + "epoch": 1.83, + "learning_rate": 7.503209131286727e-07, + "loss": 0.7358, + "step": 25910 + }, + { + "epoch": 1.83, + "learning_rate": 7.441254019854316e-07, + "loss": 0.7419, + "step": 25920 + }, + { + "epoch": 1.83, + "learning_rate": 7.379550907308219e-07, + "loss": 0.7456, + "step": 25930 + }, + { + "epoch": 1.83, + "learning_rate": 7.318099874398355e-07, + "loss": 0.7345, + "step": 25940 + }, + { + "epoch": 1.83, + "learning_rate": 7.256901001544836e-07, + "loss": 0.737, + "step": 25950 + }, + { + "epoch": 1.83, + "learning_rate": 7.195954368837732e-07, + "loss": 0.7425, + "step": 25960 + }, + { + "epoch": 1.83, + "learning_rate": 7.135260056037086e-07, + "loss": 0.733, + "step": 25970 + }, + { + "epoch": 1.84, + "learning_rate": 7.074818142572604e-07, + "loss": 0.7378, + "step": 25980 + }, + { + "epoch": 1.84, + "learning_rate": 7.014628707543836e-07, + "loss": 0.7504, + "step": 25990 + }, + { + "epoch": 1.84, + "learning_rate": 6.954691829719773e-07, + "loss": 0.7406, + "step": 26000 + }, + { + "epoch": 1.84, + "learning_rate": 6.895007587538982e-07, + "loss": 0.7431, + "step": 26010 + }, + { + "epoch": 1.84, + "learning_rate": 6.835576059109406e-07, + "loss": 0.7386, + "step": 26020 + }, + { + "epoch": 1.84, + "learning_rate": 6.776397322208205e-07, + "loss": 0.7413, + "step": 26030 + }, + { + "epoch": 1.84, + "learning_rate": 6.717471454281721e-07, + "loss": 0.7458, + "step": 26040 + }, + { + "epoch": 1.84, + "learning_rate": 6.658798532445465e-07, + "loss": 0.7445, + "step": 26050 + }, + { + "epoch": 1.84, + "learning_rate": 6.600378633483795e-07, + "loss": 0.736, + "step": 26060 + }, + { + "epoch": 1.84, + "learning_rate": 6.542211833850043e-07, + "loss": 0.7434, + "step": 26070 + }, + { + "epoch": 1.84, + "learning_rate": 6.484298209666229e-07, + "loss": 0.7394, + "step": 26080 + }, + { + "epoch": 1.84, + "learning_rate": 6.426637836723126e-07, + "loss": 0.738, + "step": 26090 + }, + { + "epoch": 1.84, + "learning_rate": 6.369230790480041e-07, + "loss": 0.735, + "step": 26100 + }, + { + "epoch": 1.84, + "learning_rate": 6.312077146064743e-07, + "loss": 0.7378, + "step": 26110 + }, + { + "epoch": 1.85, + "learning_rate": 6.255176978273358e-07, + "loss": 0.7453, + "step": 26120 + }, + { + "epoch": 1.85, + "learning_rate": 6.198530361570387e-07, + "loss": 0.7322, + "step": 26130 + }, + { + "epoch": 1.85, + "learning_rate": 6.142137370088397e-07, + "loss": 0.7367, + "step": 26140 + }, + { + "epoch": 1.85, + "learning_rate": 6.085998077628152e-07, + "loss": 0.7477, + "step": 26150 + }, + { + "epoch": 1.85, + "learning_rate": 6.030112557658264e-07, + "loss": 0.738, + "step": 26160 + }, + { + "epoch": 1.85, + "learning_rate": 5.974480883315425e-07, + "loss": 0.7499, + "step": 26170 + }, + { + "epoch": 1.85, + "learning_rate": 5.919103127403891e-07, + "loss": 0.745, + "step": 26180 + }, + { + "epoch": 1.85, + "learning_rate": 5.863979362395844e-07, + "loss": 0.746, + "step": 26190 + }, + { + "epoch": 1.85, + "learning_rate": 5.80910966043089e-07, + "loss": 0.7375, + "step": 26200 + }, + { + "epoch": 1.85, + "learning_rate": 5.754494093316276e-07, + "loss": 0.7428, + "step": 26210 + }, + { + "epoch": 1.85, + "learning_rate": 5.700132732526586e-07, + "loss": 0.7325, + "step": 26220 + }, + { + "epoch": 1.85, + "learning_rate": 5.646025649203801e-07, + "loss": 0.7417, + "step": 26230 + }, + { + "epoch": 1.85, + "learning_rate": 5.592172914157057e-07, + "loss": 0.7385, + "step": 26240 + }, + { + "epoch": 1.85, + "learning_rate": 5.538574597862689e-07, + "loss": 0.7347, + "step": 26250 + }, + { + "epoch": 1.85, + "learning_rate": 5.48523077046399e-07, + "loss": 0.7477, + "step": 26260 + }, + { + "epoch": 1.86, + "learning_rate": 5.432141501771316e-07, + "loss": 0.7356, + "step": 26270 + }, + { + "epoch": 1.86, + "learning_rate": 5.379306861261824e-07, + "loss": 0.7392, + "step": 26280 + }, + { + "epoch": 1.86, + "learning_rate": 5.326726918079472e-07, + "loss": 0.7339, + "step": 26290 + }, + { + "epoch": 1.86, + "learning_rate": 5.27440174103484e-07, + "loss": 0.7443, + "step": 26300 + }, + { + "epoch": 1.86, + "learning_rate": 5.222331398605174e-07, + "loss": 0.7421, + "step": 26310 + }, + { + "epoch": 1.86, + "learning_rate": 5.170515958934185e-07, + "loss": 0.7405, + "step": 26320 + }, + { + "epoch": 1.86, + "learning_rate": 5.118955489832012e-07, + "loss": 0.7418, + "step": 26330 + }, + { + "epoch": 1.86, + "learning_rate": 5.067650058775076e-07, + "loss": 0.7438, + "step": 26340 + }, + { + "epoch": 1.86, + "learning_rate": 5.016599732906091e-07, + "loss": 0.7483, + "step": 26350 + }, + { + "epoch": 1.86, + "learning_rate": 4.965804579033861e-07, + "loss": 0.7471, + "step": 26360 + }, + { + "epoch": 1.86, + "learning_rate": 4.915264663633301e-07, + "loss": 0.7394, + "step": 26370 + }, + { + "epoch": 1.86, + "learning_rate": 4.864980052845281e-07, + "loss": 0.7347, + "step": 26380 + }, + { + "epoch": 1.86, + "learning_rate": 4.814950812476559e-07, + "loss": 0.7383, + "step": 26390 + }, + { + "epoch": 1.86, + "learning_rate": 4.765177007999677e-07, + "loss": 0.7392, + "step": 26400 + }, + { + "epoch": 1.87, + "learning_rate": 4.7156587045528834e-07, + "loss": 0.7341, + "step": 26410 + }, + { + "epoch": 1.87, + "learning_rate": 4.666395966940096e-07, + "loss": 0.7451, + "step": 26420 + }, + { + "epoch": 1.87, + "learning_rate": 4.617388859630767e-07, + "loss": 0.7451, + "step": 26430 + }, + { + "epoch": 1.87, + "learning_rate": 4.5686374467597936e-07, + "loss": 0.7447, + "step": 26440 + }, + { + "epoch": 1.87, + "learning_rate": 4.5201417921274524e-07, + "loss": 0.7386, + "step": 26450 + }, + { + "epoch": 1.87, + "learning_rate": 4.47190195919931e-07, + "loss": 0.7451, + "step": 26460 + }, + { + "epoch": 1.87, + "learning_rate": 4.4239180111061763e-07, + "loss": 0.7415, + "step": 26470 + }, + { + "epoch": 1.87, + "learning_rate": 4.37619001064391e-07, + "loss": 0.7482, + "step": 26480 + }, + { + "epoch": 1.87, + "learning_rate": 4.3287180202735033e-07, + "loss": 0.7386, + "step": 26490 + }, + { + "epoch": 1.87, + "learning_rate": 4.2815021021208604e-07, + "loss": 0.7418, + "step": 26500 + }, + { + "epoch": 1.87, + "learning_rate": 4.234542317976753e-07, + "loss": 0.749, + "step": 26510 + }, + { + "epoch": 1.87, + "learning_rate": 4.187838729296845e-07, + "loss": 0.7392, + "step": 26520 + }, + { + "epoch": 1.87, + "learning_rate": 4.141391397201422e-07, + "loss": 0.7388, + "step": 26530 + }, + { + "epoch": 1.87, + "learning_rate": 4.0952003824754396e-07, + "loss": 0.7429, + "step": 26540 + }, + { + "epoch": 1.88, + "learning_rate": 4.0492657455684314e-07, + "loss": 0.7443, + "step": 26550 + }, + { + "epoch": 1.88, + "learning_rate": 4.003587546594401e-07, + "loss": 0.7467, + "step": 26560 + }, + { + "epoch": 1.88, + "learning_rate": 3.9581658453317294e-07, + "loss": 0.7502, + "step": 26570 + }, + { + "epoch": 1.88, + "learning_rate": 3.913000701223202e-07, + "loss": 0.7382, + "step": 26580 + }, + { + "epoch": 1.88, + "learning_rate": 3.8680921733757373e-07, + "loss": 0.7415, + "step": 26590 + }, + { + "epoch": 1.88, + "learning_rate": 3.823440320560545e-07, + "loss": 0.7477, + "step": 26600 + }, + { + "epoch": 1.88, + "learning_rate": 3.7790452012128167e-07, + "loss": 0.7433, + "step": 26610 + }, + { + "epoch": 1.88, + "learning_rate": 3.734906873431876e-07, + "loss": 0.7484, + "step": 26620 + }, + { + "epoch": 1.88, + "learning_rate": 3.6910253949808517e-07, + "loss": 0.7384, + "step": 26630 + }, + { + "epoch": 1.88, + "learning_rate": 3.6474008232868727e-07, + "loss": 0.7437, + "step": 26640 + }, + { + "epoch": 1.88, + "learning_rate": 3.604033215440739e-07, + "loss": 0.7392, + "step": 26650 + }, + { + "epoch": 1.88, + "learning_rate": 3.5609226281970497e-07, + "loss": 0.739, + "step": 26660 + }, + { + "epoch": 1.88, + "learning_rate": 3.51806911797401e-07, + "loss": 0.7391, + "step": 26670 + }, + { + "epoch": 1.88, + "learning_rate": 3.475472740853403e-07, + "loss": 0.7352, + "step": 26680 + }, + { + "epoch": 1.89, + "learning_rate": 3.433133552580503e-07, + "loss": 0.7362, + "step": 26690 + }, + { + "epoch": 1.89, + "learning_rate": 3.391051608563989e-07, + "loss": 0.7474, + "step": 26700 + }, + { + "epoch": 1.89, + "learning_rate": 3.3492269638759176e-07, + "loss": 0.7354, + "step": 26710 + }, + { + "epoch": 1.89, + "learning_rate": 3.307659673251595e-07, + "loss": 0.7399, + "step": 26720 + }, + { + "epoch": 1.89, + "learning_rate": 3.266349791089529e-07, + "loss": 0.7369, + "step": 26730 + }, + { + "epoch": 1.89, + "learning_rate": 3.225297371451408e-07, + "loss": 0.7404, + "step": 26740 + }, + { + "epoch": 1.89, + "learning_rate": 3.1845024680619007e-07, + "loss": 0.7451, + "step": 26750 + }, + { + "epoch": 1.89, + "learning_rate": 3.1439651343087683e-07, + "loss": 0.7424, + "step": 26760 + }, + { + "epoch": 1.89, + "learning_rate": 3.103685423242597e-07, + "loss": 0.7387, + "step": 26770 + }, + { + "epoch": 1.89, + "learning_rate": 3.0636633875769094e-07, + "loss": 0.7374, + "step": 26780 + }, + { + "epoch": 1.89, + "learning_rate": 3.023899079687942e-07, + "loss": 0.7432, + "step": 26790 + }, + { + "epoch": 1.89, + "learning_rate": 2.9843925516147123e-07, + "loss": 0.7372, + "step": 26800 + }, + { + "epoch": 1.89, + "learning_rate": 2.945143855058796e-07, + "loss": 0.74, + "step": 26810 + }, + { + "epoch": 1.89, + "learning_rate": 2.906153041384441e-07, + "loss": 0.7438, + "step": 26820 + }, + { + "epoch": 1.9, + "learning_rate": 2.8674201616183617e-07, + "loss": 0.7433, + "step": 26830 + }, + { + "epoch": 1.9, + "learning_rate": 2.8289452664497453e-07, + "loss": 0.7367, + "step": 26840 + }, + { + "epoch": 1.9, + "learning_rate": 2.790728406230092e-07, + "loss": 0.7389, + "step": 26850 + }, + { + "epoch": 1.9, + "learning_rate": 2.752769630973329e-07, + "loss": 0.7485, + "step": 26860 + }, + { + "epoch": 1.9, + "learning_rate": 2.715068990355496e-07, + "loss": 0.7371, + "step": 26870 + }, + { + "epoch": 1.9, + "learning_rate": 2.6776265337149277e-07, + "loss": 0.7361, + "step": 26880 + }, + { + "epoch": 1.9, + "learning_rate": 2.640442310052027e-07, + "loss": 0.74, + "step": 26890 + }, + { + "epoch": 1.9, + "learning_rate": 2.6035163680292464e-07, + "loss": 0.7406, + "step": 26900 + }, + { + "epoch": 1.9, + "learning_rate": 2.5668487559710184e-07, + "loss": 0.7445, + "step": 26910 + }, + { + "epoch": 1.9, + "learning_rate": 2.530439521863781e-07, + "loss": 0.7422, + "step": 26920 + }, + { + "epoch": 1.9, + "learning_rate": 2.494288713355708e-07, + "loss": 0.7398, + "step": 26930 + }, + { + "epoch": 1.9, + "learning_rate": 2.458396377756955e-07, + "loss": 0.7338, + "step": 26940 + }, + { + "epoch": 1.9, + "learning_rate": 2.4227625620391936e-07, + "loss": 0.7503, + "step": 26950 + }, + { + "epoch": 1.9, + "learning_rate": 2.3873873128359424e-07, + "loss": 0.7438, + "step": 26960 + }, + { + "epoch": 1.91, + "learning_rate": 2.3522706764422808e-07, + "loss": 0.7441, + "step": 26970 + }, + { + "epoch": 1.91, + "learning_rate": 2.3174126988148692e-07, + "loss": 0.7454, + "step": 26980 + }, + { + "epoch": 1.91, + "learning_rate": 2.2828134255718171e-07, + "loss": 0.744, + "step": 26990 + }, + { + "epoch": 1.91, + "learning_rate": 2.2484729019927265e-07, + "loss": 0.7411, + "step": 27000 + }, + { + "epoch": 1.91, + "learning_rate": 2.2143911730185152e-07, + "loss": 0.7417, + "step": 27010 + }, + { + "epoch": 1.91, + "learning_rate": 2.1805682832515053e-07, + "loss": 0.7407, + "step": 27020 + }, + { + "epoch": 1.91, + "learning_rate": 2.147004276955178e-07, + "loss": 0.7449, + "step": 27030 + }, + { + "epoch": 1.91, + "learning_rate": 2.1136991980543086e-07, + "loss": 0.7399, + "step": 27040 + }, + { + "epoch": 1.91, + "learning_rate": 2.0806530901347655e-07, + "loss": 0.7404, + "step": 27050 + }, + { + "epoch": 1.91, + "learning_rate": 2.047865996443532e-07, + "loss": 0.7438, + "step": 27060 + }, + { + "epoch": 1.91, + "learning_rate": 2.0153379598885748e-07, + "loss": 0.7461, + "step": 27070 + }, + { + "epoch": 1.91, + "learning_rate": 1.9830690230389082e-07, + "loss": 0.7495, + "step": 27080 + }, + { + "epoch": 1.91, + "learning_rate": 1.9510592281243968e-07, + "loss": 0.7421, + "step": 27090 + }, + { + "epoch": 1.91, + "learning_rate": 1.9193086170358643e-07, + "loss": 0.7392, + "step": 27100 + }, + { + "epoch": 1.91, + "learning_rate": 1.8878172313248067e-07, + "loss": 0.7402, + "step": 27110 + }, + { + "epoch": 1.92, + "learning_rate": 1.8565851122035904e-07, + "loss": 0.7453, + "step": 27120 + }, + { + "epoch": 1.92, + "learning_rate": 1.825612300545232e-07, + "loss": 0.735, + "step": 27130 + }, + { + "epoch": 1.92, + "learning_rate": 1.7948988368834408e-07, + "loss": 0.7369, + "step": 27140 + }, + { + "epoch": 1.92, + "learning_rate": 1.7644447614124427e-07, + "loss": 0.7383, + "step": 27150 + }, + { + "epoch": 1.92, + "learning_rate": 1.7342501139870904e-07, + "loss": 0.748, + "step": 27160 + }, + { + "epoch": 1.92, + "learning_rate": 1.704314934122686e-07, + "loss": 0.7387, + "step": 27170 + }, + { + "epoch": 1.92, + "learning_rate": 1.674639260994937e-07, + "loss": 0.7493, + "step": 27180 + }, + { + "epoch": 1.92, + "learning_rate": 1.6452231334399993e-07, + "loss": 0.7348, + "step": 27190 + }, + { + "epoch": 1.92, + "learning_rate": 1.616066589954346e-07, + "loss": 0.7424, + "step": 27200 + }, + { + "epoch": 1.92, + "learning_rate": 1.587169668694699e-07, + "loss": 0.7419, + "step": 27210 + }, + { + "epoch": 1.92, + "learning_rate": 1.5585324074780972e-07, + "loss": 0.7416, + "step": 27220 + }, + { + "epoch": 1.92, + "learning_rate": 1.5301548437816726e-07, + "loss": 0.7357, + "step": 27230 + }, + { + "epoch": 1.92, + "learning_rate": 1.502037014742763e-07, + "loss": 0.7384, + "step": 27240 + }, + { + "epoch": 1.92, + "learning_rate": 1.474178957158756e-07, + "loss": 0.7415, + "step": 27250 + }, + { + "epoch": 1.93, + "learning_rate": 1.4465807074870885e-07, + "loss": 0.7404, + "step": 27260 + }, + { + "epoch": 1.93, + "learning_rate": 1.419242301845225e-07, + "loss": 0.7369, + "step": 27270 + }, + { + "epoch": 1.93, + "learning_rate": 1.3921637760105022e-07, + "loss": 0.7335, + "step": 27280 + }, + { + "epoch": 1.93, + "learning_rate": 1.3653451654202622e-07, + "loss": 0.7362, + "step": 27290 + }, + { + "epoch": 1.93, + "learning_rate": 1.3387865051715854e-07, + "loss": 0.7372, + "step": 27300 + }, + { + "epoch": 1.93, + "learning_rate": 1.312487830021447e-07, + "loss": 0.7433, + "step": 27310 + }, + { + "epoch": 1.93, + "learning_rate": 1.286449174386517e-07, + "loss": 0.7417, + "step": 27320 + }, + { + "epoch": 1.93, + "learning_rate": 1.2606705723432476e-07, + "loss": 0.7447, + "step": 27330 + }, + { + "epoch": 1.93, + "learning_rate": 1.2351520576276755e-07, + "loss": 0.7474, + "step": 27340 + }, + { + "epoch": 1.93, + "learning_rate": 1.209893663635575e-07, + "loss": 0.7361, + "step": 27350 + }, + { + "epoch": 1.93, + "learning_rate": 1.1848954234222166e-07, + "loss": 0.7401, + "step": 27360 + }, + { + "epoch": 1.93, + "learning_rate": 1.1601573697024526e-07, + "loss": 0.742, + "step": 27370 + }, + { + "epoch": 1.93, + "learning_rate": 1.1356795348506088e-07, + "loss": 0.7335, + "step": 27380 + }, + { + "epoch": 1.93, + "learning_rate": 1.1114619509004831e-07, + "loss": 0.7407, + "step": 27390 + }, + { + "epoch": 1.94, + "learning_rate": 1.0875046495453012e-07, + "loss": 0.7432, + "step": 27400 + }, + { + "epoch": 1.94, + "learning_rate": 1.0638076621376059e-07, + "loss": 0.7333, + "step": 27410 + }, + { + "epoch": 1.94, + "learning_rate": 1.0403710196893235e-07, + "loss": 0.7341, + "step": 27420 + }, + { + "epoch": 1.94, + "learning_rate": 1.0171947528716753e-07, + "loss": 0.7447, + "step": 27430 + }, + { + "epoch": 1.94, + "learning_rate": 9.942788920150881e-08, + "loss": 0.7455, + "step": 27440 + }, + { + "epoch": 1.94, + "learning_rate": 9.716234671092173e-08, + "loss": 0.7393, + "step": 27450 + }, + { + "epoch": 1.94, + "learning_rate": 9.492285078029017e-08, + "loss": 0.7416, + "step": 27460 + }, + { + "epoch": 1.94, + "learning_rate": 9.270940434041198e-08, + "loss": 0.7408, + "step": 27470 + }, + { + "epoch": 1.94, + "learning_rate": 9.052201028799224e-08, + "loss": 0.7361, + "step": 27480 + }, + { + "epoch": 1.94, + "learning_rate": 8.836067148564109e-08, + "loss": 0.7428, + "step": 27490 + }, + { + "epoch": 1.94, + "learning_rate": 8.622539076187153e-08, + "loss": 0.7449, + "step": 27500 + }, + { + "epoch": 1.94, + "learning_rate": 8.411617091109714e-08, + "loss": 0.7376, + "step": 27510 + }, + { + "epoch": 1.94, + "learning_rate": 8.203301469362102e-08, + "loss": 0.7401, + "step": 27520 + }, + { + "epoch": 1.94, + "learning_rate": 7.997592483564465e-08, + "loss": 0.7405, + "step": 27530 + }, + { + "epoch": 1.95, + "learning_rate": 7.794490402924793e-08, + "loss": 0.7414, + "step": 27540 + }, + { + "epoch": 1.95, + "learning_rate": 7.593995493240025e-08, + "loss": 0.7392, + "step": 27550 + }, + { + "epoch": 1.95, + "learning_rate": 7.396108016894943e-08, + "loss": 0.7384, + "step": 27560 + }, + { + "epoch": 1.95, + "learning_rate": 7.200828232862388e-08, + "loss": 0.749, + "step": 27570 + }, + { + "epoch": 1.95, + "learning_rate": 7.008156396701716e-08, + "loss": 0.7342, + "step": 27580 + }, + { + "epoch": 1.95, + "learning_rate": 6.818092760560113e-08, + "loss": 0.7367, + "step": 27590 + }, + { + "epoch": 1.95, + "learning_rate": 6.630637573171061e-08, + "loss": 0.7325, + "step": 27600 + }, + { + "epoch": 1.95, + "learning_rate": 6.445791079854547e-08, + "loss": 0.746, + "step": 27610 + }, + { + "epoch": 1.95, + "learning_rate": 6.263553522516396e-08, + "loss": 0.7448, + "step": 27620 + }, + { + "epoch": 1.95, + "learning_rate": 6.083925139648727e-08, + "loss": 0.7466, + "step": 27630 + }, + { + "epoch": 1.95, + "learning_rate": 5.906906166328164e-08, + "loss": 0.7364, + "step": 27640 + }, + { + "epoch": 1.95, + "learning_rate": 5.732496834217172e-08, + "loss": 0.75, + "step": 27650 + }, + { + "epoch": 1.95, + "learning_rate": 5.560697371562507e-08, + "loss": 0.741, + "step": 27660 + }, + { + "epoch": 1.95, + "learning_rate": 5.391508003195878e-08, + "loss": 0.7501, + "step": 27670 + }, + { + "epoch": 1.96, + "learning_rate": 5.224928950533059e-08, + "loss": 0.7428, + "step": 27680 + }, + { + "epoch": 1.96, + "learning_rate": 5.0609604315736693e-08, + "loss": 0.7423, + "step": 27690 + }, + { + "epoch": 1.96, + "learning_rate": 4.8996026609007265e-08, + "loss": 0.7412, + "step": 27700 + }, + { + "epoch": 1.96, + "learning_rate": 4.740855849681314e-08, + "loss": 0.7376, + "step": 27710 + }, + { + "epoch": 1.96, + "learning_rate": 4.5847202056645836e-08, + "loss": 0.7412, + "step": 27720 + }, + { + "epoch": 1.96, + "learning_rate": 4.4311959331833075e-08, + "loss": 0.7412, + "step": 27730 + }, + { + "epoch": 1.96, + "learning_rate": 4.280283233152327e-08, + "loss": 0.7395, + "step": 27740 + }, + { + "epoch": 1.96, + "learning_rate": 4.131982303069437e-08, + "loss": 0.7357, + "step": 27750 + }, + { + "epoch": 1.96, + "learning_rate": 3.98629333701317e-08, + "loss": 0.7429, + "step": 27760 + }, + { + "epoch": 1.96, + "learning_rate": 3.8432165256454543e-08, + "loss": 0.7428, + "step": 27770 + }, + { + "epoch": 1.96, + "learning_rate": 3.70275205620807e-08, + "loss": 0.7393, + "step": 27780 + }, + { + "epoch": 1.96, + "learning_rate": 3.564900112525527e-08, + "loss": 0.7445, + "step": 27790 + }, + { + "epoch": 1.96, + "learning_rate": 3.429660875002405e-08, + "loss": 0.7409, + "step": 27800 + }, + { + "epoch": 1.96, + "learning_rate": 3.297034520624243e-08, + "loss": 0.7491, + "step": 27810 + }, + { + "epoch": 1.97, + "learning_rate": 3.1670212229575336e-08, + "loss": 0.7507, + "step": 27820 + }, + { + "epoch": 1.97, + "learning_rate": 3.03962115214862e-08, + "loss": 0.7378, + "step": 27830 + }, + { + "epoch": 1.97, + "learning_rate": 2.914834474924133e-08, + "loss": 0.7389, + "step": 27840 + }, + { + "epoch": 1.97, + "learning_rate": 2.7926613545907753e-08, + "loss": 0.7383, + "step": 27850 + }, + { + "epoch": 1.97, + "learning_rate": 2.6731019510348732e-08, + "loss": 0.7411, + "step": 27860 + }, + { + "epoch": 1.97, + "learning_rate": 2.5561564207217115e-08, + "loss": 0.7375, + "step": 27870 + }, + { + "epoch": 1.97, + "learning_rate": 2.4418249166966447e-08, + "loss": 0.7404, + "step": 27880 + }, + { + "epoch": 1.97, + "learning_rate": 2.3301075885835413e-08, + "loss": 0.7414, + "step": 27890 + }, + { + "epoch": 1.97, + "learning_rate": 2.221004582585007e-08, + "loss": 0.7382, + "step": 27900 + }, + { + "epoch": 1.97, + "learning_rate": 2.11451604148305e-08, + "loss": 0.741, + "step": 27910 + }, + { + "epoch": 1.97, + "learning_rate": 2.010642104637528e-08, + "loss": 0.7394, + "step": 27920 + }, + { + "epoch": 1.97, + "learning_rate": 1.90938290798659e-08, + "loss": 0.733, + "step": 27930 + }, + { + "epoch": 1.97, + "learning_rate": 1.8107385840469006e-08, + "loss": 0.7422, + "step": 27940 + }, + { + "epoch": 1.97, + "learning_rate": 1.7147092619127504e-08, + "loss": 0.733, + "step": 27950 + }, + { + "epoch": 1.98, + "learning_rate": 1.621295067256723e-08, + "loss": 0.7319, + "step": 27960 + }, + { + "epoch": 1.98, + "learning_rate": 1.53049612232814e-08, + "loss": 0.7442, + "step": 27970 + }, + { + "epoch": 1.98, + "learning_rate": 1.4423125459543941e-08, + "loss": 0.7435, + "step": 27980 + }, + { + "epoch": 1.98, + "learning_rate": 1.3567444535402818e-08, + "loss": 0.7445, + "step": 27990 + }, + { + "epoch": 1.98, + "learning_rate": 1.2737919570675606e-08, + "loss": 0.7415, + "step": 28000 + }, + { + "epoch": 1.98, + "learning_rate": 1.1934551650947257e-08, + "loss": 0.74, + "step": 28010 + }, + { + "epoch": 1.98, + "learning_rate": 1.1157341827574552e-08, + "loss": 0.7343, + "step": 28020 + }, + { + "epoch": 1.98, + "learning_rate": 1.0406291117683875e-08, + "loss": 0.7388, + "step": 28030 + }, + { + "epoch": 1.98, + "learning_rate": 9.68140050416011e-09, + "loss": 0.7415, + "step": 28040 + }, + { + "epoch": 1.98, + "learning_rate": 8.982670935659966e-09, + "loss": 0.7397, + "step": 28050 + }, + { + "epoch": 1.98, + "learning_rate": 8.310103326603092e-09, + "loss": 0.7381, + "step": 28060 + }, + { + "epoch": 1.98, + "learning_rate": 7.663698557163201e-09, + "loss": 0.7388, + "step": 28070 + }, + { + "epoch": 1.98, + "learning_rate": 7.043457473285831e-09, + "loss": 0.7467, + "step": 28080 + }, + { + "epoch": 1.98, + "learning_rate": 6.449380886668355e-09, + "loss": 0.7417, + "step": 28090 + }, + { + "epoch": 1.98, + "learning_rate": 5.881469574775533e-09, + "loss": 0.7335, + "step": 28100 + }, + { + "epoch": 1.99, + "learning_rate": 5.339724280817304e-09, + "loss": 0.7385, + "step": 28110 + }, + { + "epoch": 1.99, + "learning_rate": 4.82414571377543e-09, + "loss": 0.7382, + "step": 28120 + }, + { + "epoch": 1.99, + "learning_rate": 4.334734548374631e-09, + "loss": 0.739, + "step": 28130 + }, + { + "epoch": 1.99, + "learning_rate": 3.871491425102569e-09, + "loss": 0.7399, + "step": 28140 + }, + { + "epoch": 1.99, + "learning_rate": 3.434416950198749e-09, + "loss": 0.7416, + "step": 28150 + }, + { + "epoch": 1.99, + "learning_rate": 3.023511695658954e-09, + "loss": 0.7326, + "step": 28160 + }, + { + "epoch": 1.99, + "learning_rate": 2.6387761992241467e-09, + "loss": 0.7333, + "step": 28170 + }, + { + "epoch": 1.99, + "learning_rate": 2.280210964393792e-09, + "loss": 0.738, + "step": 28180 + }, + { + "epoch": 1.99, + "learning_rate": 1.947816460419194e-09, + "loss": 0.7372, + "step": 28190 + }, + { + "epoch": 1.99, + "learning_rate": 1.6415931222990567e-09, + "loss": 0.7342, + "step": 28200 + }, + { + "epoch": 1.99, + "learning_rate": 1.3615413507839237e-09, + "loss": 0.7401, + "step": 28210 + }, + { + "epoch": 1.99, + "learning_rate": 1.1076615123717381e-09, + "loss": 0.7416, + "step": 28220 + }, + { + "epoch": 1.99, + "learning_rate": 8.79953939314504e-10, + "loss": 0.7394, + "step": 28230 + }, + { + "epoch": 1.99, + "learning_rate": 6.78418929607183e-10, + "loss": 0.7403, + "step": 28240 + }, + { + "epoch": 2.0, + "learning_rate": 5.030567469965775e-10, + "loss": 0.7358, + "step": 28250 + }, + { + "epoch": 2.0, + "learning_rate": 3.538676209746683e-10, + "loss": 0.7399, + "step": 28260 + }, + { + "epoch": 2.0, + "learning_rate": 2.308517467874971e-10, + "loss": 0.7328, + "step": 28270 + }, + { + "epoch": 2.0, + "learning_rate": 1.3400928542184333e-10, + "loss": 0.7416, + "step": 28280 + }, + { + "epoch": 2.0, + "learning_rate": 6.334036361410612e-11, + "loss": 0.7446, + "step": 28290 + }, + { + "epoch": 2.0, + "learning_rate": 1.884507384808387e-11, + "loss": 0.7405, + "step": 28300 + }, + { + "epoch": 2.0, + "learning_rate": 5.234743527537944e-13, + "loss": 0.7479, + "step": 28310 + }, + { + "epoch": 2.0, + "step": 28312, + "total_flos": 4.0304188158043765e+23, + "train_loss": 0.8292061477166914, + "train_runtime": 175162.7697, + "train_samples_per_second": 41.38, + "train_steps_per_second": 0.162 + } + ], + "logging_steps": 10, + "max_steps": 28312, + "num_input_tokens_seen": 0, + "num_train_epochs": 2, + "save_steps": 3000, + "total_flos": 4.0304188158043765e+23, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +}