diff --git "a/trainer_state.json" "b/trainer_state.json" --- "a/trainer_state.json" +++ "b/trainer_state.json" @@ -1,17015 +1,875 @@ { "best_metric": null, "best_model_checkpoint": null, - "epoch": 1.9998763851485586, + "epoch": 4.999690962871396, "eval_steps": 500, - "global_step": 28312, + "global_step": 70780, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { - "epoch": 0.0, - "learning_rate": 4.7058823529411767e-07, - "loss": 4.9244, - "step": 10 - }, - { - "epoch": 0.0, - "learning_rate": 9.411764705882353e-07, - "loss": 4.5069, - "step": 20 - }, - { - "epoch": 0.0, - "learning_rate": 1.4117647058823531e-06, - "loss": 3.6782, - "step": 30 - }, - { - "epoch": 0.0, - "learning_rate": 1.8823529411764707e-06, - "loss": 2.9642, - "step": 40 - }, - { - "epoch": 0.0, - "learning_rate": 2.3529411764705885e-06, - "loss": 2.5767, - "step": 50 - }, - { - "epoch": 0.0, - "learning_rate": 2.8235294117647062e-06, - "loss": 2.401, - "step": 60 - }, - { - "epoch": 0.0, - "learning_rate": 3.2941176470588236e-06, - "loss": 2.3275, - "step": 70 - }, - { - "epoch": 0.01, - "learning_rate": 3.7647058823529414e-06, - "loss": 2.2191, - "step": 80 - }, - { - "epoch": 0.01, - "learning_rate": 4.235294117647059e-06, - "loss": 2.0439, - "step": 90 - }, - { - "epoch": 0.01, - "learning_rate": 4.705882352941177e-06, - "loss": 1.7901, - "step": 100 - }, - { - "epoch": 0.01, - "learning_rate": 5.176470588235295e-06, - "loss": 1.4897, - "step": 110 - }, - { - "epoch": 0.01, - "learning_rate": 5.6470588235294125e-06, - "loss": 1.251, - "step": 120 - }, - { - "epoch": 0.01, - "learning_rate": 6.11764705882353e-06, - "loss": 1.1331, - "step": 130 - }, - { - "epoch": 0.01, - "learning_rate": 6.588235294117647e-06, - "loss": 1.1103, - "step": 140 - }, - { - "epoch": 0.01, - "learning_rate": 7.058823529411766e-06, - "loss": 1.1048, - "step": 150 - }, - { - "epoch": 0.01, - "learning_rate": 7.529411764705883e-06, - "loss": 1.0945, - "step": 160 - }, - { - "epoch": 0.01, - "learning_rate": 8.000000000000001e-06, - "loss": 1.0806, - "step": 170 - }, - { - "epoch": 0.01, - "learning_rate": 8.470588235294118e-06, - "loss": 1.0726, - "step": 180 - }, - { - "epoch": 0.01, - "learning_rate": 8.941176470588237e-06, - "loss": 1.0658, - "step": 190 - }, - { - "epoch": 0.01, - "learning_rate": 9.411764705882354e-06, - "loss": 1.0578, - "step": 200 - }, - { - "epoch": 0.01, - "learning_rate": 9.882352941176472e-06, - "loss": 1.0535, - "step": 210 - }, - { - "epoch": 0.02, - "learning_rate": 1.035294117647059e-05, - "loss": 1.0402, - "step": 220 - }, - { - "epoch": 0.02, - "learning_rate": 1.0823529411764706e-05, - "loss": 1.047, - "step": 230 - }, - { - "epoch": 0.02, - "learning_rate": 1.1294117647058825e-05, - "loss": 1.0332, - "step": 240 - }, - { - "epoch": 0.02, - "learning_rate": 1.1764705882352942e-05, - "loss": 1.0275, - "step": 250 - }, - { - "epoch": 0.02, - "learning_rate": 1.223529411764706e-05, - "loss": 1.0338, - "step": 260 - }, - { - "epoch": 0.02, - "learning_rate": 1.2705882352941177e-05, - "loss": 1.0286, - "step": 270 - }, - { - "epoch": 0.02, - "learning_rate": 1.3176470588235294e-05, - "loss": 1.0248, - "step": 280 - }, - { - "epoch": 0.02, - "learning_rate": 1.3647058823529413e-05, - "loss": 1.0303, - "step": 290 - }, - { - "epoch": 0.02, - "learning_rate": 1.4117647058823532e-05, - "loss": 1.0269, - "step": 300 - }, - { - "epoch": 0.02, - "learning_rate": 1.4588235294117647e-05, - "loss": 1.019, - "step": 310 - }, - { - "epoch": 0.02, - "learning_rate": 1.5058823529411765e-05, - "loss": 1.0283, - "step": 320 - }, - { - "epoch": 0.02, - "learning_rate": 1.5529411764705882e-05, - "loss": 1.0178, - "step": 330 - }, - { - "epoch": 0.02, - "learning_rate": 1.6000000000000003e-05, - "loss": 1.0279, - "step": 340 - }, - { - "epoch": 0.02, - "learning_rate": 1.647058823529412e-05, - "loss": 1.0125, - "step": 350 - }, - { - "epoch": 0.03, - "learning_rate": 1.6941176470588237e-05, - "loss": 1.0162, - "step": 360 - }, - { - "epoch": 0.03, - "learning_rate": 1.7411764705882353e-05, - "loss": 1.0113, - "step": 370 - }, - { - "epoch": 0.03, - "learning_rate": 1.7882352941176474e-05, - "loss": 1.0219, - "step": 380 - }, - { - "epoch": 0.03, - "learning_rate": 1.8352941176470587e-05, - "loss": 1.0163, - "step": 390 - }, - { - "epoch": 0.03, - "learning_rate": 1.8823529411764708e-05, - "loss": 1.0142, - "step": 400 - }, - { - "epoch": 0.03, - "learning_rate": 1.9294117647058825e-05, - "loss": 1.0063, - "step": 410 - }, - { - "epoch": 0.03, - "learning_rate": 1.9764705882352945e-05, - "loss": 0.9995, - "step": 420 - }, - { - "epoch": 0.03, - "learning_rate": 2.023529411764706e-05, - "loss": 1.0093, - "step": 430 - }, - { - "epoch": 0.03, - "learning_rate": 2.070588235294118e-05, - "loss": 1.0074, - "step": 440 - }, - { - "epoch": 0.03, - "learning_rate": 2.1176470588235296e-05, - "loss": 1.003, - "step": 450 - }, - { - "epoch": 0.03, - "learning_rate": 2.1647058823529413e-05, - "loss": 1.0029, - "step": 460 - }, - { - "epoch": 0.03, - "learning_rate": 2.2117647058823533e-05, - "loss": 1.0042, - "step": 470 - }, - { - "epoch": 0.03, - "learning_rate": 2.258823529411765e-05, - "loss": 1.0121, - "step": 480 - }, - { - "epoch": 0.03, - "learning_rate": 2.3058823529411763e-05, - "loss": 1.0019, - "step": 490 - }, - { - "epoch": 0.04, - "learning_rate": 2.3529411764705884e-05, - "loss": 1.0064, - "step": 500 - }, - { - "epoch": 0.04, - "learning_rate": 2.4e-05, - "loss": 1.0074, - "step": 510 - }, - { - "epoch": 0.04, - "learning_rate": 2.447058823529412e-05, - "loss": 1.0118, - "step": 520 - }, - { - "epoch": 0.04, - "learning_rate": 2.4941176470588238e-05, - "loss": 0.9997, - "step": 530 - }, - { - "epoch": 0.04, - "learning_rate": 2.5411764705882355e-05, - "loss": 1.0053, - "step": 540 - }, - { - "epoch": 0.04, - "learning_rate": 2.5882352941176475e-05, - "loss": 0.9937, - "step": 550 - }, - { - "epoch": 0.04, - "learning_rate": 2.635294117647059e-05, - "loss": 1.0067, - "step": 560 - }, - { - "epoch": 0.04, - "learning_rate": 2.6823529411764706e-05, - "loss": 1.0023, - "step": 570 - }, - { - "epoch": 0.04, - "learning_rate": 2.7294117647058826e-05, - "loss": 0.9994, - "step": 580 - }, - { - "epoch": 0.04, - "learning_rate": 2.7764705882352943e-05, - "loss": 0.9861, - "step": 590 - }, - { - "epoch": 0.04, - "learning_rate": 2.8235294117647063e-05, - "loss": 0.999, - "step": 600 - }, - { - "epoch": 0.04, - "learning_rate": 2.870588235294118e-05, - "loss": 0.9987, - "step": 610 - }, - { - "epoch": 0.04, - "learning_rate": 2.9176470588235294e-05, - "loss": 0.9934, - "step": 620 - }, - { - "epoch": 0.04, - "learning_rate": 2.9647058823529414e-05, - "loss": 0.9884, - "step": 630 - }, - { - "epoch": 0.05, - "learning_rate": 3.011764705882353e-05, - "loss": 0.9996, - "step": 640 - }, - { - "epoch": 0.05, - "learning_rate": 3.0588235294117644e-05, - "loss": 0.9902, - "step": 650 - }, - { - "epoch": 0.05, - "learning_rate": 3.1058823529411765e-05, - "loss": 0.9965, - "step": 660 - }, - { - "epoch": 0.05, - "learning_rate": 3.1529411764705885e-05, - "loss": 0.9936, - "step": 670 - }, - { - "epoch": 0.05, - "learning_rate": 3.2000000000000005e-05, - "loss": 0.9821, - "step": 680 - }, - { - "epoch": 0.05, - "learning_rate": 3.247058823529412e-05, - "loss": 0.9889, - "step": 690 - }, - { - "epoch": 0.05, - "learning_rate": 3.294117647058824e-05, - "loss": 0.9944, - "step": 700 - }, - { - "epoch": 0.05, - "learning_rate": 3.341176470588236e-05, - "loss": 0.9821, - "step": 710 - }, - { - "epoch": 0.05, - "learning_rate": 3.388235294117647e-05, - "loss": 0.9928, - "step": 720 - }, - { - "epoch": 0.05, - "learning_rate": 3.4352941176470587e-05, - "loss": 0.991, - "step": 730 - }, - { - "epoch": 0.05, - "learning_rate": 3.482352941176471e-05, - "loss": 0.9895, - "step": 740 - }, - { - "epoch": 0.05, - "learning_rate": 3.529411764705883e-05, - "loss": 0.9899, - "step": 750 - }, - { - "epoch": 0.05, - "learning_rate": 3.576470588235295e-05, - "loss": 0.99, - "step": 760 - }, - { - "epoch": 0.05, - "learning_rate": 3.623529411764706e-05, - "loss": 0.9951, - "step": 770 - }, - { - "epoch": 0.06, - "learning_rate": 3.6705882352941175e-05, - "loss": 0.9871, - "step": 780 - }, - { - "epoch": 0.06, - "learning_rate": 3.7176470588235295e-05, - "loss": 0.9781, - "step": 790 - }, - { - "epoch": 0.06, - "learning_rate": 3.7647058823529415e-05, - "loss": 0.9838, - "step": 800 - }, - { - "epoch": 0.06, - "learning_rate": 3.811764705882353e-05, - "loss": 0.9724, - "step": 810 - }, - { - "epoch": 0.06, - "learning_rate": 3.858823529411765e-05, - "loss": 0.9764, - "step": 820 - }, - { - "epoch": 0.06, - "learning_rate": 3.905882352941177e-05, - "loss": 0.981, - "step": 830 - }, - { - "epoch": 0.06, - "learning_rate": 3.952941176470589e-05, - "loss": 0.9841, - "step": 840 - }, - { - "epoch": 0.06, - "learning_rate": 4e-05, - "loss": 0.9871, - "step": 850 - }, - { - "epoch": 0.06, - "learning_rate": 3.999998691314254e-05, - "loss": 0.9905, - "step": 860 - }, - { - "epoch": 0.06, - "learning_rate": 3.999994765258726e-05, - "loss": 0.9767, - "step": 870 - }, - { - "epoch": 0.06, - "learning_rate": 3.999988221838556e-05, - "loss": 0.9712, - "step": 880 - }, - { - "epoch": 0.06, - "learning_rate": 3.9999790610623065e-05, - "loss": 0.9699, - "step": 890 - }, - { - "epoch": 0.06, - "learning_rate": 3.9999672829419655e-05, - "loss": 0.9781, - "step": 900 - }, - { - "epoch": 0.06, - "learning_rate": 3.999952887492948e-05, - "loss": 0.9844, - "step": 910 - }, - { - "epoch": 0.06, - "learning_rate": 3.9999358747340924e-05, - "loss": 0.9808, - "step": 920 - }, - { - "epoch": 0.07, - "learning_rate": 3.999916244687663e-05, - "loss": 0.979, - "step": 930 - }, - { - "epoch": 0.07, - "learning_rate": 3.99989399737935e-05, - "loss": 0.9738, - "step": 940 - }, - { - "epoch": 0.07, - "learning_rate": 3.9998691328382675e-05, - "loss": 0.9724, - "step": 950 - }, - { - "epoch": 0.07, - "learning_rate": 3.999841651096955e-05, - "loss": 0.9729, - "step": 960 - }, - { - "epoch": 0.07, - "learning_rate": 3.999811552191379e-05, - "loss": 0.9831, - "step": 970 - }, - { - "epoch": 0.07, - "learning_rate": 3.999778836160929e-05, - "loss": 0.9809, - "step": 980 - }, - { - "epoch": 0.07, - "learning_rate": 3.999743503048419e-05, - "loss": 0.9688, - "step": 990 - }, - { - "epoch": 0.07, - "learning_rate": 3.999705552900088e-05, - "loss": 0.9832, - "step": 1000 - }, - { - "epoch": 0.07, - "learning_rate": 3.9996649857656045e-05, - "loss": 0.97, - "step": 1010 - }, - { - "epoch": 0.07, - "learning_rate": 3.999621801698055e-05, - "loss": 0.9693, - "step": 1020 - }, - { - "epoch": 0.07, - "learning_rate": 3.999576000753955e-05, - "loss": 0.9661, - "step": 1030 - }, - { - "epoch": 0.07, - "learning_rate": 3.999527582993243e-05, - "loss": 0.9646, - "step": 1040 - }, - { - "epoch": 0.07, - "learning_rate": 3.999476548479283e-05, - "loss": 0.9751, - "step": 1050 - }, - { - "epoch": 0.07, - "learning_rate": 3.9994228972788636e-05, - "loss": 0.9712, - "step": 1060 - }, - { - "epoch": 0.08, - "learning_rate": 3.999366629462197e-05, - "loss": 0.9702, - "step": 1070 - }, - { - "epoch": 0.08, - "learning_rate": 3.9993077451029193e-05, - "loss": 0.9658, - "step": 1080 - }, - { - "epoch": 0.08, - "learning_rate": 3.9992462442780927e-05, - "loss": 0.9715, - "step": 1090 - }, - { - "epoch": 0.08, - "learning_rate": 3.999182127068202e-05, - "loss": 0.9743, - "step": 1100 - }, - { - "epoch": 0.08, - "learning_rate": 3.999115393557157e-05, - "loss": 0.9724, - "step": 1110 - }, - { - "epoch": 0.08, - "learning_rate": 3.99904604383229e-05, - "loss": 0.9667, - "step": 1120 - }, - { - "epoch": 0.08, - "learning_rate": 3.998974077984359e-05, - "loss": 0.9678, - "step": 1130 - }, - { - "epoch": 0.08, - "learning_rate": 3.9988994961075434e-05, - "loss": 0.967, - "step": 1140 - }, - { - "epoch": 0.08, - "learning_rate": 3.998822298299448e-05, - "loss": 0.9712, - "step": 1150 - }, - { - "epoch": 0.08, - "learning_rate": 3.998742484661101e-05, - "loss": 0.9719, - "step": 1160 - }, - { - "epoch": 0.08, - "learning_rate": 3.998660055296953e-05, - "loss": 0.96, - "step": 1170 - }, - { - "epoch": 0.08, - "learning_rate": 3.998575010314878e-05, - "loss": 0.9663, - "step": 1180 - }, - { - "epoch": 0.08, - "learning_rate": 3.9984873498261734e-05, - "loss": 0.9636, - "step": 1190 - }, - { - "epoch": 0.08, - "learning_rate": 3.998397073945559e-05, - "loss": 0.963, - "step": 1200 - }, - { - "epoch": 0.09, - "learning_rate": 3.998304182791178e-05, - "loss": 0.9634, - "step": 1210 - }, - { - "epoch": 0.09, - "learning_rate": 3.998208676484596e-05, - "loss": 0.9656, - "step": 1220 - }, - { - "epoch": 0.09, - "learning_rate": 3.998110555150799e-05, - "loss": 0.961, - "step": 1230 - }, - { - "epoch": 0.09, - "learning_rate": 3.9980098189181985e-05, - "loss": 0.9524, - "step": 1240 - }, - { - "epoch": 0.09, - "learning_rate": 3.997906467918627e-05, - "loss": 0.9728, - "step": 1250 - }, - { - "epoch": 0.09, - "learning_rate": 3.997800502287336e-05, - "loss": 0.9667, - "step": 1260 - }, - { - "epoch": 0.09, - "learning_rate": 3.997691922163004e-05, - "loss": 0.9617, - "step": 1270 - }, - { - "epoch": 0.09, - "learning_rate": 3.997580727687727e-05, - "loss": 0.9552, - "step": 1280 - }, - { - "epoch": 0.09, - "learning_rate": 3.997466919007024e-05, - "loss": 0.9621, - "step": 1290 - }, - { - "epoch": 0.09, - "learning_rate": 3.997350496269834e-05, - "loss": 0.958, - "step": 1300 - }, - { - "epoch": 0.09, - "learning_rate": 3.997231459628519e-05, - "loss": 0.971, - "step": 1310 - }, - { - "epoch": 0.09, - "learning_rate": 3.9971098092388596e-05, - "loss": 0.9472, - "step": 1320 - }, - { - "epoch": 0.09, - "learning_rate": 3.9969855452600585e-05, - "loss": 0.964, - "step": 1330 - }, - { - "epoch": 0.09, - "learning_rate": 3.996858667854738e-05, - "loss": 0.9638, - "step": 1340 - }, - { - "epoch": 0.1, - "learning_rate": 3.99672917718894e-05, - "loss": 0.9605, - "step": 1350 - }, - { - "epoch": 0.1, - "learning_rate": 3.996597073432129e-05, - "loss": 0.9614, - "step": 1360 - }, - { - "epoch": 0.1, - "learning_rate": 3.996462356757185e-05, - "loss": 0.9517, - "step": 1370 - }, - { - "epoch": 0.1, - "learning_rate": 3.996325027340411e-05, - "loss": 0.9506, - "step": 1380 - }, - { - "epoch": 0.1, - "learning_rate": 3.9961850853615285e-05, - "loss": 0.9477, - "step": 1390 - }, - { - "epoch": 0.1, - "learning_rate": 3.996042531003677e-05, - "loss": 0.9546, - "step": 1400 - }, - { - "epoch": 0.1, - "learning_rate": 3.995897364453415e-05, - "loss": 0.944, - "step": 1410 - }, - { - "epoch": 0.1, - "learning_rate": 3.9957495859007206e-05, - "loss": 0.9562, - "step": 1420 - }, - { - "epoch": 0.1, - "learning_rate": 3.9955991955389885e-05, - "loss": 0.955, - "step": 1430 - }, - { - "epoch": 0.1, - "learning_rate": 3.995446193565033e-05, - "loss": 0.9622, - "step": 1440 - }, - { - "epoch": 0.1, - "learning_rate": 3.9952905801790866e-05, - "loss": 0.9558, - "step": 1450 - }, - { - "epoch": 0.1, - "learning_rate": 3.995132355584797e-05, - "loss": 0.9489, - "step": 1460 - }, - { - "epoch": 0.1, - "learning_rate": 3.994971519989231e-05, - "loss": 0.9579, - "step": 1470 - }, - { - "epoch": 0.1, - "learning_rate": 3.9948080736028714e-05, - "loss": 0.9577, - "step": 1480 - }, - { - "epoch": 0.11, - "learning_rate": 3.994642016639619e-05, - "loss": 0.9568, - "step": 1490 - }, - { - "epoch": 0.11, - "learning_rate": 3.9944733493167885e-05, - "loss": 0.9444, - "step": 1500 - }, - { - "epoch": 0.11, - "learning_rate": 3.994302071855114e-05, - "loss": 0.9514, - "step": 1510 - }, - { - "epoch": 0.11, - "learning_rate": 3.994128184478745e-05, - "loss": 0.9394, - "step": 1520 - }, - { - "epoch": 0.11, - "learning_rate": 3.993951687415242e-05, - "loss": 0.9464, - "step": 1530 - }, - { - "epoch": 0.11, - "learning_rate": 3.993772580895587e-05, - "loss": 0.9436, - "step": 1540 - }, - { - "epoch": 0.11, - "learning_rate": 3.993590865154173e-05, - "loss": 0.9454, - "step": 1550 - }, - { - "epoch": 0.11, - "learning_rate": 3.9934065404288086e-05, - "loss": 0.95, - "step": 1560 - }, - { - "epoch": 0.11, - "learning_rate": 3.9932196069607175e-05, - "loss": 0.9569, - "step": 1570 - }, - { - "epoch": 0.11, - "learning_rate": 3.9930300649945375e-05, - "loss": 0.9461, - "step": 1580 - }, - { - "epoch": 0.11, - "learning_rate": 3.992837914778318e-05, - "loss": 0.9453, - "step": 1590 - }, - { - "epoch": 0.11, - "learning_rate": 3.992643156563524e-05, - "loss": 0.9464, - "step": 1600 - }, - { - "epoch": 0.11, - "learning_rate": 3.992445790605032e-05, - "loss": 0.942, - "step": 1610 - }, - { - "epoch": 0.11, - "learning_rate": 3.992245817161134e-05, - "loss": 0.9395, - "step": 1620 - }, - { - "epoch": 0.12, - "learning_rate": 3.992043236493531e-05, - "loss": 0.9497, - "step": 1630 - }, - { - "epoch": 0.12, - "learning_rate": 3.991838048867338e-05, - "loss": 0.9503, - "step": 1640 - }, - { - "epoch": 0.12, - "learning_rate": 3.9916302545510795e-05, - "loss": 0.9431, - "step": 1650 - }, - { - "epoch": 0.12, - "learning_rate": 3.991419853816694e-05, - "loss": 0.9549, - "step": 1660 - }, - { - "epoch": 0.12, - "learning_rate": 3.9912068469395315e-05, - "loss": 0.9464, - "step": 1670 - }, - { - "epoch": 0.12, - "learning_rate": 3.990991234198349e-05, - "loss": 0.94, - "step": 1680 - }, - { - "epoch": 0.12, - "learning_rate": 3.9907730158753164e-05, - "loss": 0.9347, - "step": 1690 - }, - { - "epoch": 0.12, - "learning_rate": 3.9905521922560134e-05, - "loss": 0.9414, - "step": 1700 - }, - { - "epoch": 0.12, - "learning_rate": 3.9903287636294286e-05, - "loss": 0.9365, - "step": 1710 - }, - { - "epoch": 0.12, - "learning_rate": 3.9901027302879585e-05, - "loss": 0.9406, - "step": 1720 - }, - { - "epoch": 0.12, - "learning_rate": 3.989874092527411e-05, - "loss": 0.9446, - "step": 1730 - }, - { - "epoch": 0.12, - "learning_rate": 3.989642850647002e-05, - "loss": 0.9456, - "step": 1740 - }, - { - "epoch": 0.12, - "learning_rate": 3.989409004949352e-05, - "loss": 0.9483, - "step": 1750 - }, - { - "epoch": 0.12, - "learning_rate": 3.989172555740494e-05, - "loss": 0.9379, - "step": 1760 - }, - { - "epoch": 0.13, - "learning_rate": 3.988933503329864e-05, - "loss": 0.943, - "step": 1770 - }, - { - "epoch": 0.13, - "learning_rate": 3.988691848030307e-05, - "loss": 0.938, - "step": 1780 - }, - { - "epoch": 0.13, - "learning_rate": 3.9884475901580736e-05, - "loss": 0.9429, - "step": 1790 - }, - { - "epoch": 0.13, - "learning_rate": 3.988200730032821e-05, - "loss": 0.9352, - "step": 1800 - }, - { - "epoch": 0.13, - "learning_rate": 3.987951267977612e-05, - "loss": 0.9414, - "step": 1810 - }, - { - "epoch": 0.13, - "learning_rate": 3.9876992043189135e-05, - "loss": 0.9367, - "step": 1820 - }, - { - "epoch": 0.13, - "learning_rate": 3.987444539386597e-05, - "loss": 0.9427, - "step": 1830 - }, - { - "epoch": 0.13, - "learning_rate": 3.98718727351394e-05, - "loss": 0.9416, - "step": 1840 - }, - { - "epoch": 0.13, - "learning_rate": 3.986927407037622e-05, - "loss": 0.9362, - "step": 1850 - }, - { - "epoch": 0.13, - "learning_rate": 3.986664940297726e-05, - "loss": 0.9452, - "step": 1860 - }, - { - "epoch": 0.13, - "learning_rate": 3.98639987363774e-05, - "loss": 0.9435, - "step": 1870 - }, - { - "epoch": 0.13, - "learning_rate": 3.986132207404551e-05, - "loss": 0.9354, - "step": 1880 - }, - { - "epoch": 0.13, - "learning_rate": 3.9858619419484526e-05, - "loss": 0.9401, - "step": 1890 - }, - { - "epoch": 0.13, - "learning_rate": 3.985589077623135e-05, - "loss": 0.9341, - "step": 1900 - }, - { - "epoch": 0.13, - "learning_rate": 3.9853136147856924e-05, - "loss": 0.9342, - "step": 1910 - }, - { - "epoch": 0.14, - "learning_rate": 3.98503555379662e-05, - "loss": 0.9357, - "step": 1920 - }, - { - "epoch": 0.14, - "learning_rate": 3.984754895019812e-05, - "loss": 0.9393, - "step": 1930 - }, - { - "epoch": 0.14, - "learning_rate": 3.984471638822562e-05, - "loss": 0.9401, - "step": 1940 - }, - { - "epoch": 0.14, - "learning_rate": 3.984185785575562e-05, - "loss": 0.9344, - "step": 1950 - }, - { - "epoch": 0.14, - "learning_rate": 3.983897335652907e-05, - "loss": 0.9319, - "step": 1960 - }, - { - "epoch": 0.14, - "learning_rate": 3.9836062894320864e-05, - "loss": 0.9449, - "step": 1970 - }, - { - "epoch": 0.14, - "learning_rate": 3.983312647293987e-05, - "loss": 0.9364, - "step": 1980 - }, - { - "epoch": 0.14, - "learning_rate": 3.983016409622895e-05, - "loss": 0.9347, - "step": 1990 - }, - { - "epoch": 0.14, - "learning_rate": 3.982717576806492e-05, - "loss": 0.9319, - "step": 2000 - }, - { - "epoch": 0.14, - "learning_rate": 3.9824161492358565e-05, - "loss": 0.9361, - "step": 2010 - }, - { - "epoch": 0.14, - "learning_rate": 3.982112127305463e-05, - "loss": 0.9315, - "step": 2020 - }, - { - "epoch": 0.14, - "learning_rate": 3.9818055114131795e-05, - "loss": 0.9288, - "step": 2030 - }, - { - "epoch": 0.14, - "learning_rate": 3.981496301960271e-05, - "loss": 0.9358, - "step": 2040 - }, - { - "epoch": 0.14, - "learning_rate": 3.981184499351395e-05, - "loss": 0.9358, - "step": 2050 - }, - { - "epoch": 0.15, - "learning_rate": 3.9808701039946026e-05, - "loss": 0.9335, - "step": 2060 - }, - { - "epoch": 0.15, - "learning_rate": 3.98055311630134e-05, - "loss": 0.941, - "step": 2070 - }, - { - "epoch": 0.15, - "learning_rate": 3.980233536686442e-05, - "loss": 0.9366, - "step": 2080 - }, - { - "epoch": 0.15, - "learning_rate": 3.9799113655681407e-05, - "loss": 0.9355, - "step": 2090 - }, - { - "epoch": 0.15, - "learning_rate": 3.979586603368055e-05, - "loss": 0.9345, - "step": 2100 - }, - { - "epoch": 0.15, - "learning_rate": 3.979259250511197e-05, - "loss": 0.9362, - "step": 2110 - }, - { - "epoch": 0.15, - "learning_rate": 3.978929307425969e-05, - "loss": 0.9266, - "step": 2120 - }, - { - "epoch": 0.15, - "learning_rate": 3.978596774544163e-05, - "loss": 0.9343, - "step": 2130 - }, - { - "epoch": 0.15, - "learning_rate": 3.978261652300959e-05, - "loss": 0.9275, - "step": 2140 - }, - { - "epoch": 0.15, - "learning_rate": 3.977923941134928e-05, - "loss": 0.9312, - "step": 2150 - }, - { - "epoch": 0.15, - "learning_rate": 3.9775836414880266e-05, - "loss": 0.9326, - "step": 2160 - }, - { - "epoch": 0.15, - "learning_rate": 3.9772407538056014e-05, - "loss": 0.9425, - "step": 2170 - }, - { - "epoch": 0.15, - "learning_rate": 3.976895278536383e-05, - "loss": 0.935, - "step": 2180 - }, - { - "epoch": 0.15, - "learning_rate": 3.976547216132492e-05, - "loss": 0.9308, - "step": 2190 - }, - { - "epoch": 0.16, - "learning_rate": 3.976196567049431e-05, - "loss": 0.9319, - "step": 2200 - }, - { - "epoch": 0.16, - "learning_rate": 3.97584333174609e-05, - "loss": 0.933, - "step": 2210 - }, - { - "epoch": 0.16, - "learning_rate": 3.975487510684743e-05, - "loss": 0.9285, - "step": 2220 - }, - { - "epoch": 0.16, - "learning_rate": 3.975129104331048e-05, - "loss": 0.9304, - "step": 2230 - }, - { - "epoch": 0.16, - "learning_rate": 3.974768113154046e-05, - "loss": 0.9349, - "step": 2240 - }, - { - "epoch": 0.16, - "learning_rate": 3.974404537626162e-05, - "loss": 0.9303, - "step": 2250 - }, - { - "epoch": 0.16, - "learning_rate": 3.9740383782232e-05, - "loss": 0.9273, - "step": 2260 - }, - { - "epoch": 0.16, - "learning_rate": 3.97366963542435e-05, - "loss": 0.9319, - "step": 2270 - }, - { - "epoch": 0.16, - "learning_rate": 3.9732983097121795e-05, - "loss": 0.9261, - "step": 2280 - }, - { - "epoch": 0.16, - "learning_rate": 3.972924401572637e-05, - "loss": 0.9354, - "step": 2290 - }, - { - "epoch": 0.16, - "learning_rate": 3.972547911495051e-05, - "loss": 0.9292, - "step": 2300 - }, - { - "epoch": 0.16, - "learning_rate": 3.972168839972128e-05, - "loss": 0.9302, - "step": 2310 - }, - { - "epoch": 0.16, - "learning_rate": 3.9717871874999545e-05, - "loss": 0.9232, - "step": 2320 - }, - { - "epoch": 0.16, - "learning_rate": 3.971402954577993e-05, - "loss": 0.9251, - "step": 2330 - }, - { - "epoch": 0.17, - "learning_rate": 3.971016141709083e-05, - "loss": 0.9447, - "step": 2340 - }, - { - "epoch": 0.17, - "learning_rate": 3.970626749399443e-05, - "loss": 0.9549, - "step": 2350 - }, - { - "epoch": 0.17, - "learning_rate": 3.9702347781586645e-05, - "loss": 0.9613, - "step": 2360 - }, - { - "epoch": 0.17, - "learning_rate": 3.9698402284997135e-05, - "loss": 1.0125, - "step": 2370 - }, - { - "epoch": 0.17, - "learning_rate": 3.9694431009389315e-05, - "loss": 0.9856, - "step": 2380 - }, - { - "epoch": 0.17, - "learning_rate": 3.969043395996035e-05, - "loss": 0.9733, - "step": 2390 - }, - { - "epoch": 0.17, - "learning_rate": 3.968641114194111e-05, - "loss": 0.9635, - "step": 2400 - }, - { - "epoch": 0.17, - "learning_rate": 3.9682362560596214e-05, - "loss": 0.9609, - "step": 2410 - }, - { - "epoch": 0.17, - "learning_rate": 3.967828822122397e-05, - "loss": 0.9423, - "step": 2420 - }, - { - "epoch": 0.17, - "learning_rate": 3.967418812915641e-05, - "loss": 0.9416, - "step": 2430 - }, - { - "epoch": 0.17, - "learning_rate": 3.967006228975927e-05, - "loss": 0.9448, - "step": 2440 - }, - { - "epoch": 0.17, - "learning_rate": 3.966591070843197e-05, - "loss": 0.942, - "step": 2450 - }, - { - "epoch": 0.17, - "learning_rate": 3.9661733390607635e-05, - "loss": 0.9356, - "step": 2460 - }, - { - "epoch": 0.17, - "learning_rate": 3.965753034175305e-05, - "loss": 0.9379, - "step": 2470 - }, - { - "epoch": 0.18, - "learning_rate": 3.96533015673687e-05, - "loss": 0.9402, - "step": 2480 - }, - { - "epoch": 0.18, - "learning_rate": 3.96490470729887e-05, - "loss": 0.937, - "step": 2490 - }, - { - "epoch": 0.18, - "learning_rate": 3.964476686418087e-05, - "loss": 0.9311, - "step": 2500 - }, - { - "epoch": 0.18, - "learning_rate": 3.9640460946546645e-05, - "loss": 0.9384, - "step": 2510 - }, - { - "epoch": 0.18, - "learning_rate": 3.9636129325721123e-05, - "loss": 0.9325, - "step": 2520 - }, - { - "epoch": 0.18, - "learning_rate": 3.963177200737303e-05, - "loss": 0.9311, - "step": 2530 - }, - { - "epoch": 0.18, - "learning_rate": 3.9627388997204726e-05, - "loss": 0.9279, - "step": 2540 - }, - { - "epoch": 0.18, - "learning_rate": 3.9622980300952204e-05, - "loss": 0.9336, - "step": 2550 - }, - { - "epoch": 0.18, - "learning_rate": 3.961854592438504e-05, - "loss": 0.9325, - "step": 2560 - }, - { - "epoch": 0.18, - "learning_rate": 3.961408587330647e-05, - "loss": 0.9294, - "step": 2570 - }, - { - "epoch": 0.18, - "learning_rate": 3.960960015355327e-05, - "loss": 0.9353, - "step": 2580 - }, - { - "epoch": 0.18, - "learning_rate": 3.960508877099586e-05, - "loss": 0.9248, - "step": 2590 - }, - { - "epoch": 0.18, - "learning_rate": 3.960055173153821e-05, - "loss": 0.9253, - "step": 2600 - }, - { - "epoch": 0.18, - "learning_rate": 3.959598904111788e-05, - "loss": 0.9241, - "step": 2610 - }, - { - "epoch": 0.19, - "learning_rate": 3.9591400705706e-05, - "loss": 0.9208, - "step": 2620 - }, - { - "epoch": 0.19, - "learning_rate": 3.9586786731307254e-05, - "loss": 0.9267, - "step": 2630 - }, - { - "epoch": 0.19, - "learning_rate": 3.9582147123959894e-05, - "loss": 0.9344, - "step": 2640 - }, - { - "epoch": 0.19, - "learning_rate": 3.9577481889735707e-05, - "loss": 0.9254, - "step": 2650 - }, - { - "epoch": 0.19, - "learning_rate": 3.957279103474001e-05, - "loss": 0.929, - "step": 2660 - }, - { - "epoch": 0.19, - "learning_rate": 3.9568074565111665e-05, - "loss": 0.9263, - "step": 2670 - }, - { - "epoch": 0.19, - "learning_rate": 3.956333248702304e-05, - "loss": 0.9292, - "step": 2680 - }, - { - "epoch": 0.19, - "learning_rate": 3.955856480668004e-05, - "loss": 0.9239, - "step": 2690 - }, - { - "epoch": 0.19, - "learning_rate": 3.9553771530322044e-05, - "loss": 0.93, - "step": 2700 - }, - { - "epoch": 0.19, - "learning_rate": 3.954895266422195e-05, - "loss": 0.9225, - "step": 2710 - }, - { - "epoch": 0.19, - "learning_rate": 3.9544108214686154e-05, - "loss": 0.9265, - "step": 2720 - }, - { - "epoch": 0.19, - "learning_rate": 3.9539238188054494e-05, - "loss": 0.9242, - "step": 2730 - }, - { - "epoch": 0.19, - "learning_rate": 3.953434259070032e-05, - "loss": 0.9193, - "step": 2740 - }, - { - "epoch": 0.19, - "learning_rate": 3.9529421429030426e-05, - "loss": 0.9261, - "step": 2750 - }, - { - "epoch": 0.19, - "learning_rate": 3.952447470948506e-05, - "loss": 0.925, - "step": 2760 - }, - { - "epoch": 0.2, - "learning_rate": 3.9519502438537944e-05, - "loss": 0.9155, - "step": 2770 - }, - { - "epoch": 0.2, - "learning_rate": 3.95145046226962e-05, - "loss": 0.9255, - "step": 2780 - }, - { - "epoch": 0.2, - "learning_rate": 3.95094812685004e-05, - "loss": 0.9201, - "step": 2790 - }, - { - "epoch": 0.2, - "learning_rate": 3.950443238252453e-05, - "loss": 0.9211, - "step": 2800 - }, - { - "epoch": 0.2, - "learning_rate": 3.9499357971376014e-05, - "loss": 0.9248, - "step": 2810 - }, - { - "epoch": 0.2, - "learning_rate": 3.949425804169565e-05, - "loss": 0.9207, - "step": 2820 - }, - { - "epoch": 0.2, - "learning_rate": 3.948913260015764e-05, - "loss": 0.9201, - "step": 2830 - }, - { - "epoch": 0.2, - "learning_rate": 3.9483981653469586e-05, - "loss": 0.9185, - "step": 2840 - }, - { - "epoch": 0.2, - "learning_rate": 3.947880520837245e-05, - "loss": 0.9272, - "step": 2850 - }, - { - "epoch": 0.2, - "learning_rate": 3.9473603271640575e-05, - "loss": 0.9164, - "step": 2860 - }, - { - "epoch": 0.2, - "learning_rate": 3.9468375850081664e-05, - "loss": 0.9204, - "step": 2870 - }, - { - "epoch": 0.2, - "learning_rate": 3.946312295053676e-05, - "loss": 0.9218, - "step": 2880 - }, - { - "epoch": 0.2, - "learning_rate": 3.9457844579880274e-05, - "loss": 0.9187, - "step": 2890 - }, - { - "epoch": 0.2, - "learning_rate": 3.9452540745019916e-05, - "loss": 0.9192, - "step": 2900 - }, - { - "epoch": 0.21, - "learning_rate": 3.9447211452896754e-05, - "loss": 0.9207, - "step": 2910 - }, - { - "epoch": 0.21, - "learning_rate": 3.944185671048514e-05, - "loss": 0.913, - "step": 2920 - }, - { - "epoch": 0.21, - "learning_rate": 3.943647652479277e-05, - "loss": 0.9088, - "step": 2930 - }, - { - "epoch": 0.21, - "learning_rate": 3.9431070902860605e-05, - "loss": 0.908, - "step": 2940 - }, - { - "epoch": 0.21, - "learning_rate": 3.94256398517629e-05, - "loss": 0.9183, - "step": 2950 - }, - { - "epoch": 0.21, - "learning_rate": 3.942018337860721e-05, - "loss": 0.9119, - "step": 2960 - }, - { - "epoch": 0.21, - "learning_rate": 3.941470149053433e-05, - "loss": 0.9215, - "step": 2970 - }, - { - "epoch": 0.21, - "learning_rate": 3.9409194194718325e-05, - "loss": 0.9189, - "step": 2980 - }, - { - "epoch": 0.21, - "learning_rate": 3.9403661498366536e-05, - "loss": 0.9221, - "step": 2990 - }, - { - "epoch": 0.21, - "learning_rate": 3.93981034087195e-05, - "loss": 0.9125, - "step": 3000 - }, - { - "epoch": 0.21, - "learning_rate": 3.9392519933051025e-05, - "loss": 0.9196, - "step": 3010 - }, - { - "epoch": 0.21, - "learning_rate": 3.938691107866812e-05, - "loss": 0.9212, - "step": 3020 - }, - { - "epoch": 0.21, - "learning_rate": 3.938127685291101e-05, - "loss": 0.918, - "step": 3030 - }, - { - "epoch": 0.21, - "learning_rate": 3.9375617263153136e-05, - "loss": 0.9136, - "step": 3040 - }, - { - "epoch": 0.22, - "learning_rate": 3.9369932316801115e-05, - "loss": 0.9161, - "step": 3050 - }, - { - "epoch": 0.22, - "learning_rate": 3.936422202129475e-05, - "loss": 0.9088, - "step": 3060 - }, - { - "epoch": 0.22, - "learning_rate": 3.935848638410704e-05, - "loss": 0.9151, - "step": 3070 - }, - { - "epoch": 0.22, - "learning_rate": 3.9352725412744114e-05, - "loss": 0.9188, - "step": 3080 - }, - { - "epoch": 0.22, - "learning_rate": 3.9346939114745284e-05, - "loss": 0.9169, - "step": 3090 - }, - { - "epoch": 0.22, - "learning_rate": 3.934112749768299e-05, - "loss": 0.9156, - "step": 3100 - }, - { - "epoch": 0.22, - "learning_rate": 3.9335290569162815e-05, - "loss": 0.9141, - "step": 3110 - }, - { - "epoch": 0.22, - "learning_rate": 3.9329428336823464e-05, - "loss": 0.9148, - "step": 3120 - }, - { - "epoch": 0.22, - "learning_rate": 3.9323540808336755e-05, - "loss": 0.9114, - "step": 3130 - }, - { - "epoch": 0.22, - "learning_rate": 3.931762799140762e-05, - "loss": 0.9081, - "step": 3140 - }, - { - "epoch": 0.22, - "learning_rate": 3.931168989377407e-05, - "loss": 0.9148, - "step": 3150 - }, - { - "epoch": 0.22, - "learning_rate": 3.930572652320721e-05, - "loss": 0.9127, - "step": 3160 - }, - { - "epoch": 0.22, - "learning_rate": 3.929973788751122e-05, - "loss": 0.9137, - "step": 3170 - }, - { - "epoch": 0.22, - "learning_rate": 3.929372399452334e-05, - "loss": 0.921, - "step": 3180 - }, - { - "epoch": 0.23, - "learning_rate": 3.928768485211387e-05, - "loss": 0.907, - "step": 3190 - }, - { - "epoch": 0.23, - "learning_rate": 3.928162046818615e-05, - "loss": 0.9095, - "step": 3200 - }, - { - "epoch": 0.23, - "learning_rate": 3.927553085067654e-05, - "loss": 0.9105, - "step": 3210 - }, - { - "epoch": 0.23, - "learning_rate": 3.926941600755445e-05, - "loss": 0.9149, - "step": 3220 - }, - { - "epoch": 0.23, - "learning_rate": 3.926327594682229e-05, - "loss": 0.913, - "step": 3230 - }, - { - "epoch": 0.23, - "learning_rate": 3.925711067651546e-05, - "loss": 0.9057, - "step": 3240 - }, - { - "epoch": 0.23, - "learning_rate": 3.9250920204702366e-05, - "loss": 0.905, - "step": 3250 - }, - { - "epoch": 0.23, - "learning_rate": 3.924470453948439e-05, - "loss": 0.9102, - "step": 3260 - }, - { - "epoch": 0.23, - "learning_rate": 3.923846368899588e-05, - "loss": 0.9165, - "step": 3270 - }, - { - "epoch": 0.23, - "learning_rate": 3.923219766140415e-05, - "loss": 0.9106, - "step": 3280 - }, - { - "epoch": 0.23, - "learning_rate": 3.922590646490946e-05, - "loss": 0.9157, - "step": 3290 - }, - { - "epoch": 0.23, - "learning_rate": 3.9219590107745016e-05, - "loss": 0.9071, - "step": 3300 - }, - { - "epoch": 0.23, - "learning_rate": 3.921324859817695e-05, - "loss": 0.9113, - "step": 3310 - }, - { - "epoch": 0.23, - "learning_rate": 3.920688194450429e-05, - "loss": 0.91, - "step": 3320 - }, - { - "epoch": 0.24, - "learning_rate": 3.920049015505898e-05, - "loss": 0.9212, - "step": 3330 - }, - { - "epoch": 0.24, - "learning_rate": 3.919407323820588e-05, - "loss": 0.9073, - "step": 3340 - }, - { - "epoch": 0.24, - "learning_rate": 3.918763120234272e-05, - "loss": 0.9135, - "step": 3350 - }, - { - "epoch": 0.24, - "learning_rate": 3.918116405590009e-05, - "loss": 0.9148, - "step": 3360 - }, - { - "epoch": 0.24, - "learning_rate": 3.9174671807341454e-05, - "loss": 0.9134, - "step": 3370 - }, - { - "epoch": 0.24, - "learning_rate": 3.916815446516312e-05, - "loss": 0.9051, - "step": 3380 - }, - { - "epoch": 0.24, - "learning_rate": 3.9161612037894256e-05, - "loss": 0.9033, - "step": 3390 - }, - { - "epoch": 0.24, - "learning_rate": 3.915504453409683e-05, - "loss": 0.9111, - "step": 3400 - }, - { - "epoch": 0.24, - "learning_rate": 3.914845196236564e-05, - "loss": 0.9113, - "step": 3410 - }, - { - "epoch": 0.24, - "learning_rate": 3.91418343313283e-05, - "loss": 0.9065, - "step": 3420 - }, - { - "epoch": 0.24, - "learning_rate": 3.913519164964521e-05, - "loss": 0.9063, - "step": 3430 - }, - { - "epoch": 0.24, - "learning_rate": 3.912852392600954e-05, - "loss": 0.9047, - "step": 3440 - }, - { - "epoch": 0.24, - "learning_rate": 3.912183116914725e-05, - "loss": 0.9116, - "step": 3450 - }, - { - "epoch": 0.24, - "learning_rate": 3.911511338781707e-05, - "loss": 0.9057, - "step": 3460 - }, - { - "epoch": 0.25, - "learning_rate": 3.9108370590810445e-05, - "loss": 0.9045, - "step": 3470 - }, - { - "epoch": 0.25, - "learning_rate": 3.910160278695158e-05, - "loss": 0.9155, - "step": 3480 - }, - { - "epoch": 0.25, - "learning_rate": 3.909480998509742e-05, - "loss": 0.9151, - "step": 3490 - }, - { - "epoch": 0.25, - "learning_rate": 3.908799219413759e-05, - "loss": 0.9137, - "step": 3500 - }, - { - "epoch": 0.25, - "learning_rate": 3.9081149422994445e-05, - "loss": 0.907, - "step": 3510 - }, - { - "epoch": 0.25, - "learning_rate": 3.907428168062302e-05, - "loss": 0.9205, - "step": 3520 - }, - { - "epoch": 0.25, - "learning_rate": 3.906738897601103e-05, - "loss": 0.9105, - "step": 3530 - }, - { - "epoch": 0.25, - "learning_rate": 3.906047131817886e-05, - "loss": 0.9154, - "step": 3540 - }, - { - "epoch": 0.25, - "learning_rate": 3.905352871617955e-05, - "loss": 0.904, - "step": 3550 - }, - { - "epoch": 0.25, - "learning_rate": 3.904656117909878e-05, - "loss": 0.9005, - "step": 3560 - }, - { - "epoch": 0.25, - "learning_rate": 3.903956871605488e-05, - "loss": 0.9097, - "step": 3570 - }, - { - "epoch": 0.25, - "learning_rate": 3.903255133619877e-05, - "loss": 0.9048, - "step": 3580 - }, - { - "epoch": 0.25, - "learning_rate": 3.902550904871401e-05, - "loss": 0.9028, - "step": 3590 - }, - { - "epoch": 0.25, - "learning_rate": 3.9018441862816726e-05, - "loss": 0.9088, - "step": 3600 - }, - { - "epoch": 0.25, - "learning_rate": 3.901134978775566e-05, - "loss": 0.9128, - "step": 3610 - }, - { - "epoch": 0.26, - "learning_rate": 3.90042328328121e-05, - "loss": 0.94, - "step": 3620 - }, - { - "epoch": 0.26, - "learning_rate": 3.89970910072999e-05, - "loss": 0.9483, - "step": 3630 - }, - { - "epoch": 0.26, - "learning_rate": 3.898992432056547e-05, - "loss": 0.9345, - "step": 3640 - }, - { - "epoch": 0.26, - "learning_rate": 3.898273278198776e-05, - "loss": 0.9188, - "step": 3650 - }, - { - "epoch": 0.26, - "learning_rate": 3.897551640097822e-05, - "loss": 0.9143, - "step": 3660 - }, - { - "epoch": 0.26, - "learning_rate": 3.8968275186980825e-05, - "loss": 0.9203, - "step": 3670 - }, - { - "epoch": 0.26, - "learning_rate": 3.8961009149472055e-05, - "loss": 0.9199, - "step": 3680 - }, - { - "epoch": 0.26, - "learning_rate": 3.895371829796087e-05, - "loss": 0.9076, - "step": 3690 - }, - { - "epoch": 0.26, - "learning_rate": 3.89464026419887e-05, - "loss": 0.9149, - "step": 3700 - }, - { - "epoch": 0.26, - "learning_rate": 3.8939062191129445e-05, - "loss": 0.9114, - "step": 3710 - }, - { - "epoch": 0.26, - "learning_rate": 3.893169695498944e-05, - "loss": 0.902, - "step": 3720 - }, - { - "epoch": 0.26, - "learning_rate": 3.892430694320747e-05, - "loss": 0.9026, - "step": 3730 - }, - { - "epoch": 0.26, - "learning_rate": 3.891689216545474e-05, - "loss": 0.9055, - "step": 3740 - }, - { - "epoch": 0.26, - "learning_rate": 3.890945263143486e-05, - "loss": 0.905, - "step": 3750 - }, - { - "epoch": 0.27, - "learning_rate": 3.890198835088384e-05, - "loss": 0.9146, - "step": 3760 - }, - { - "epoch": 0.27, - "learning_rate": 3.889449933357009e-05, - "loss": 0.908, - "step": 3770 - }, - { - "epoch": 0.27, - "learning_rate": 3.888698558929436e-05, - "loss": 0.9011, - "step": 3780 - }, - { - "epoch": 0.27, - "learning_rate": 3.887944712788979e-05, - "loss": 0.9, - "step": 3790 - }, - { - "epoch": 0.27, - "learning_rate": 3.887188395922187e-05, - "loss": 0.9093, - "step": 3800 - }, - { - "epoch": 0.27, - "learning_rate": 3.886429609318839e-05, - "loss": 0.9, - "step": 3810 - }, - { - "epoch": 0.27, - "learning_rate": 3.88566835397195e-05, - "loss": 0.9048, - "step": 3820 - }, - { - "epoch": 0.27, - "learning_rate": 3.884904630877763e-05, - "loss": 0.906, - "step": 3830 - }, - { - "epoch": 0.27, - "learning_rate": 3.8841384410357505e-05, - "loss": 0.9115, - "step": 3840 - }, - { - "epoch": 0.27, - "learning_rate": 3.8833697854486165e-05, - "loss": 0.9083, - "step": 3850 - }, - { - "epoch": 0.27, - "learning_rate": 3.882598665122288e-05, - "loss": 0.9096, - "step": 3860 - }, - { - "epoch": 0.27, - "learning_rate": 3.8818250810659195e-05, - "loss": 0.9031, - "step": 3870 - }, - { - "epoch": 0.27, - "learning_rate": 3.881049034291891e-05, - "loss": 0.8998, - "step": 3880 - }, - { - "epoch": 0.27, - "learning_rate": 3.880270525815801e-05, - "loss": 0.8949, - "step": 3890 - }, - { - "epoch": 0.28, - "learning_rate": 3.879489556656474e-05, - "loss": 0.9016, - "step": 3900 - }, - { - "epoch": 0.28, - "learning_rate": 3.8787061278359536e-05, - "loss": 0.9036, - "step": 3910 - }, - { - "epoch": 0.28, - "learning_rate": 3.877920240379502e-05, - "loss": 0.8993, - "step": 3920 - }, - { - "epoch": 0.28, - "learning_rate": 3.877131895315597e-05, - "loss": 0.9081, - "step": 3930 - }, - { - "epoch": 0.28, - "learning_rate": 3.8763410936759374e-05, - "loss": 0.9063, - "step": 3940 - }, - { - "epoch": 0.28, - "learning_rate": 3.8755478364954315e-05, - "loss": 0.9037, - "step": 3950 - }, - { - "epoch": 0.28, - "learning_rate": 3.874752124812205e-05, - "loss": 0.8991, - "step": 3960 - }, - { - "epoch": 0.28, - "learning_rate": 3.8739539596675945e-05, - "loss": 0.907, - "step": 3970 - }, - { - "epoch": 0.28, - "learning_rate": 3.8731533421061464e-05, - "loss": 0.9055, - "step": 3980 - }, - { - "epoch": 0.28, - "learning_rate": 3.872350273175619e-05, - "loss": 0.8946, - "step": 3990 - }, - { - "epoch": 0.28, - "learning_rate": 3.8715447539269754e-05, - "loss": 0.8988, - "step": 4000 - }, - { - "epoch": 0.28, - "learning_rate": 3.870736785414389e-05, - "loss": 0.8999, - "step": 4010 - }, - { - "epoch": 0.28, - "learning_rate": 3.869926368695235e-05, - "loss": 0.9024, - "step": 4020 - }, - { - "epoch": 0.28, - "learning_rate": 3.869113504830095e-05, - "loss": 0.8982, - "step": 4030 - }, - { - "epoch": 0.29, - "learning_rate": 3.868298194882752e-05, - "loss": 0.8997, - "step": 4040 - }, - { - "epoch": 0.29, - "learning_rate": 3.867480439920191e-05, - "loss": 0.8995, - "step": 4050 - }, - { - "epoch": 0.29, - "learning_rate": 3.866660241012596e-05, - "loss": 0.901, - "step": 4060 - }, - { - "epoch": 0.29, - "learning_rate": 3.8658375992333495e-05, - "loss": 0.8933, - "step": 4070 - }, - { - "epoch": 0.29, - "learning_rate": 3.8650125156590316e-05, - "loss": 0.8971, - "step": 4080 - }, - { - "epoch": 0.29, - "learning_rate": 3.864184991369417e-05, - "loss": 0.9019, - "step": 4090 - }, - { - "epoch": 0.29, - "learning_rate": 3.8633550274474744e-05, - "loss": 0.8938, - "step": 4100 - }, - { - "epoch": 0.29, - "learning_rate": 3.862522624979367e-05, - "loss": 0.8998, - "step": 4110 - }, - { - "epoch": 0.29, - "learning_rate": 3.861687785054447e-05, - "loss": 0.9028, - "step": 4120 - }, - { - "epoch": 0.29, - "learning_rate": 3.8608505087652584e-05, - "loss": 0.8935, - "step": 4130 - }, - { - "epoch": 0.29, - "learning_rate": 3.860010797207532e-05, - "loss": 0.8973, - "step": 4140 - }, - { - "epoch": 0.29, - "learning_rate": 3.859168651480186e-05, - "loss": 0.8981, - "step": 4150 - }, - { - "epoch": 0.29, - "learning_rate": 3.858324072685326e-05, - "loss": 0.899, - "step": 4160 - }, - { - "epoch": 0.29, - "learning_rate": 3.857477061928239e-05, - "loss": 0.9, - "step": 4170 - }, - { - "epoch": 0.3, - "learning_rate": 3.856627620317396e-05, - "loss": 0.9012, - "step": 4180 - }, - { - "epoch": 0.3, - "learning_rate": 3.855775748964449e-05, - "loss": 0.8935, - "step": 4190 - }, - { - "epoch": 0.3, - "learning_rate": 3.8549214489842316e-05, - "loss": 0.8997, - "step": 4200 - }, - { - "epoch": 0.3, - "learning_rate": 3.8540647214947516e-05, - "loss": 0.9008, - "step": 4210 - }, - { - "epoch": 0.3, - "learning_rate": 3.853205567617197e-05, - "loss": 0.9024, - "step": 4220 - }, - { - "epoch": 0.3, - "learning_rate": 3.852343988475931e-05, - "loss": 0.9004, - "step": 4230 - }, - { - "epoch": 0.3, - "learning_rate": 3.851479985198489e-05, - "loss": 0.8942, - "step": 4240 - }, - { - "epoch": 0.3, - "learning_rate": 3.85061355891558e-05, - "loss": 0.8981, - "step": 4250 - }, - { - "epoch": 0.3, - "learning_rate": 3.849744710761084e-05, - "loss": 0.9069, - "step": 4260 - }, - { - "epoch": 0.3, - "learning_rate": 3.8488734418720494e-05, - "loss": 0.8969, - "step": 4270 - }, - { - "epoch": 0.3, - "learning_rate": 3.847999753388694e-05, - "loss": 0.8929, - "step": 4280 - }, - { - "epoch": 0.3, - "learning_rate": 3.847123646454402e-05, - "loss": 0.9022, - "step": 4290 - }, - { - "epoch": 0.3, - "learning_rate": 3.846245122215722e-05, - "loss": 0.8959, - "step": 4300 - }, - { - "epoch": 0.3, - "learning_rate": 3.845364181822365e-05, - "loss": 0.8916, - "step": 4310 - }, - { - "epoch": 0.31, - "learning_rate": 3.8444808264272054e-05, - "loss": 0.8907, - "step": 4320 - }, - { - "epoch": 0.31, - "learning_rate": 3.843595057186279e-05, - "loss": 0.8874, - "step": 4330 - }, - { - "epoch": 0.31, - "learning_rate": 3.842706875258777e-05, - "loss": 0.8962, - "step": 4340 - }, - { - "epoch": 0.31, - "learning_rate": 3.841816281807054e-05, - "loss": 0.8932, - "step": 4350 - }, - { - "epoch": 0.31, - "learning_rate": 3.8409232779966145e-05, - "loss": 0.8923, - "step": 4360 - }, - { - "epoch": 0.31, - "learning_rate": 3.8400278649961197e-05, - "loss": 0.8946, - "step": 4370 - }, - { - "epoch": 0.31, - "learning_rate": 3.839130043977385e-05, - "loss": 0.9001, - "step": 4380 - }, - { - "epoch": 0.31, - "learning_rate": 3.8382298161153745e-05, - "loss": 0.8927, - "step": 4390 - }, - { - "epoch": 0.31, - "learning_rate": 3.837327182588205e-05, - "loss": 0.8923, - "step": 4400 - }, - { - "epoch": 0.31, - "learning_rate": 3.8364221445771395e-05, - "loss": 0.9049, - "step": 4410 - }, - { - "epoch": 0.31, - "learning_rate": 3.835514703266589e-05, - "loss": 0.8904, - "step": 4420 - }, - { - "epoch": 0.31, - "learning_rate": 3.8346048598441076e-05, - "loss": 0.8916, - "step": 4430 - }, - { - "epoch": 0.31, - "learning_rate": 3.833692615500396e-05, - "loss": 0.8882, - "step": 4440 - }, - { - "epoch": 0.31, - "learning_rate": 3.832777971429295e-05, - "loss": 0.8914, - "step": 4450 - }, - { - "epoch": 0.32, - "learning_rate": 3.831860928827785e-05, - "loss": 0.888, - "step": 4460 - }, - { - "epoch": 0.32, - "learning_rate": 3.830941488895988e-05, - "loss": 0.8958, - "step": 4470 - }, - { - "epoch": 0.32, - "learning_rate": 3.8300196528371616e-05, - "loss": 0.8957, - "step": 4480 - }, - { - "epoch": 0.32, - "learning_rate": 3.829095421857699e-05, - "loss": 0.8938, - "step": 4490 - }, - { - "epoch": 0.32, - "learning_rate": 3.8281687971671285e-05, - "loss": 0.8947, - "step": 4500 - }, - { - "epoch": 0.32, - "learning_rate": 3.827239779978111e-05, - "loss": 0.894, - "step": 4510 - }, - { - "epoch": 0.32, - "learning_rate": 3.8263083715064363e-05, - "loss": 0.8921, - "step": 4520 - }, - { - "epoch": 0.32, - "learning_rate": 3.8253745729710284e-05, - "loss": 0.8947, - "step": 4530 - }, - { - "epoch": 0.32, - "learning_rate": 3.824438385593934e-05, - "loss": 0.8978, - "step": 4540 - }, - { - "epoch": 0.32, - "learning_rate": 3.8234998106003283e-05, - "loss": 0.8894, - "step": 4550 - }, - { - "epoch": 0.32, - "learning_rate": 3.8225588492185105e-05, - "loss": 0.896, - "step": 4560 - }, - { - "epoch": 0.32, - "learning_rate": 3.821615502679906e-05, - "loss": 0.8856, - "step": 4570 - }, - { - "epoch": 0.32, - "learning_rate": 3.8206697722190563e-05, - "loss": 0.8892, - "step": 4580 - }, - { - "epoch": 0.32, - "learning_rate": 3.819721659073626e-05, - "loss": 0.8863, - "step": 4590 - }, - { - "epoch": 0.32, - "learning_rate": 3.818771164484398e-05, - "loss": 0.8932, - "step": 4600 - }, - { - "epoch": 0.33, - "learning_rate": 3.81781828969527e-05, - "loss": 0.8907, - "step": 4610 - }, - { - "epoch": 0.33, - "learning_rate": 3.816863035953257e-05, - "loss": 0.8863, - "step": 4620 - }, - { - "epoch": 0.33, - "learning_rate": 3.8159054045084846e-05, - "loss": 0.8903, - "step": 4630 - }, - { - "epoch": 0.33, - "learning_rate": 3.814945396614192e-05, - "loss": 0.8884, - "step": 4640 - }, - { - "epoch": 0.33, - "learning_rate": 3.813983013526728e-05, - "loss": 0.9007, - "step": 4650 - }, - { - "epoch": 0.33, - "learning_rate": 3.813018256505549e-05, - "loss": 0.885, - "step": 4660 - }, - { - "epoch": 0.33, - "learning_rate": 3.8120511268132195e-05, - "loss": 0.8925, - "step": 4670 - }, - { - "epoch": 0.33, - "learning_rate": 3.811081625715408e-05, - "loss": 0.8887, - "step": 4680 - }, - { - "epoch": 0.33, - "learning_rate": 3.8101097544808866e-05, - "loss": 0.8858, - "step": 4690 - }, - { - "epoch": 0.33, - "learning_rate": 3.80913551438153e-05, - "loss": 0.8851, - "step": 4700 - }, - { - "epoch": 0.33, - "learning_rate": 3.808158906692311e-05, - "loss": 0.8947, - "step": 4710 - }, - { - "epoch": 0.33, - "learning_rate": 3.8071799326913045e-05, - "loss": 0.8966, - "step": 4720 - }, - { - "epoch": 0.33, - "learning_rate": 3.8061985936596776e-05, - "loss": 0.8866, - "step": 4730 - }, - { - "epoch": 0.33, - "learning_rate": 3.805214890881695e-05, - "loss": 0.8919, - "step": 4740 - }, - { - "epoch": 0.34, - "learning_rate": 3.804228825644716e-05, - "loss": 0.8876, - "step": 4750 - }, - { - "epoch": 0.34, - "learning_rate": 3.8032403992391885e-05, - "loss": 0.8922, - "step": 4760 - }, - { - "epoch": 0.34, - "learning_rate": 3.802249612958653e-05, - "loss": 0.8934, - "step": 4770 - }, - { - "epoch": 0.34, - "learning_rate": 3.801256468099736e-05, - "loss": 0.8935, - "step": 4780 - }, - { - "epoch": 0.34, - "learning_rate": 3.800260965962154e-05, - "loss": 0.8863, - "step": 4790 - }, - { - "epoch": 0.34, - "learning_rate": 3.799263107848705e-05, - "loss": 0.8887, - "step": 4800 - }, - { - "epoch": 0.34, - "learning_rate": 3.7982628950652725e-05, - "loss": 0.8858, - "step": 4810 - }, - { - "epoch": 0.34, - "learning_rate": 3.79726032892082e-05, - "loss": 0.8957, - "step": 4820 - }, - { - "epoch": 0.34, - "learning_rate": 3.7962554107273926e-05, - "loss": 0.885, - "step": 4830 - }, - { - "epoch": 0.34, - "learning_rate": 3.795248141800112e-05, - "loss": 0.8843, - "step": 4840 - }, - { - "epoch": 0.34, - "learning_rate": 3.794238523457175e-05, - "loss": 0.8899, - "step": 4850 - }, - { - "epoch": 0.34, - "learning_rate": 3.793226557019857e-05, - "loss": 0.9039, - "step": 4860 - }, - { - "epoch": 0.34, - "learning_rate": 3.7922122438125035e-05, - "loss": 0.9641, - "step": 4870 - }, - { - "epoch": 0.34, - "learning_rate": 3.791195585162531e-05, - "loss": 0.9534, - "step": 4880 - }, - { - "epoch": 0.35, - "learning_rate": 3.7901765824004264e-05, - "loss": 0.9882, - "step": 4890 - }, - { - "epoch": 0.35, - "learning_rate": 3.7891552368597456e-05, - "loss": 1.0478, - "step": 4900 - }, - { - "epoch": 0.35, - "learning_rate": 3.788131549877107e-05, - "loss": 0.9491, - "step": 4910 - }, - { - "epoch": 0.35, - "learning_rate": 3.787105522792195e-05, - "loss": 0.9363, - "step": 4920 - }, - { - "epoch": 0.35, - "learning_rate": 3.7860771569477587e-05, - "loss": 0.9231, - "step": 4930 - }, - { - "epoch": 0.35, - "learning_rate": 3.7850464536896036e-05, - "loss": 0.9272, - "step": 4940 - }, - { - "epoch": 0.35, - "learning_rate": 3.784013414366598e-05, - "loss": 0.9205, - "step": 4950 - }, - { - "epoch": 0.35, - "learning_rate": 3.7829780403306654e-05, - "loss": 0.9177, - "step": 4960 - }, - { - "epoch": 0.35, - "learning_rate": 3.781940332936784e-05, - "loss": 0.9212, - "step": 4970 - }, - { - "epoch": 0.35, - "learning_rate": 3.780900293542988e-05, - "loss": 0.9116, - "step": 4980 - }, - { - "epoch": 0.35, - "learning_rate": 3.779857923510361e-05, - "loss": 0.9004, - "step": 4990 - }, - { - "epoch": 0.35, - "learning_rate": 3.778813224203038e-05, - "loss": 0.906, - "step": 5000 - }, - { - "epoch": 0.35, - "learning_rate": 3.7777661969882034e-05, - "loss": 0.9011, - "step": 5010 - }, - { - "epoch": 0.35, - "learning_rate": 3.776716843236086e-05, - "loss": 0.9084, - "step": 5020 - }, - { - "epoch": 0.36, - "learning_rate": 3.775665164319959e-05, - "loss": 0.9045, - "step": 5030 - }, - { - "epoch": 0.36, - "learning_rate": 3.774611161616141e-05, - "loss": 0.9059, - "step": 5040 - }, - { - "epoch": 0.36, - "learning_rate": 3.77355483650399e-05, - "loss": 0.9022, - "step": 5050 - }, - { - "epoch": 0.36, - "learning_rate": 3.772496190365903e-05, - "loss": 0.9047, - "step": 5060 - }, - { - "epoch": 0.36, - "learning_rate": 3.7714352245873154e-05, - "loss": 0.9057, - "step": 5070 - }, - { - "epoch": 0.36, - "learning_rate": 3.7703719405566986e-05, - "loss": 0.8948, - "step": 5080 - }, - { - "epoch": 0.36, - "learning_rate": 3.7693063396655574e-05, - "loss": 0.8981, - "step": 5090 - }, - { - "epoch": 0.36, - "learning_rate": 3.768238423308427e-05, - "loss": 0.8995, - "step": 5100 - }, - { - "epoch": 0.36, - "learning_rate": 3.767168192882876e-05, - "loss": 0.8987, - "step": 5110 - }, - { - "epoch": 0.36, - "learning_rate": 3.766095649789498e-05, - "loss": 0.899, - "step": 5120 - }, - { - "epoch": 0.36, - "learning_rate": 3.765020795431917e-05, - "loss": 0.8897, - "step": 5130 - }, - { - "epoch": 0.36, - "learning_rate": 3.763943631216778e-05, - "loss": 0.8925, - "step": 5140 - }, - { - "epoch": 0.36, - "learning_rate": 3.7628641585537505e-05, - "loss": 0.8906, - "step": 5150 - }, - { - "epoch": 0.36, - "learning_rate": 3.761782378855525e-05, - "loss": 0.892, - "step": 5160 - }, - { - "epoch": 0.37, - "learning_rate": 3.760698293537813e-05, - "loss": 0.8906, - "step": 5170 - }, - { - "epoch": 0.37, - "learning_rate": 3.759611904019338e-05, - "loss": 0.8962, - "step": 5180 - }, - { - "epoch": 0.37, - "learning_rate": 3.758523211721846e-05, - "loss": 0.8891, - "step": 5190 - }, - { - "epoch": 0.37, - "learning_rate": 3.757432218070091e-05, - "loss": 0.8903, - "step": 5200 - }, - { - "epoch": 0.37, - "learning_rate": 3.7563389244918415e-05, - "loss": 0.8921, - "step": 5210 - }, - { - "epoch": 0.37, - "learning_rate": 3.755243332417875e-05, - "loss": 0.8881, - "step": 5220 - }, - { - "epoch": 0.37, - "learning_rate": 3.7541454432819776e-05, - "loss": 0.8935, - "step": 5230 - }, - { - "epoch": 0.37, - "learning_rate": 3.753045258520941e-05, - "loss": 0.8919, - "step": 5240 - }, - { - "epoch": 0.37, - "learning_rate": 3.751942779574561e-05, - "loss": 0.8902, - "step": 5250 - }, - { - "epoch": 0.37, - "learning_rate": 3.750838007885636e-05, - "loss": 0.8911, - "step": 5260 - }, - { - "epoch": 0.37, - "learning_rate": 3.749730944899966e-05, - "loss": 0.8917, - "step": 5270 - }, - { - "epoch": 0.37, - "learning_rate": 3.7486215920663476e-05, - "loss": 0.8857, - "step": 5280 - }, - { - "epoch": 0.37, - "learning_rate": 3.747509950836575e-05, - "loss": 0.8885, - "step": 5290 - }, - { - "epoch": 0.37, - "learning_rate": 3.746396022665438e-05, - "loss": 0.8928, - "step": 5300 - }, - { - "epoch": 0.38, - "learning_rate": 3.745279809010718e-05, - "loss": 0.893, - "step": 5310 - }, - { - "epoch": 0.38, - "learning_rate": 3.7441613113331876e-05, - "loss": 0.8953, - "step": 5320 - }, - { - "epoch": 0.38, - "learning_rate": 3.74304053109661e-05, - "loss": 0.8903, - "step": 5330 - }, - { - "epoch": 0.38, - "learning_rate": 3.741917469767731e-05, - "loss": 0.8934, - "step": 5340 - }, - { - "epoch": 0.38, - "learning_rate": 3.74079212881629e-05, - "loss": 0.8851, - "step": 5350 - }, - { - "epoch": 0.38, - "learning_rate": 3.7396645097150014e-05, - "loss": 0.8973, - "step": 5360 - }, - { - "epoch": 0.38, - "learning_rate": 3.7385346139395645e-05, - "loss": 0.8952, - "step": 5370 - }, - { - "epoch": 0.38, - "learning_rate": 3.737402442968658e-05, - "loss": 0.8808, - "step": 5380 - }, - { - "epoch": 0.38, - "learning_rate": 3.736267998283939e-05, - "loss": 0.886, - "step": 5390 - }, - { - "epoch": 0.38, - "learning_rate": 3.735131281370037e-05, - "loss": 0.8864, - "step": 5400 - }, - { - "epoch": 0.38, - "learning_rate": 3.7339922937145596e-05, - "loss": 0.8817, - "step": 5410 - }, - { - "epoch": 0.38, - "learning_rate": 3.732851036808082e-05, - "loss": 0.8875, - "step": 5420 - }, - { - "epoch": 0.38, - "learning_rate": 3.731707512144151e-05, - "loss": 0.8716, - "step": 5430 - }, - { - "epoch": 0.38, - "learning_rate": 3.7305617212192816e-05, - "loss": 0.8835, - "step": 5440 - }, - { - "epoch": 0.38, - "learning_rate": 3.729413665532955e-05, - "loss": 0.8775, - "step": 5450 - }, - { - "epoch": 0.39, - "learning_rate": 3.728263346587612e-05, - "loss": 0.8899, - "step": 5460 - }, - { - "epoch": 0.39, - "learning_rate": 3.727110765888663e-05, - "loss": 0.889, - "step": 5470 - }, - { - "epoch": 0.39, - "learning_rate": 3.7259559249444704e-05, - "loss": 0.8818, - "step": 5480 - }, - { - "epoch": 0.39, - "learning_rate": 3.724798825266359e-05, - "loss": 0.884, - "step": 5490 - }, - { - "epoch": 0.39, - "learning_rate": 3.72363946836861e-05, - "loss": 0.8796, - "step": 5500 - }, - { - "epoch": 0.39, - "learning_rate": 3.722477855768456e-05, - "loss": 0.893, - "step": 5510 - }, - { - "epoch": 0.39, - "learning_rate": 3.721313988986083e-05, - "loss": 0.8833, - "step": 5520 - }, - { - "epoch": 0.39, - "learning_rate": 3.720147869544628e-05, - "loss": 0.8789, - "step": 5530 - }, - { - "epoch": 0.39, - "learning_rate": 3.718979498970172e-05, - "loss": 0.8888, - "step": 5540 - }, - { - "epoch": 0.39, - "learning_rate": 3.7178088787917484e-05, - "loss": 0.8824, - "step": 5550 - }, - { - "epoch": 0.39, - "learning_rate": 3.7166360105413285e-05, - "loss": 0.8788, - "step": 5560 - }, - { - "epoch": 0.39, - "learning_rate": 3.71546089575383e-05, - "loss": 0.8857, - "step": 5570 - }, - { - "epoch": 0.39, - "learning_rate": 3.714283535967108e-05, - "loss": 0.8796, - "step": 5580 - }, - { - "epoch": 0.39, - "learning_rate": 3.713103932721957e-05, - "loss": 0.8757, - "step": 5590 - }, - { - "epoch": 0.4, - "learning_rate": 3.7119220875621065e-05, - "loss": 0.8838, - "step": 5600 - }, - { - "epoch": 0.4, - "learning_rate": 3.71073800203422e-05, - "loss": 0.8872, - "step": 5610 - }, - { - "epoch": 0.4, - "learning_rate": 3.709551677687895e-05, - "loss": 0.8847, - "step": 5620 - }, - { - "epoch": 0.4, - "learning_rate": 3.708363116075656e-05, - "loss": 0.8829, - "step": 5630 - }, - { - "epoch": 0.4, - "learning_rate": 3.707172318752956e-05, - "loss": 0.8821, - "step": 5640 - }, - { - "epoch": 0.4, - "learning_rate": 3.705979287278175e-05, - "loss": 0.8831, - "step": 5650 - }, - { - "epoch": 0.4, - "learning_rate": 3.704784023212618e-05, - "loss": 0.8776, - "step": 5660 - }, - { - "epoch": 0.4, - "learning_rate": 3.703586528120507e-05, - "loss": 0.884, - "step": 5670 - }, - { - "epoch": 0.4, - "learning_rate": 3.70238680356899e-05, - "loss": 0.8811, - "step": 5680 - }, - { - "epoch": 0.4, - "learning_rate": 3.7011848511281266e-05, - "loss": 0.8814, - "step": 5690 - }, - { - "epoch": 0.4, - "learning_rate": 3.699980672370896e-05, - "loss": 0.8762, - "step": 5700 - }, - { - "epoch": 0.4, - "learning_rate": 3.69877426887319e-05, - "loss": 0.8859, - "step": 5710 - }, - { - "epoch": 0.4, - "learning_rate": 3.697565642213812e-05, - "loss": 0.8864, - "step": 5720 - }, - { - "epoch": 0.4, - "learning_rate": 3.6963547939744737e-05, - "loss": 0.8829, - "step": 5730 - }, - { - "epoch": 0.41, - "learning_rate": 3.695141725739795e-05, - "loss": 0.8848, - "step": 5740 - }, - { - "epoch": 0.41, - "learning_rate": 3.6939264390973016e-05, - "loss": 0.8864, - "step": 5750 - }, - { - "epoch": 0.41, - "learning_rate": 3.692708935637421e-05, - "loss": 0.8772, - "step": 5760 - }, - { - "epoch": 0.41, - "learning_rate": 3.691489216953483e-05, - "loss": 0.8742, - "step": 5770 - }, - { - "epoch": 0.41, - "learning_rate": 3.690267284641716e-05, - "loss": 0.881, - "step": 5780 - }, - { - "epoch": 0.41, - "learning_rate": 3.689043140301246e-05, - "loss": 0.8787, - "step": 5790 - }, - { - "epoch": 0.41, - "learning_rate": 3.687816785534092e-05, - "loss": 0.8845, - "step": 5800 - }, - { - "epoch": 0.41, - "learning_rate": 3.686588221945168e-05, - "loss": 0.877, - "step": 5810 - }, - { - "epoch": 0.41, - "learning_rate": 3.685357451142278e-05, - "loss": 0.8824, - "step": 5820 - }, - { - "epoch": 0.41, - "learning_rate": 3.6841244747361126e-05, - "loss": 0.8767, - "step": 5830 - }, - { - "epoch": 0.41, - "learning_rate": 3.682889294340252e-05, - "loss": 0.8736, - "step": 5840 - }, - { - "epoch": 0.41, - "learning_rate": 3.6816519115711586e-05, - "loss": 0.8808, - "step": 5850 - }, - { - "epoch": 0.41, - "learning_rate": 3.680412328048178e-05, - "loss": 0.8791, - "step": 5860 - }, - { - "epoch": 0.41, - "learning_rate": 3.679170545393534e-05, - "loss": 0.8793, - "step": 5870 - }, - { - "epoch": 0.42, - "learning_rate": 3.6779265652323315e-05, - "loss": 0.8797, - "step": 5880 - }, - { - "epoch": 0.42, - "learning_rate": 3.67668038919255e-05, - "loss": 0.8802, - "step": 5890 - }, - { - "epoch": 0.42, - "learning_rate": 3.6754320189050404e-05, - "loss": 0.8805, - "step": 5900 - }, - { - "epoch": 0.42, - "learning_rate": 3.674181456003528e-05, - "loss": 0.88, - "step": 5910 - }, - { - "epoch": 0.42, - "learning_rate": 3.672928702124607e-05, - "loss": 0.8706, - "step": 5920 - }, - { - "epoch": 0.42, - "learning_rate": 3.6716737589077386e-05, - "loss": 0.8814, - "step": 5930 - }, - { - "epoch": 0.42, - "learning_rate": 3.670416627995249e-05, - "loss": 0.8861, - "step": 5940 - }, - { - "epoch": 0.42, - "learning_rate": 3.669157311032326e-05, - "loss": 0.8821, - "step": 5950 - }, - { - "epoch": 0.42, - "learning_rate": 3.667895809667022e-05, - "loss": 0.884, - "step": 5960 - }, - { - "epoch": 0.42, - "learning_rate": 3.666632125550245e-05, - "loss": 0.8849, - "step": 5970 - }, - { - "epoch": 0.42, - "learning_rate": 3.66536626033576e-05, - "loss": 0.8863, - "step": 5980 - }, - { - "epoch": 0.42, - "learning_rate": 3.664098215680187e-05, - "loss": 0.8699, - "step": 5990 - }, - { - "epoch": 0.42, - "learning_rate": 3.662827993242998e-05, - "loss": 0.871, - "step": 6000 - }, - { - "epoch": 0.42, - "learning_rate": 3.661555594686515e-05, - "loss": 0.878, - "step": 6010 - }, - { - "epoch": 0.43, - "learning_rate": 3.660281021675908e-05, - "loss": 0.8794, - "step": 6020 - }, - { - "epoch": 0.43, - "learning_rate": 3.659004275879193e-05, - "loss": 0.8727, - "step": 6030 - }, - { - "epoch": 0.43, - "learning_rate": 3.657725358967228e-05, - "loss": 0.8734, - "step": 6040 - }, - { - "epoch": 0.43, - "learning_rate": 3.6564442726137134e-05, - "loss": 0.8767, - "step": 6050 - }, - { - "epoch": 0.43, - "learning_rate": 3.655161018495189e-05, - "loss": 0.8723, - "step": 6060 - }, - { - "epoch": 0.43, - "learning_rate": 3.6538755982910314e-05, - "loss": 0.8725, - "step": 6070 - }, - { - "epoch": 0.43, - "learning_rate": 3.6525880136834514e-05, - "loss": 0.8683, - "step": 6080 - }, - { - "epoch": 0.43, - "learning_rate": 3.651298266357493e-05, - "loss": 0.8709, - "step": 6090 - }, - { - "epoch": 0.43, - "learning_rate": 3.65000635800103e-05, - "loss": 0.8735, - "step": 6100 - }, - { - "epoch": 0.43, - "learning_rate": 3.648712290304764e-05, - "loss": 0.8882, - "step": 6110 - }, - { - "epoch": 0.43, - "learning_rate": 3.647416064962224e-05, - "loss": 0.8879, - "step": 6120 - }, - { - "epoch": 0.43, - "learning_rate": 3.64611768366976e-05, - "loss": 0.8852, - "step": 6130 - }, - { - "epoch": 0.43, - "learning_rate": 3.644817148126546e-05, - "loss": 0.8906, - "step": 6140 - }, - { - "epoch": 0.43, - "learning_rate": 3.643514460034575e-05, - "loss": 0.906, - "step": 6150 - }, - { - "epoch": 0.44, - "learning_rate": 3.642209621098655e-05, - "loss": 0.9239, - "step": 6160 - }, - { - "epoch": 0.44, - "learning_rate": 3.640902633026411e-05, - "loss": 0.9071, - "step": 6170 - }, - { - "epoch": 0.44, - "learning_rate": 3.63959349752828e-05, - "loss": 0.9016, - "step": 6180 - }, - { - "epoch": 0.44, - "learning_rate": 3.638282216317508e-05, - "loss": 0.896, - "step": 6190 - }, - { - "epoch": 0.44, - "learning_rate": 3.63696879111015e-05, - "loss": 0.8891, - "step": 6200 - }, - { - "epoch": 0.44, - "learning_rate": 3.635653223625067e-05, - "loss": 0.8903, - "step": 6210 - }, - { - "epoch": 0.44, - "learning_rate": 3.634335515583924e-05, - "loss": 0.8887, - "step": 6220 - }, - { - "epoch": 0.44, - "learning_rate": 3.633015668711187e-05, - "loss": 0.8891, - "step": 6230 - }, - { - "epoch": 0.44, - "learning_rate": 3.63169368473412e-05, - "loss": 0.8896, - "step": 6240 - }, - { - "epoch": 0.44, - "learning_rate": 3.630369565382785e-05, - "loss": 0.8906, - "step": 6250 - }, - { - "epoch": 0.44, - "learning_rate": 3.629043312390037e-05, - "loss": 0.8869, - "step": 6260 - }, - { - "epoch": 0.44, - "learning_rate": 3.627714927491527e-05, - "loss": 0.8818, - "step": 6270 - }, - { - "epoch": 0.44, - "learning_rate": 3.6263844124256905e-05, - "loss": 0.8876, - "step": 6280 - }, - { - "epoch": 0.44, - "learning_rate": 3.6250517689337545e-05, - "loss": 0.886, - "step": 6290 - }, - { - "epoch": 0.45, - "learning_rate": 3.6237169987597314e-05, - "loss": 0.8833, - "step": 6300 - }, - { - "epoch": 0.45, - "learning_rate": 3.622380103650415e-05, - "loss": 0.883, - "step": 6310 - }, - { - "epoch": 0.45, - "learning_rate": 3.6210410853553816e-05, - "loss": 0.8848, - "step": 6320 - }, - { - "epoch": 0.45, - "learning_rate": 3.6196999456269845e-05, - "loss": 0.8759, - "step": 6330 - }, - { - "epoch": 0.45, - "learning_rate": 3.6183566862203555e-05, - "loss": 0.8765, - "step": 6340 - }, - { - "epoch": 0.45, - "learning_rate": 3.617011308893398e-05, - "loss": 0.888, - "step": 6350 - }, - { - "epoch": 0.45, - "learning_rate": 3.6156638154067876e-05, - "loss": 0.8796, - "step": 6360 - }, - { - "epoch": 0.45, - "learning_rate": 3.614314207523971e-05, - "loss": 0.8835, - "step": 6370 - }, - { - "epoch": 0.45, - "learning_rate": 3.61296248701116e-05, - "loss": 0.8799, - "step": 6380 - }, - { - "epoch": 0.45, - "learning_rate": 3.611608655637333e-05, - "loss": 0.8861, - "step": 6390 - }, - { - "epoch": 0.45, - "learning_rate": 3.610252715174229e-05, - "loss": 0.8689, - "step": 6400 - }, - { - "epoch": 0.45, - "learning_rate": 3.608894667396347e-05, - "loss": 0.8771, - "step": 6410 - }, - { - "epoch": 0.45, - "learning_rate": 3.607534514080946e-05, - "loss": 0.8784, - "step": 6420 - }, - { - "epoch": 0.45, - "learning_rate": 3.60617225700804e-05, - "loss": 0.8831, - "step": 6430 - }, - { - "epoch": 0.45, - "learning_rate": 3.604807897960394e-05, - "loss": 0.882, - "step": 6440 - }, - { - "epoch": 0.46, - "learning_rate": 3.603441438723526e-05, - "loss": 0.8744, - "step": 6450 - }, - { - "epoch": 0.46, - "learning_rate": 3.602072881085701e-05, - "loss": 0.8706, - "step": 6460 - }, - { - "epoch": 0.46, - "learning_rate": 3.6007022268379316e-05, - "loss": 0.8786, - "step": 6470 - }, - { - "epoch": 0.46, - "learning_rate": 3.599329477773973e-05, - "loss": 0.8805, - "step": 6480 - }, - { - "epoch": 0.46, - "learning_rate": 3.597954635690324e-05, - "loss": 0.877, - "step": 6490 - }, - { - "epoch": 0.46, - "learning_rate": 3.5965777023862176e-05, - "loss": 0.878, - "step": 6500 - }, - { - "epoch": 0.46, - "learning_rate": 3.5951986796636295e-05, - "loss": 0.8697, - "step": 6510 - }, - { - "epoch": 0.46, - "learning_rate": 3.5938175693272655e-05, - "loss": 0.8735, - "step": 6520 - }, - { - "epoch": 0.46, - "learning_rate": 3.5924343731845664e-05, - "loss": 0.8823, - "step": 6530 - }, - { - "epoch": 0.46, - "learning_rate": 3.5910490930457006e-05, - "loss": 0.8761, - "step": 6540 - }, - { - "epoch": 0.46, - "learning_rate": 3.589661730723563e-05, - "loss": 0.8735, - "step": 6550 - }, - { - "epoch": 0.46, - "learning_rate": 3.5882722880337776e-05, - "loss": 0.8684, - "step": 6560 - }, - { - "epoch": 0.46, - "learning_rate": 3.5868807667946876e-05, - "loss": 0.8788, - "step": 6570 - }, - { - "epoch": 0.46, - "learning_rate": 3.585487168827355e-05, - "loss": 0.8685, - "step": 6580 - }, - { - "epoch": 0.47, - "learning_rate": 3.584091495955563e-05, - "loss": 0.8677, - "step": 6590 - }, - { - "epoch": 0.47, - "learning_rate": 3.58269375000581e-05, - "loss": 0.8762, - "step": 6600 - }, - { - "epoch": 0.47, - "learning_rate": 3.581293932807304e-05, - "loss": 0.8684, - "step": 6610 - }, - { - "epoch": 0.47, - "learning_rate": 3.579892046191967e-05, - "loss": 0.8732, - "step": 6620 - }, - { - "epoch": 0.47, - "learning_rate": 3.578488091994428e-05, - "loss": 0.8729, - "step": 6630 - }, - { - "epoch": 0.47, - "learning_rate": 3.5770820720520216e-05, - "loss": 0.8734, - "step": 6640 - }, - { - "epoch": 0.47, - "learning_rate": 3.575673988204786e-05, - "loss": 0.8739, - "step": 6650 - }, - { - "epoch": 0.47, - "learning_rate": 3.57426384229546e-05, - "loss": 0.88, - "step": 6660 - }, - { - "epoch": 0.47, - "learning_rate": 3.572851636169484e-05, - "loss": 0.8732, - "step": 6670 - }, - { - "epoch": 0.47, - "learning_rate": 3.5714373716749886e-05, - "loss": 0.8738, - "step": 6680 - }, - { - "epoch": 0.47, - "learning_rate": 3.5700210506628026e-05, - "loss": 0.8734, - "step": 6690 - }, - { - "epoch": 0.47, - "learning_rate": 3.5686026749864466e-05, - "loss": 0.8692, - "step": 6700 - }, - { - "epoch": 0.47, - "learning_rate": 3.567182246502127e-05, - "loss": 0.8713, - "step": 6710 - }, - { - "epoch": 0.47, - "learning_rate": 3.56575976706874e-05, - "loss": 0.8797, - "step": 6720 - }, - { - "epoch": 0.48, - "learning_rate": 3.5643352385478616e-05, - "loss": 0.8732, - "step": 6730 - }, - { - "epoch": 0.48, - "learning_rate": 3.562908662803754e-05, - "loss": 0.8665, - "step": 6740 - }, - { - "epoch": 0.48, - "learning_rate": 3.5614800417033565e-05, - "loss": 0.8634, - "step": 6750 - }, - { - "epoch": 0.48, - "learning_rate": 3.560049377116284e-05, - "loss": 0.872, - "step": 6760 - }, - { - "epoch": 0.48, - "learning_rate": 3.558616670914828e-05, - "loss": 0.8687, - "step": 6770 - }, - { - "epoch": 0.48, - "learning_rate": 3.55718192497395e-05, - "loss": 0.8679, - "step": 6780 - }, - { - "epoch": 0.48, - "learning_rate": 3.555745141171282e-05, - "loss": 0.8648, - "step": 6790 - }, - { - "epoch": 0.48, - "learning_rate": 3.554306321387122e-05, - "loss": 0.87, - "step": 6800 - }, - { - "epoch": 0.48, - "learning_rate": 3.552865467504432e-05, - "loss": 0.8689, - "step": 6810 - }, - { - "epoch": 0.48, - "learning_rate": 3.55142258140884e-05, - "loss": 0.8704, - "step": 6820 - }, - { - "epoch": 0.48, - "learning_rate": 3.549977664988628e-05, - "loss": 0.8718, - "step": 6830 - }, - { - "epoch": 0.48, - "learning_rate": 3.548530720134738e-05, - "loss": 0.8733, - "step": 6840 - }, - { - "epoch": 0.48, - "learning_rate": 3.547081748740766e-05, - "loss": 0.8699, - "step": 6850 - }, - { - "epoch": 0.48, - "learning_rate": 3.5456307527029606e-05, - "loss": 0.8721, - "step": 6860 - }, - { - "epoch": 0.49, - "learning_rate": 3.54417773392022e-05, - "loss": 0.8687, - "step": 6870 - }, - { - "epoch": 0.49, - "learning_rate": 3.542722694294088e-05, - "loss": 0.8551, - "step": 6880 - }, - { - "epoch": 0.49, - "learning_rate": 3.541265635728755e-05, - "loss": 0.8632, - "step": 6890 - }, - { - "epoch": 0.49, - "learning_rate": 3.539806560131053e-05, - "loss": 0.8739, - "step": 6900 - }, - { - "epoch": 0.49, - "learning_rate": 3.538345469410453e-05, - "loss": 0.8674, - "step": 6910 - }, - { - "epoch": 0.49, - "learning_rate": 3.536882365479063e-05, - "loss": 0.8617, - "step": 6920 - }, - { - "epoch": 0.49, - "learning_rate": 3.535417250251627e-05, - "loss": 0.8618, - "step": 6930 - }, - { - "epoch": 0.49, - "learning_rate": 3.5339501256455216e-05, - "loss": 0.8731, - "step": 6940 - }, - { - "epoch": 0.49, - "learning_rate": 3.53248099358075e-05, - "loss": 0.8584, - "step": 6950 - }, - { - "epoch": 0.49, - "learning_rate": 3.531009855979945e-05, - "loss": 0.8692, - "step": 6960 - }, - { - "epoch": 0.49, - "learning_rate": 3.529536714768363e-05, - "loss": 0.8698, - "step": 6970 - }, - { - "epoch": 0.49, - "learning_rate": 3.528061571873883e-05, - "loss": 0.8577, - "step": 6980 - }, - { - "epoch": 0.49, - "learning_rate": 3.526584429227005e-05, - "loss": 0.8652, - "step": 6990 - }, - { - "epoch": 0.49, - "learning_rate": 3.525105288760843e-05, - "loss": 0.8676, - "step": 7000 - }, - { - "epoch": 0.5, - "learning_rate": 3.5236241524111264e-05, - "loss": 0.8682, - "step": 7010 - }, - { - "epoch": 0.5, - "learning_rate": 3.522141022116199e-05, - "loss": 0.8664, - "step": 7020 - }, - { - "epoch": 0.5, - "learning_rate": 3.520655899817012e-05, - "loss": 0.8714, - "step": 7030 - }, - { - "epoch": 0.5, - "learning_rate": 3.519168787457122e-05, - "loss": 0.8672, - "step": 7040 - }, - { - "epoch": 0.5, - "learning_rate": 3.5176796869826943e-05, - "loss": 0.8731, - "step": 7050 - }, - { - "epoch": 0.5, - "learning_rate": 3.5161886003424915e-05, - "loss": 0.8679, - "step": 7060 - }, - { - "epoch": 0.5, - "learning_rate": 3.5146955294878784e-05, - "loss": 0.8692, - "step": 7070 - }, - { - "epoch": 0.5, - "learning_rate": 3.5132004763728144e-05, - "loss": 0.8751, - "step": 7080 - }, - { - "epoch": 0.5, - "learning_rate": 3.511703442953856e-05, - "loss": 0.8656, - "step": 7090 - }, - { - "epoch": 0.5, - "learning_rate": 3.510204431190149e-05, - "loss": 0.8742, - "step": 7100 - }, - { - "epoch": 0.5, - "learning_rate": 3.508703443043427e-05, - "loss": 0.87, - "step": 7110 - }, - { - "epoch": 0.5, - "learning_rate": 3.5072004804780135e-05, - "loss": 0.8608, - "step": 7120 - }, - { - "epoch": 0.5, - "learning_rate": 3.505695545460814e-05, - "loss": 0.8727, - "step": 7130 - }, - { - "epoch": 0.5, - "learning_rate": 3.504188639961315e-05, - "loss": 0.8731, - "step": 7140 - }, - { - "epoch": 0.51, - "learning_rate": 3.502679765951583e-05, - "loss": 0.871, - "step": 7150 - }, - { - "epoch": 0.51, - "learning_rate": 3.501168925406259e-05, - "loss": 0.8688, - "step": 7160 - }, - { - "epoch": 0.51, - "learning_rate": 3.49965612030256e-05, - "loss": 0.8682, - "step": 7170 - }, - { - "epoch": 0.51, - "learning_rate": 3.4981413526202704e-05, - "loss": 0.8718, - "step": 7180 - }, - { - "epoch": 0.51, - "learning_rate": 3.4966246243417465e-05, - "loss": 0.8663, - "step": 7190 - }, - { - "epoch": 0.51, - "learning_rate": 3.495105937451908e-05, - "loss": 0.861, - "step": 7200 - }, - { - "epoch": 0.51, - "learning_rate": 3.4935852939382396e-05, - "loss": 0.8612, - "step": 7210 - }, - { - "epoch": 0.51, - "learning_rate": 3.492062695790786e-05, - "loss": 0.8704, - "step": 7220 - }, - { - "epoch": 0.51, - "learning_rate": 3.4905381450021484e-05, - "loss": 0.8608, - "step": 7230 - }, - { - "epoch": 0.51, - "learning_rate": 3.489011643567486e-05, - "loss": 0.8659, - "step": 7240 - }, - { - "epoch": 0.51, - "learning_rate": 3.4874831934845095e-05, - "loss": 0.8692, - "step": 7250 - }, - { - "epoch": 0.51, - "learning_rate": 3.485952796753479e-05, - "loss": 0.8565, - "step": 7260 - }, - { - "epoch": 0.51, - "learning_rate": 3.484420455377202e-05, - "loss": 0.8642, - "step": 7270 - }, - { - "epoch": 0.51, - "learning_rate": 3.482886171361034e-05, - "loss": 0.8631, - "step": 7280 - }, - { - "epoch": 0.51, - "learning_rate": 3.481349946712869e-05, - "loss": 0.8671, - "step": 7290 - }, - { - "epoch": 0.52, - "learning_rate": 3.4798117834431436e-05, - "loss": 0.8676, - "step": 7300 - }, - { - "epoch": 0.52, - "learning_rate": 3.478271683564829e-05, - "loss": 0.8689, - "step": 7310 - }, - { - "epoch": 0.52, - "learning_rate": 3.4767296490934324e-05, - "loss": 0.869, - "step": 7320 - }, - { - "epoch": 0.52, - "learning_rate": 3.475185682046992e-05, - "loss": 0.8651, - "step": 7330 - }, - { - "epoch": 0.52, - "learning_rate": 3.4736397844460766e-05, - "loss": 0.8637, - "step": 7340 - }, - { - "epoch": 0.52, - "learning_rate": 3.4720919583137786e-05, - "loss": 0.8707, - "step": 7350 - }, - { - "epoch": 0.52, - "learning_rate": 3.470542205675717e-05, - "loss": 0.8607, - "step": 7360 - }, - { - "epoch": 0.52, - "learning_rate": 3.4689905285600314e-05, - "loss": 0.8707, - "step": 7370 - }, - { - "epoch": 0.52, - "learning_rate": 3.467436928997379e-05, - "loss": 0.8762, - "step": 7380 - }, - { - "epoch": 0.52, - "learning_rate": 3.465881409020933e-05, - "loss": 0.8809, - "step": 7390 - }, - { - "epoch": 0.52, - "learning_rate": 3.4643239706663813e-05, - "loss": 0.8796, - "step": 7400 - }, - { - "epoch": 0.52, - "learning_rate": 3.46276461597192e-05, - "loss": 0.8746, - "step": 7410 - }, - { - "epoch": 0.52, - "learning_rate": 3.4612033469782555e-05, - "loss": 0.8796, - "step": 7420 - }, - { - "epoch": 0.52, - "learning_rate": 3.4596401657285975e-05, - "loss": 0.8775, - "step": 7430 - }, - { - "epoch": 0.53, - "learning_rate": 3.458075074268659e-05, - "loss": 0.871, - "step": 7440 - }, - { - "epoch": 0.53, - "learning_rate": 3.456508074646654e-05, - "loss": 0.8694, - "step": 7450 - }, - { - "epoch": 0.53, - "learning_rate": 3.4549391689132914e-05, - "loss": 0.8706, - "step": 7460 - }, - { - "epoch": 0.53, - "learning_rate": 3.453368359121775e-05, - "loss": 0.8639, - "step": 7470 - }, - { - "epoch": 0.53, - "learning_rate": 3.4517956473278037e-05, - "loss": 0.8688, - "step": 7480 - }, - { - "epoch": 0.53, - "learning_rate": 3.4502210355895615e-05, - "loss": 0.8723, - "step": 7490 - }, - { - "epoch": 0.53, - "learning_rate": 3.44864452596772e-05, - "loss": 0.8668, - "step": 7500 - }, - { - "epoch": 0.53, - "learning_rate": 3.4470661205254354e-05, - "loss": 0.8661, - "step": 7510 - }, - { - "epoch": 0.53, - "learning_rate": 3.445485821328345e-05, - "loss": 0.8662, - "step": 7520 - }, - { - "epoch": 0.53, - "learning_rate": 3.443903630444562e-05, - "loss": 0.8715, - "step": 7530 - }, - { - "epoch": 0.53, - "learning_rate": 3.4423195499446796e-05, - "loss": 0.8683, - "step": 7540 - }, - { - "epoch": 0.53, - "learning_rate": 3.440733581901759e-05, - "loss": 0.868, - "step": 7550 - }, - { - "epoch": 0.53, - "learning_rate": 3.439145728391335e-05, - "loss": 0.8663, - "step": 7560 - }, - { - "epoch": 0.53, - "learning_rate": 3.437555991491409e-05, - "loss": 0.8629, - "step": 7570 - }, - { - "epoch": 0.54, - "learning_rate": 3.435964373282447e-05, - "loss": 0.8564, - "step": 7580 - }, - { - "epoch": 0.54, - "learning_rate": 3.434370875847377e-05, - "loss": 0.8574, - "step": 7590 - }, - { - "epoch": 0.54, - "learning_rate": 3.432775501271586e-05, - "loss": 0.8704, - "step": 7600 - }, - { - "epoch": 0.54, - "learning_rate": 3.431178251642918e-05, - "loss": 0.8659, - "step": 7610 - }, - { - "epoch": 0.54, - "learning_rate": 3.429579129051672e-05, - "loss": 0.865, - "step": 7620 - }, - { - "epoch": 0.54, - "learning_rate": 3.4279781355905956e-05, - "loss": 0.8623, - "step": 7630 - }, - { - "epoch": 0.54, - "learning_rate": 3.4263752733548866e-05, - "loss": 0.8667, - "step": 7640 - }, - { - "epoch": 0.54, - "learning_rate": 3.424770544442187e-05, - "loss": 0.8646, - "step": 7650 - }, - { - "epoch": 0.54, - "learning_rate": 3.423163950952585e-05, - "loss": 0.8646, - "step": 7660 - }, - { - "epoch": 0.54, - "learning_rate": 3.421555494988605e-05, - "loss": 0.8655, - "step": 7670 - }, - { - "epoch": 0.54, - "learning_rate": 3.4199451786552094e-05, - "loss": 0.8669, - "step": 7680 - }, - { - "epoch": 0.54, - "learning_rate": 3.418333004059798e-05, - "loss": 0.8621, - "step": 7690 - }, - { - "epoch": 0.54, - "learning_rate": 3.4167189733122e-05, - "loss": 0.8618, - "step": 7700 - }, - { - "epoch": 0.54, - "learning_rate": 3.415103088524675e-05, - "loss": 0.8609, - "step": 7710 - }, - { - "epoch": 0.55, - "learning_rate": 3.413485351811908e-05, - "loss": 0.8595, - "step": 7720 - }, - { - "epoch": 0.55, - "learning_rate": 3.411865765291008e-05, - "loss": 0.8593, - "step": 7730 - }, - { - "epoch": 0.55, - "learning_rate": 3.410244331081503e-05, - "loss": 0.8648, - "step": 7740 - }, - { - "epoch": 0.55, - "learning_rate": 3.4086210513053446e-05, - "loss": 0.8623, - "step": 7750 - }, - { - "epoch": 0.55, - "learning_rate": 3.4069959280868934e-05, - "loss": 0.8567, - "step": 7760 - }, - { - "epoch": 0.55, - "learning_rate": 3.405368963552925e-05, - "loss": 0.8696, - "step": 7770 - }, - { - "epoch": 0.55, - "learning_rate": 3.403740159832625e-05, - "loss": 0.8615, - "step": 7780 - }, - { - "epoch": 0.55, - "learning_rate": 3.4021095190575865e-05, - "loss": 0.8553, - "step": 7790 - }, - { - "epoch": 0.55, - "learning_rate": 3.400477043361805e-05, - "loss": 0.8628, - "step": 7800 - }, - { - "epoch": 0.55, - "learning_rate": 3.3988427348816775e-05, - "loss": 0.8558, - "step": 7810 - }, - { - "epoch": 0.55, - "learning_rate": 3.397206595756001e-05, - "loss": 0.8637, - "step": 7820 - }, - { - "epoch": 0.55, - "learning_rate": 3.395568628125968e-05, - "loss": 0.8641, - "step": 7830 - }, - { - "epoch": 0.55, - "learning_rate": 3.393928834135163e-05, - "loss": 0.8641, - "step": 7840 - }, - { - "epoch": 0.55, - "learning_rate": 3.39228721592956e-05, - "loss": 0.8638, - "step": 7850 - }, - { - "epoch": 0.56, - "learning_rate": 3.390643775657523e-05, - "loss": 0.8666, - "step": 7860 - }, - { - "epoch": 0.56, - "learning_rate": 3.388998515469798e-05, - "loss": 0.8678, - "step": 7870 - }, - { - "epoch": 0.56, - "learning_rate": 3.387351437519513e-05, - "loss": 0.8633, - "step": 7880 - }, - { - "epoch": 0.56, - "learning_rate": 3.385702543962176e-05, - "loss": 0.857, - "step": 7890 - }, - { - "epoch": 0.56, - "learning_rate": 3.384051836955672e-05, - "loss": 0.8629, - "step": 7900 - }, - { - "epoch": 0.56, - "learning_rate": 3.382399318660255e-05, - "loss": 0.8648, - "step": 7910 - }, - { - "epoch": 0.56, - "learning_rate": 3.3807449912385535e-05, - "loss": 0.8627, - "step": 7920 - }, - { - "epoch": 0.56, - "learning_rate": 3.379088856855562e-05, - "loss": 0.8617, - "step": 7930 - }, - { - "epoch": 0.56, - "learning_rate": 3.377430917678641e-05, - "loss": 0.855, - "step": 7940 - }, - { - "epoch": 0.56, - "learning_rate": 3.37577117587751e-05, - "loss": 0.8582, - "step": 7950 - }, - { - "epoch": 0.56, - "learning_rate": 3.374109633624251e-05, - "loss": 0.861, - "step": 7960 - }, - { - "epoch": 0.56, - "learning_rate": 3.3724462930933e-05, - "loss": 0.8596, - "step": 7970 - }, - { - "epoch": 0.56, - "learning_rate": 3.370781156461447e-05, - "loss": 0.8588, - "step": 7980 - }, - { - "epoch": 0.56, - "learning_rate": 3.369114225907833e-05, - "loss": 0.857, - "step": 7990 - }, - { - "epoch": 0.57, - "learning_rate": 3.3674455036139455e-05, - "loss": 0.8584, - "step": 8000 - }, - { - "epoch": 0.57, - "learning_rate": 3.365774991763618e-05, - "loss": 0.861, - "step": 8010 - }, - { - "epoch": 0.57, - "learning_rate": 3.364102692543026e-05, - "loss": 0.8559, - "step": 8020 - }, - { - "epoch": 0.57, - "learning_rate": 3.362428608140682e-05, - "loss": 0.8608, - "step": 8030 - }, - { - "epoch": 0.57, - "learning_rate": 3.3607527407474395e-05, - "loss": 0.8532, - "step": 8040 - }, - { - "epoch": 0.57, - "learning_rate": 3.3590750925564794e-05, - "loss": 0.8544, - "step": 8050 - }, - { - "epoch": 0.57, - "learning_rate": 3.357395665763317e-05, - "loss": 0.8605, - "step": 8060 - }, - { - "epoch": 0.57, - "learning_rate": 3.355714462565795e-05, - "loss": 0.8473, - "step": 8070 - }, - { - "epoch": 0.57, - "learning_rate": 3.354031485164078e-05, - "loss": 0.8579, - "step": 8080 - }, - { - "epoch": 0.57, - "learning_rate": 3.352346735760656e-05, - "loss": 0.8528, - "step": 8090 - }, - { - "epoch": 0.57, - "learning_rate": 3.3506602165603365e-05, - "loss": 0.8618, - "step": 8100 - }, - { - "epoch": 0.57, - "learning_rate": 3.348971929770243e-05, - "loss": 0.8677, - "step": 8110 - }, - { - "epoch": 0.57, - "learning_rate": 3.347281877599813e-05, - "loss": 0.8551, - "step": 8120 - }, - { - "epoch": 0.57, - "learning_rate": 3.345590062260792e-05, - "loss": 0.8561, - "step": 8130 - }, - { - "epoch": 0.57, - "learning_rate": 3.3438964859672364e-05, - "loss": 0.8535, - "step": 8140 - }, - { - "epoch": 0.58, - "learning_rate": 3.342201150935504e-05, - "loss": 0.8597, - "step": 8150 - }, - { - "epoch": 0.58, - "learning_rate": 3.340504059384256e-05, - "loss": 0.8583, - "step": 8160 - }, - { - "epoch": 0.58, - "learning_rate": 3.338805213534453e-05, - "loss": 0.8661, - "step": 8170 - }, - { - "epoch": 0.58, - "learning_rate": 3.337104615609349e-05, - "loss": 0.863, - "step": 8180 - }, - { - "epoch": 0.58, - "learning_rate": 3.335402267834492e-05, - "loss": 0.8563, - "step": 8190 - }, - { - "epoch": 0.58, - "learning_rate": 3.3336981724377214e-05, - "loss": 0.8523, - "step": 8200 - }, - { - "epoch": 0.58, - "learning_rate": 3.331992331649163e-05, - "loss": 0.8642, - "step": 8210 - }, - { - "epoch": 0.58, - "learning_rate": 3.3302847477012246e-05, - "loss": 0.858, - "step": 8220 - }, - { - "epoch": 0.58, - "learning_rate": 3.328575422828598e-05, - "loss": 0.8653, - "step": 8230 - }, - { - "epoch": 0.58, - "learning_rate": 3.326864359268251e-05, - "loss": 0.8537, - "step": 8240 - }, - { - "epoch": 0.58, - "learning_rate": 3.325151559259431e-05, - "loss": 0.8556, - "step": 8250 - }, - { - "epoch": 0.58, - "learning_rate": 3.323437025043653e-05, - "loss": 0.8584, - "step": 8260 - }, - { - "epoch": 0.58, - "learning_rate": 3.321720758864703e-05, - "loss": 0.8562, - "step": 8270 - }, - { - "epoch": 0.58, - "learning_rate": 3.320002762968635e-05, - "loss": 0.8572, - "step": 8280 - }, - { - "epoch": 0.59, - "learning_rate": 3.318283039603765e-05, - "loss": 0.8592, - "step": 8290 - }, - { - "epoch": 0.59, - "learning_rate": 3.316561591020671e-05, - "loss": 0.8488, - "step": 8300 - }, - { - "epoch": 0.59, - "learning_rate": 3.314838419472189e-05, - "loss": 0.8493, - "step": 8310 - }, - { - "epoch": 0.59, - "learning_rate": 3.3131135272134076e-05, - "loss": 0.8506, - "step": 8320 - }, - { - "epoch": 0.59, - "learning_rate": 3.31138691650167e-05, - "loss": 0.8557, - "step": 8330 - }, - { - "epoch": 0.59, - "learning_rate": 3.309658589596565e-05, - "loss": 0.8631, - "step": 8340 - }, - { - "epoch": 0.59, - "learning_rate": 3.3079285487599326e-05, - "loss": 0.8636, - "step": 8350 - }, - { - "epoch": 0.59, - "learning_rate": 3.306196796255849e-05, - "loss": 0.8544, - "step": 8360 - }, - { - "epoch": 0.59, - "learning_rate": 3.304463334350637e-05, - "loss": 0.8587, - "step": 8370 - }, - { - "epoch": 0.59, - "learning_rate": 3.302728165312852e-05, - "loss": 0.8607, - "step": 8380 - }, - { - "epoch": 0.59, - "learning_rate": 3.300991291413285e-05, - "loss": 0.8569, - "step": 8390 - }, - { - "epoch": 0.59, - "learning_rate": 3.299252714924958e-05, - "loss": 0.8508, - "step": 8400 - }, - { - "epoch": 0.59, - "learning_rate": 3.297512438123122e-05, - "loss": 0.8474, - "step": 8410 - }, - { - "epoch": 0.59, - "learning_rate": 3.295770463285252e-05, - "loss": 0.8497, - "step": 8420 - }, - { - "epoch": 0.6, - "learning_rate": 3.2940267926910455e-05, - "loss": 0.859, - "step": 8430 - }, - { - "epoch": 0.6, - "learning_rate": 3.29228142862242e-05, - "loss": 0.8549, - "step": 8440 - }, - { - "epoch": 0.6, - "learning_rate": 3.290534373363507e-05, - "loss": 0.8596, - "step": 8450 - }, - { - "epoch": 0.6, - "learning_rate": 3.288785629200655e-05, - "loss": 0.8524, - "step": 8460 - }, - { - "epoch": 0.6, - "learning_rate": 3.28703519842242e-05, - "loss": 0.8589, - "step": 8470 - }, - { - "epoch": 0.6, - "learning_rate": 3.285283083319565e-05, - "loss": 0.8562, - "step": 8480 - }, - { - "epoch": 0.6, - "learning_rate": 3.283529286185058e-05, - "loss": 0.8591, - "step": 8490 - }, - { - "epoch": 0.6, - "learning_rate": 3.2817738093140685e-05, - "loss": 0.8594, - "step": 8500 - }, - { - "epoch": 0.6, - "learning_rate": 3.280016655003964e-05, - "loss": 0.8512, - "step": 8510 - }, - { - "epoch": 0.6, - "learning_rate": 3.2782578255543084e-05, - "loss": 0.8542, - "step": 8520 - }, - { - "epoch": 0.6, - "learning_rate": 3.2764973232668555e-05, - "loss": 0.8552, - "step": 8530 - }, - { - "epoch": 0.6, - "learning_rate": 3.27473515044555e-05, - "loss": 0.8498, - "step": 8540 - }, - { - "epoch": 0.6, - "learning_rate": 3.272971309396522e-05, - "loss": 0.8549, - "step": 8550 - }, - { - "epoch": 0.6, - "learning_rate": 3.271205802428086e-05, - "loss": 0.8541, - "step": 8560 - }, - { - "epoch": 0.61, - "learning_rate": 3.269438631850735e-05, - "loss": 0.8507, - "step": 8570 - }, - { - "epoch": 0.61, - "learning_rate": 3.26766979997714e-05, - "loss": 0.86, - "step": 8580 - }, - { - "epoch": 0.61, - "learning_rate": 3.265899309122147e-05, - "loss": 0.8541, - "step": 8590 - }, - { - "epoch": 0.61, - "learning_rate": 3.2641271616027705e-05, - "loss": 0.8491, - "step": 8600 - }, - { - "epoch": 0.61, - "learning_rate": 3.262353359738196e-05, - "loss": 0.8591, - "step": 8610 - }, - { - "epoch": 0.61, - "learning_rate": 3.260577905849772e-05, - "loss": 0.8558, - "step": 8620 - }, - { - "epoch": 0.61, - "learning_rate": 3.258800802261011e-05, - "loss": 0.8472, - "step": 8630 - }, - { - "epoch": 0.61, - "learning_rate": 3.257022051297581e-05, - "loss": 0.8519, - "step": 8640 - }, - { - "epoch": 0.61, - "learning_rate": 3.2552416552873096e-05, - "loss": 0.8617, - "step": 8650 - }, - { - "epoch": 0.61, - "learning_rate": 3.253459616560175e-05, - "loss": 0.8605, - "step": 8660 - }, - { - "epoch": 0.61, - "learning_rate": 3.2516759374483066e-05, - "loss": 0.8592, - "step": 8670 - }, - { - "epoch": 0.61, - "learning_rate": 3.249890620285979e-05, - "loss": 0.861, - "step": 8680 - }, - { - "epoch": 0.61, - "learning_rate": 3.2481036674096116e-05, - "loss": 0.8666, - "step": 8690 - }, - { - "epoch": 0.61, - "learning_rate": 3.246315081157765e-05, - "loss": 0.8559, - "step": 8700 - }, - { - "epoch": 0.62, - "learning_rate": 3.244524863871135e-05, - "loss": 0.8587, - "step": 8710 - }, - { - "epoch": 0.62, - "learning_rate": 3.242733017892555e-05, - "loss": 0.8553, - "step": 8720 - }, - { - "epoch": 0.62, - "learning_rate": 3.2409395455669866e-05, - "loss": 0.8505, - "step": 8730 - }, - { - "epoch": 0.62, - "learning_rate": 3.239144449241523e-05, - "loss": 0.8571, - "step": 8740 - }, - { - "epoch": 0.62, - "learning_rate": 3.23734773126538e-05, - "loss": 0.8586, - "step": 8750 - }, - { - "epoch": 0.62, - "learning_rate": 3.2355493939898976e-05, - "loss": 0.87, - "step": 8760 - }, - { - "epoch": 0.62, - "learning_rate": 3.233749439768534e-05, - "loss": 0.8534, - "step": 8770 - }, - { - "epoch": 0.62, - "learning_rate": 3.231947870956864e-05, - "loss": 0.8524, - "step": 8780 - }, - { - "epoch": 0.62, - "learning_rate": 3.2301446899125746e-05, - "loss": 0.8516, - "step": 8790 - }, - { - "epoch": 0.62, - "learning_rate": 3.2283398989954623e-05, - "loss": 0.8598, - "step": 8800 - }, - { - "epoch": 0.62, - "learning_rate": 3.226533500567433e-05, - "loss": 0.8614, - "step": 8810 - }, - { - "epoch": 0.62, - "learning_rate": 3.224725496992493e-05, - "loss": 0.8446, - "step": 8820 - }, - { - "epoch": 0.62, - "learning_rate": 3.2229158906367515e-05, - "loss": 0.8587, - "step": 8830 - }, - { - "epoch": 0.62, - "learning_rate": 3.221104683868415e-05, - "loss": 0.8602, - "step": 8840 - }, - { - "epoch": 0.63, - "learning_rate": 3.219291879057783e-05, - "loss": 0.8543, - "step": 8850 - }, - { - "epoch": 0.63, - "learning_rate": 3.2174774785772487e-05, - "loss": 0.8479, - "step": 8860 - }, - { - "epoch": 0.63, - "learning_rate": 3.2156614848012905e-05, - "loss": 0.8525, - "step": 8870 - }, - { - "epoch": 0.63, - "learning_rate": 3.2138439001064745e-05, - "loss": 0.852, - "step": 8880 - }, - { - "epoch": 0.63, - "learning_rate": 3.2120247268714473e-05, - "loss": 0.8557, - "step": 8890 - }, - { - "epoch": 0.63, - "learning_rate": 3.210203967476936e-05, - "loss": 0.8597, - "step": 8900 - }, - { - "epoch": 0.63, - "learning_rate": 3.2083816243057415e-05, - "loss": 0.8491, - "step": 8910 - }, - { - "epoch": 0.63, - "learning_rate": 3.206557699742739e-05, - "loss": 0.855, - "step": 8920 - }, - { - "epoch": 0.63, - "learning_rate": 3.204732196174872e-05, - "loss": 0.8586, - "step": 8930 - }, - { - "epoch": 0.63, - "learning_rate": 3.2029051159911513e-05, - "loss": 0.8527, - "step": 8940 - }, - { - "epoch": 0.63, - "learning_rate": 3.201076461582651e-05, - "loss": 0.8521, - "step": 8950 - }, - { - "epoch": 0.63, - "learning_rate": 3.199246235342504e-05, - "loss": 0.8504, - "step": 8960 - }, - { - "epoch": 0.63, - "learning_rate": 3.197414439665902e-05, - "loss": 0.8455, - "step": 8970 - }, - { - "epoch": 0.63, - "learning_rate": 3.1955810769500915e-05, - "loss": 0.8501, - "step": 8980 - }, - { - "epoch": 0.64, - "learning_rate": 3.193746149594365e-05, - "loss": 0.8529, - "step": 8990 - }, - { - "epoch": 0.64, - "learning_rate": 3.1919096600000674e-05, - "loss": 0.8473, - "step": 9000 - }, - { - "epoch": 0.64, - "learning_rate": 3.190071610570587e-05, - "loss": 0.8482, - "step": 9010 - }, - { - "epoch": 0.64, - "learning_rate": 3.188232003711353e-05, - "loss": 0.8533, - "step": 9020 - }, - { - "epoch": 0.64, - "learning_rate": 3.1863908418298304e-05, - "loss": 0.8582, - "step": 9030 - }, - { - "epoch": 0.64, - "learning_rate": 3.184548127335524e-05, - "loss": 0.8495, - "step": 9040 - }, - { - "epoch": 0.64, - "learning_rate": 3.1827038626399665e-05, - "loss": 0.8522, - "step": 9050 - }, - { - "epoch": 0.64, - "learning_rate": 3.180858050156722e-05, - "loss": 0.8487, - "step": 9060 - }, - { - "epoch": 0.64, - "learning_rate": 3.1790106923013784e-05, - "loss": 0.8523, - "step": 9070 - }, - { - "epoch": 0.64, - "learning_rate": 3.177161791491546e-05, - "loss": 0.8602, - "step": 9080 - }, - { - "epoch": 0.64, - "learning_rate": 3.175311350146856e-05, - "loss": 0.8475, - "step": 9090 - }, - { - "epoch": 0.64, - "learning_rate": 3.173459370688954e-05, - "loss": 0.8514, - "step": 9100 - }, - { - "epoch": 0.64, - "learning_rate": 3.1716058555414985e-05, - "loss": 0.8554, - "step": 9110 - }, - { - "epoch": 0.64, - "learning_rate": 3.16975080713016e-05, - "loss": 0.8494, - "step": 9120 - }, - { - "epoch": 0.64, - "learning_rate": 3.167894227882613e-05, - "loss": 0.8458, - "step": 9130 - }, - { - "epoch": 0.65, - "learning_rate": 3.166036120228536e-05, - "loss": 0.8438, - "step": 9140 - }, - { - "epoch": 0.65, - "learning_rate": 3.1641764865996084e-05, - "loss": 0.8474, - "step": 9150 - }, - { - "epoch": 0.65, - "learning_rate": 3.162315329429506e-05, - "loss": 0.8478, - "step": 9160 - }, - { - "epoch": 0.65, - "learning_rate": 3.1604526511538996e-05, - "loss": 0.8474, - "step": 9170 - }, - { - "epoch": 0.65, - "learning_rate": 3.1585884542104476e-05, - "loss": 0.8415, - "step": 9180 - }, - { - "epoch": 0.65, - "learning_rate": 3.1567227410388004e-05, - "loss": 0.8496, - "step": 9190 - }, - { - "epoch": 0.65, - "learning_rate": 3.154855514080589e-05, - "loss": 0.8499, - "step": 9200 - }, - { - "epoch": 0.65, - "learning_rate": 3.152986775779426e-05, - "loss": 0.8456, - "step": 9210 - }, - { - "epoch": 0.65, - "learning_rate": 3.151116528580904e-05, - "loss": 0.8463, - "step": 9220 - }, - { - "epoch": 0.65, - "learning_rate": 3.149244774932588e-05, - "loss": 0.8463, - "step": 9230 - }, - { - "epoch": 0.65, - "learning_rate": 3.1473715172840154e-05, - "loss": 0.8533, - "step": 9240 - }, - { - "epoch": 0.65, - "learning_rate": 3.145496758086693e-05, - "loss": 0.8517, - "step": 9250 - }, - { - "epoch": 0.65, - "learning_rate": 3.143620499794089e-05, - "loss": 0.8505, - "step": 9260 - }, - { - "epoch": 0.65, - "learning_rate": 3.141742744861638e-05, - "loss": 0.8543, - "step": 9270 - }, - { - "epoch": 0.66, - "learning_rate": 3.139863495746731e-05, - "loss": 0.8473, - "step": 9280 - }, - { - "epoch": 0.66, - "learning_rate": 3.137982754908713e-05, - "loss": 0.8493, - "step": 9290 - }, - { - "epoch": 0.66, - "learning_rate": 3.136100524808884e-05, - "loss": 0.853, - "step": 9300 - }, - { - "epoch": 0.66, - "learning_rate": 3.1342168079104914e-05, - "loss": 0.8481, - "step": 9310 - }, - { - "epoch": 0.66, - "learning_rate": 3.132331606678728e-05, - "loss": 0.8475, - "step": 9320 - }, - { - "epoch": 0.66, - "learning_rate": 3.130444923580732e-05, - "loss": 0.8443, - "step": 9330 - }, - { - "epoch": 0.66, - "learning_rate": 3.128556761085576e-05, - "loss": 0.8536, - "step": 9340 - }, - { - "epoch": 0.66, - "learning_rate": 3.126667121664272e-05, - "loss": 0.862, - "step": 9350 - }, - { - "epoch": 0.66, - "learning_rate": 3.1247760077897656e-05, - "loss": 0.846, - "step": 9360 - }, - { - "epoch": 0.66, - "learning_rate": 3.122883421936929e-05, - "loss": 0.8382, - "step": 9370 - }, - { - "epoch": 0.66, - "learning_rate": 3.1209893665825625e-05, - "loss": 0.8503, - "step": 9380 - }, - { - "epoch": 0.66, - "learning_rate": 3.119093844205391e-05, - "loss": 0.85, - "step": 9390 - }, - { - "epoch": 0.66, - "learning_rate": 3.117196857286055e-05, - "loss": 0.845, - "step": 9400 - }, - { - "epoch": 0.66, - "learning_rate": 3.1152984083071165e-05, - "loss": 0.8503, - "step": 9410 - }, - { - "epoch": 0.67, - "learning_rate": 3.1133984997530475e-05, - "loss": 0.8424, - "step": 9420 - }, - { - "epoch": 0.67, - "learning_rate": 3.1114971341102306e-05, - "loss": 0.855, - "step": 9430 - }, - { - "epoch": 0.67, - "learning_rate": 3.109594313866958e-05, - "loss": 0.853, - "step": 9440 - }, - { - "epoch": 0.67, - "learning_rate": 3.107690041513422e-05, - "loss": 0.8463, - "step": 9450 - }, - { - "epoch": 0.67, - "learning_rate": 3.1057843195417166e-05, - "loss": 0.8502, - "step": 9460 - }, - { - "epoch": 0.67, - "learning_rate": 3.103877150445834e-05, - "loss": 0.8467, - "step": 9470 - }, - { - "epoch": 0.67, - "learning_rate": 3.1019685367216575e-05, - "loss": 0.8472, - "step": 9480 - }, - { - "epoch": 0.67, - "learning_rate": 3.100058480866964e-05, - "loss": 0.8547, - "step": 9490 - }, - { - "epoch": 0.67, - "learning_rate": 3.098146985381416e-05, - "loss": 0.8431, - "step": 9500 - }, - { - "epoch": 0.67, - "learning_rate": 3.096234052766561e-05, - "loss": 0.8443, - "step": 9510 - }, - { - "epoch": 0.67, - "learning_rate": 3.0943196855258255e-05, - "loss": 0.8513, - "step": 9520 - }, - { - "epoch": 0.67, - "learning_rate": 3.0924038861645155e-05, - "loss": 0.8471, - "step": 9530 - }, - { - "epoch": 0.67, - "learning_rate": 3.0904866571898096e-05, - "loss": 0.8477, - "step": 9540 - }, - { - "epoch": 0.67, - "learning_rate": 3.088568001110759e-05, - "loss": 0.8484, - "step": 9550 - }, - { - "epoch": 0.68, - "learning_rate": 3.0866479204382805e-05, - "loss": 0.8471, - "step": 9560 - }, - { - "epoch": 0.68, - "learning_rate": 3.0847264176851575e-05, - "loss": 0.8402, - "step": 9570 - }, - { - "epoch": 0.68, - "learning_rate": 3.0828034953660324e-05, - "loss": 0.8387, - "step": 9580 - }, - { - "epoch": 0.68, - "learning_rate": 3.080879155997406e-05, - "loss": 0.8419, - "step": 9590 - }, - { - "epoch": 0.68, - "learning_rate": 3.078953402097635e-05, - "loss": 0.8425, - "step": 9600 - }, - { - "epoch": 0.68, - "learning_rate": 3.077026236186925e-05, - "loss": 0.8519, - "step": 9610 - }, - { - "epoch": 0.68, - "learning_rate": 3.07509766078733e-05, - "loss": 0.8449, - "step": 9620 - }, - { - "epoch": 0.68, - "learning_rate": 3.073167678422752e-05, - "loss": 0.8422, - "step": 9630 - }, - { - "epoch": 0.68, - "learning_rate": 3.0712362916189274e-05, - "loss": 0.847, - "step": 9640 - }, - { - "epoch": 0.68, - "learning_rate": 3.069303502903437e-05, - "loss": 0.8407, - "step": 9650 - }, - { - "epoch": 0.68, - "learning_rate": 3.0673693148056946e-05, - "loss": 0.8537, - "step": 9660 - }, - { - "epoch": 0.68, - "learning_rate": 3.065433729856943e-05, - "loss": 0.8499, - "step": 9670 - }, - { - "epoch": 0.68, - "learning_rate": 3.063496750590255e-05, - "loss": 0.847, - "step": 9680 - }, - { - "epoch": 0.68, - "learning_rate": 3.061558379540527e-05, - "loss": 0.8511, - "step": 9690 - }, - { - "epoch": 0.69, - "learning_rate": 3.059618619244479e-05, - "loss": 0.8594, - "step": 9700 - }, - { - "epoch": 0.69, - "learning_rate": 3.0576774722406466e-05, - "loss": 0.8452, - "step": 9710 - }, - { - "epoch": 0.69, - "learning_rate": 3.055734941069383e-05, - "loss": 0.8435, - "step": 9720 - }, - { - "epoch": 0.69, - "learning_rate": 3.053791028272849e-05, - "loss": 0.8431, - "step": 9730 - }, - { - "epoch": 0.69, - "learning_rate": 3.0518457363950154e-05, - "loss": 0.8477, - "step": 9740 - }, - { - "epoch": 0.69, - "learning_rate": 3.0498990679816602e-05, - "loss": 0.8436, - "step": 9750 - }, - { - "epoch": 0.69, - "learning_rate": 3.0479510255803587e-05, - "loss": 0.8414, - "step": 9760 - }, - { - "epoch": 0.69, - "learning_rate": 3.046001611740487e-05, - "loss": 0.8352, - "step": 9770 - }, - { - "epoch": 0.69, - "learning_rate": 3.044050829013215e-05, - "loss": 0.8465, - "step": 9780 - }, - { - "epoch": 0.69, - "learning_rate": 3.0420986799515037e-05, - "loss": 0.8419, - "step": 9790 - }, - { - "epoch": 0.69, - "learning_rate": 3.0401451671101036e-05, - "loss": 0.8477, - "step": 9800 - }, - { - "epoch": 0.69, - "learning_rate": 3.038190293045548e-05, - "loss": 0.8451, - "step": 9810 - }, - { - "epoch": 0.69, - "learning_rate": 3.0362340603161552e-05, - "loss": 0.8537, - "step": 9820 - }, - { - "epoch": 0.69, - "learning_rate": 3.0342764714820166e-05, - "loss": 0.8401, - "step": 9830 - }, - { - "epoch": 0.7, - "learning_rate": 3.0323175291050014e-05, - "loss": 0.8411, - "step": 9840 - }, - { - "epoch": 0.7, - "learning_rate": 3.03035723574875e-05, - "loss": 0.8516, - "step": 9850 - }, - { - "epoch": 0.7, - "learning_rate": 3.02839559397867e-05, - "loss": 0.8453, - "step": 9860 - }, - { - "epoch": 0.7, - "learning_rate": 3.0264326063619346e-05, - "loss": 0.8418, - "step": 9870 - }, - { - "epoch": 0.7, - "learning_rate": 3.024468275467477e-05, - "loss": 0.8473, - "step": 9880 - }, - { - "epoch": 0.7, - "learning_rate": 3.0225026038659892e-05, - "loss": 0.8435, - "step": 9890 - }, - { - "epoch": 0.7, - "learning_rate": 3.0205355941299187e-05, - "loss": 0.8538, - "step": 9900 - }, - { - "epoch": 0.7, - "learning_rate": 3.0185672488334607e-05, - "loss": 0.8559, - "step": 9910 - }, - { - "epoch": 0.7, - "learning_rate": 3.0165975705525628e-05, - "loss": 0.8616, - "step": 9920 - }, - { - "epoch": 0.7, - "learning_rate": 3.014626561864914e-05, - "loss": 0.8619, - "step": 9930 - }, - { - "epoch": 0.7, - "learning_rate": 3.0126542253499463e-05, - "loss": 0.8567, - "step": 9940 - }, - { - "epoch": 0.7, - "learning_rate": 3.010680563588827e-05, - "loss": 0.8562, - "step": 9950 - }, - { - "epoch": 0.7, - "learning_rate": 3.0087055791644594e-05, - "loss": 0.8564, - "step": 9960 - }, - { - "epoch": 0.7, - "learning_rate": 3.0067292746614776e-05, - "loss": 0.8443, - "step": 9970 - }, - { - "epoch": 0.7, - "learning_rate": 3.0047516526662437e-05, - "loss": 0.8505, - "step": 9980 - }, - { - "epoch": 0.71, - "learning_rate": 3.0027727157668428e-05, - "loss": 0.8452, - "step": 9990 - }, - { - "epoch": 0.71, - "learning_rate": 3.000792466553082e-05, - "loss": 0.8473, - "step": 10000 - }, - { - "epoch": 0.71, - "learning_rate": 2.9988109076164843e-05, - "loss": 0.8509, - "step": 10010 - }, - { - "epoch": 0.71, - "learning_rate": 2.9968280415502886e-05, - "loss": 0.8457, - "step": 10020 - }, - { - "epoch": 0.71, - "learning_rate": 2.9948438709494424e-05, - "loss": 0.8476, - "step": 10030 - }, - { - "epoch": 0.71, - "learning_rate": 2.9928583984106026e-05, - "loss": 0.85, - "step": 10040 - }, - { - "epoch": 0.71, - "learning_rate": 2.990871626532128e-05, - "loss": 0.8502, - "step": 10050 - }, - { - "epoch": 0.71, - "learning_rate": 2.988883557914079e-05, - "loss": 0.8528, - "step": 10060 - }, - { - "epoch": 0.71, - "learning_rate": 2.986894195158213e-05, - "loss": 0.8436, - "step": 10070 - }, - { - "epoch": 0.71, - "learning_rate": 2.98490354086798e-05, - "loss": 0.838, - "step": 10080 - }, - { - "epoch": 0.71, - "learning_rate": 2.9829115976485213e-05, - "loss": 0.85, - "step": 10090 - }, - { - "epoch": 0.71, - "learning_rate": 2.9809183681066646e-05, - "loss": 0.848, - "step": 10100 - }, - { - "epoch": 0.71, - "learning_rate": 2.9789238548509202e-05, - "loss": 0.8383, - "step": 10110 - }, - { - "epoch": 0.71, - "learning_rate": 2.9769280604914805e-05, - "loss": 0.8451, - "step": 10120 - }, - { - "epoch": 0.72, - "learning_rate": 2.9749309876402122e-05, - "loss": 0.8435, - "step": 10130 - }, - { - "epoch": 0.72, - "learning_rate": 2.9729326389106564e-05, - "loss": 0.8475, - "step": 10140 - }, - { - "epoch": 0.72, - "learning_rate": 2.9709330169180238e-05, - "loss": 0.8423, - "step": 10150 - }, - { - "epoch": 0.72, - "learning_rate": 2.9689321242791906e-05, - "loss": 0.846, - "step": 10160 - }, - { - "epoch": 0.72, - "learning_rate": 2.9669299636126973e-05, - "loss": 0.845, - "step": 10170 - }, - { - "epoch": 0.72, - "learning_rate": 2.964926537538742e-05, - "loss": 0.843, - "step": 10180 - }, - { - "epoch": 0.72, - "learning_rate": 2.9629218486791807e-05, - "loss": 0.847, - "step": 10190 - }, - { - "epoch": 0.72, - "learning_rate": 2.9609158996575207e-05, - "loss": 0.846, - "step": 10200 - }, - { - "epoch": 0.72, - "learning_rate": 2.9589086930989196e-05, - "loss": 0.8472, - "step": 10210 - }, - { - "epoch": 0.72, - "learning_rate": 2.956900231630179e-05, - "loss": 0.8405, - "step": 10220 - }, - { - "epoch": 0.72, - "learning_rate": 2.9548905178797442e-05, - "loss": 0.8372, - "step": 10230 - }, - { - "epoch": 0.72, - "learning_rate": 2.952879554477699e-05, - "loss": 0.844, - "step": 10240 - }, - { - "epoch": 0.72, - "learning_rate": 2.9508673440557625e-05, - "loss": 0.8434, - "step": 10250 - }, - { - "epoch": 0.72, - "learning_rate": 2.948853889247286e-05, - "loss": 0.8393, - "step": 10260 - }, - { - "epoch": 0.73, - "learning_rate": 2.9468391926872487e-05, - "loss": 0.8438, - "step": 10270 - }, - { - "epoch": 0.73, - "learning_rate": 2.9448232570122557e-05, - "loss": 0.8412, - "step": 10280 - }, - { - "epoch": 0.73, - "learning_rate": 2.942806084860533e-05, - "loss": 0.8415, - "step": 10290 - }, - { - "epoch": 0.73, - "learning_rate": 2.9407876788719253e-05, - "loss": 0.8446, - "step": 10300 - }, - { - "epoch": 0.73, - "learning_rate": 2.9387680416878915e-05, - "loss": 0.8449, - "step": 10310 - }, - { - "epoch": 0.73, - "learning_rate": 2.936747175951502e-05, - "loss": 0.8492, - "step": 10320 - }, - { - "epoch": 0.73, - "learning_rate": 2.934725084307435e-05, - "loss": 0.8442, - "step": 10330 - }, - { - "epoch": 0.73, - "learning_rate": 2.9327017694019737e-05, - "loss": 0.843, - "step": 10340 - }, - { - "epoch": 0.73, - "learning_rate": 2.930677233883e-05, - "loss": 0.848, - "step": 10350 - }, - { - "epoch": 0.73, - "learning_rate": 2.9286514803999963e-05, - "loss": 0.8436, - "step": 10360 - }, - { - "epoch": 0.73, - "learning_rate": 2.926624511604036e-05, - "loss": 0.8414, - "step": 10370 - }, - { - "epoch": 0.73, - "learning_rate": 2.924596330147785e-05, - "loss": 0.8418, - "step": 10380 - }, - { - "epoch": 0.73, - "learning_rate": 2.9225669386854955e-05, - "loss": 0.8421, - "step": 10390 - }, - { - "epoch": 0.73, - "learning_rate": 2.9205363398730032e-05, - "loss": 0.8387, - "step": 10400 - }, - { - "epoch": 0.74, - "learning_rate": 2.918504536367723e-05, - "loss": 0.8409, - "step": 10410 - }, - { - "epoch": 0.74, - "learning_rate": 2.9164715308286474e-05, - "loss": 0.8445, - "step": 10420 - }, - { - "epoch": 0.74, - "learning_rate": 2.914437325916343e-05, - "loss": 0.8413, - "step": 10430 - }, - { - "epoch": 0.74, - "learning_rate": 2.912401924292944e-05, - "loss": 0.8371, - "step": 10440 - }, - { - "epoch": 0.74, - "learning_rate": 2.9103653286221508e-05, - "loss": 0.8427, - "step": 10450 - }, - { - "epoch": 0.74, - "learning_rate": 2.908327541569228e-05, - "loss": 0.838, - "step": 10460 - }, - { - "epoch": 0.74, - "learning_rate": 2.9062885658009978e-05, - "loss": 0.8401, - "step": 10470 - }, - { - "epoch": 0.74, - "learning_rate": 2.9042484039858397e-05, - "loss": 0.8333, - "step": 10480 - }, - { - "epoch": 0.74, - "learning_rate": 2.9022070587936837e-05, - "loss": 0.8424, - "step": 10490 - }, - { - "epoch": 0.74, - "learning_rate": 2.9001645328960092e-05, - "loss": 0.8416, - "step": 10500 - }, - { - "epoch": 0.74, - "learning_rate": 2.898120828965841e-05, - "loss": 0.8489, - "step": 10510 - }, - { - "epoch": 0.74, - "learning_rate": 2.8960759496777444e-05, - "loss": 0.8442, - "step": 10520 - }, - { - "epoch": 0.74, - "learning_rate": 2.8940298977078254e-05, - "loss": 0.8352, - "step": 10530 - }, - { - "epoch": 0.74, - "learning_rate": 2.8919826757337218e-05, - "loss": 0.8413, - "step": 10540 - }, - { - "epoch": 0.75, - "learning_rate": 2.889934286434604e-05, - "loss": 0.8369, - "step": 10550 - }, - { - "epoch": 0.75, - "learning_rate": 2.88788473249117e-05, - "loss": 0.8431, - "step": 10560 - }, - { - "epoch": 0.75, - "learning_rate": 2.885834016585642e-05, - "loss": 0.8444, - "step": 10570 - }, - { - "epoch": 0.75, - "learning_rate": 2.883782141401763e-05, - "loss": 0.8423, - "step": 10580 - }, - { - "epoch": 0.75, - "learning_rate": 2.8817291096247918e-05, - "loss": 0.837, - "step": 10590 - }, - { - "epoch": 0.75, - "learning_rate": 2.8796749239415027e-05, - "loss": 0.8352, - "step": 10600 - }, - { - "epoch": 0.75, - "learning_rate": 2.8776195870401787e-05, - "loss": 0.8348, - "step": 10610 - }, - { - "epoch": 0.75, - "learning_rate": 2.8755631016106095e-05, - "loss": 0.8442, - "step": 10620 - }, - { - "epoch": 0.75, - "learning_rate": 2.8735054703440903e-05, - "loss": 0.8427, - "step": 10630 - }, - { - "epoch": 0.75, - "learning_rate": 2.871446695933411e-05, - "loss": 0.8401, - "step": 10640 - }, - { - "epoch": 0.75, - "learning_rate": 2.869386781072863e-05, - "loss": 0.8367, - "step": 10650 - }, - { - "epoch": 0.75, - "learning_rate": 2.867325728458225e-05, - "loss": 0.8501, - "step": 10660 - }, - { - "epoch": 0.75, - "learning_rate": 2.86526354078677e-05, - "loss": 0.8445, - "step": 10670 - }, - { - "epoch": 0.75, - "learning_rate": 2.863200220757251e-05, - "loss": 0.8357, - "step": 10680 - }, - { - "epoch": 0.76, - "learning_rate": 2.8611357710699065e-05, - "loss": 0.8394, - "step": 10690 - }, - { - "epoch": 0.76, - "learning_rate": 2.859070194426452e-05, - "loss": 0.8384, - "step": 10700 - }, - { - "epoch": 0.76, - "learning_rate": 2.85700349353008e-05, - "loss": 0.83, - "step": 10710 - }, - { - "epoch": 0.76, - "learning_rate": 2.85493567108545e-05, - "loss": 0.8343, - "step": 10720 - }, - { - "epoch": 0.76, - "learning_rate": 2.852866729798693e-05, - "loss": 0.8314, - "step": 10730 - }, - { - "epoch": 0.76, - "learning_rate": 2.8507966723774033e-05, - "loss": 0.8405, - "step": 10740 - }, - { - "epoch": 0.76, - "learning_rate": 2.848725501530635e-05, - "loss": 0.843, - "step": 10750 - }, - { - "epoch": 0.76, - "learning_rate": 2.8466532199689e-05, - "loss": 0.8309, - "step": 10760 - }, - { - "epoch": 0.76, - "learning_rate": 2.8445798304041635e-05, - "loss": 0.8388, - "step": 10770 - }, - { - "epoch": 0.76, - "learning_rate": 2.8425053355498408e-05, - "loss": 0.8385, - "step": 10780 - }, - { - "epoch": 0.76, - "learning_rate": 2.8404297381207944e-05, - "loss": 0.8487, - "step": 10790 - }, - { - "epoch": 0.76, - "learning_rate": 2.8383530408333285e-05, - "loss": 0.8389, - "step": 10800 - }, - { - "epoch": 0.76, - "learning_rate": 2.8362752464051873e-05, - "loss": 0.836, - "step": 10810 - }, - { - "epoch": 0.76, - "learning_rate": 2.8341963575555513e-05, - "loss": 0.8313, - "step": 10820 - }, - { - "epoch": 0.76, - "learning_rate": 2.8321163770050312e-05, - "loss": 0.8407, - "step": 10830 - }, - { - "epoch": 0.77, - "learning_rate": 2.8300353074756698e-05, - "loss": 0.8298, - "step": 10840 - }, - { - "epoch": 0.77, - "learning_rate": 2.8279531516909314e-05, - "loss": 0.8368, - "step": 10850 - }, - { - "epoch": 0.77, - "learning_rate": 2.8258699123757048e-05, - "loss": 0.8335, - "step": 10860 - }, - { - "epoch": 0.77, - "learning_rate": 2.8237855922562956e-05, - "loss": 0.8334, - "step": 10870 - }, - { - "epoch": 0.77, - "learning_rate": 2.8217001940604234e-05, - "loss": 0.8394, - "step": 10880 - }, - { - "epoch": 0.77, - "learning_rate": 2.819613720517219e-05, - "loss": 0.836, - "step": 10890 - }, - { - "epoch": 0.77, - "learning_rate": 2.8175261743572207e-05, - "loss": 0.8328, - "step": 10900 - }, - { - "epoch": 0.77, - "learning_rate": 2.8154375583123706e-05, - "loss": 0.8367, - "step": 10910 - }, - { - "epoch": 0.77, - "learning_rate": 2.8133478751160104e-05, - "loss": 0.8341, - "step": 10920 - }, - { - "epoch": 0.77, - "learning_rate": 2.811257127502879e-05, - "loss": 0.8346, - "step": 10930 - }, - { - "epoch": 0.77, - "learning_rate": 2.8091653182091083e-05, - "loss": 0.8342, - "step": 10940 - }, - { - "epoch": 0.77, - "learning_rate": 2.8070724499722186e-05, - "loss": 0.8395, - "step": 10950 - }, - { - "epoch": 0.77, - "learning_rate": 2.804978525531117e-05, - "loss": 0.8364, - "step": 10960 - }, - { - "epoch": 0.77, - "learning_rate": 2.8028835476260928e-05, - "loss": 0.8338, - "step": 10970 - }, - { - "epoch": 0.78, - "learning_rate": 2.8007875189988136e-05, - "loss": 0.8367, - "step": 10980 - }, - { - "epoch": 0.78, - "learning_rate": 2.7986904423923218e-05, - "loss": 0.8436, - "step": 10990 - }, - { - "epoch": 0.78, - "learning_rate": 2.7965923205510324e-05, - "loss": 0.8386, - "step": 11000 - }, - { - "epoch": 0.78, - "learning_rate": 2.794493156220727e-05, - "loss": 0.8293, - "step": 11010 - }, - { - "epoch": 0.78, - "learning_rate": 2.7923929521485525e-05, - "loss": 0.8347, - "step": 11020 - }, - { - "epoch": 0.78, - "learning_rate": 2.790291711083015e-05, - "loss": 0.8416, - "step": 11030 - }, - { - "epoch": 0.78, - "learning_rate": 2.7881894357739803e-05, - "loss": 0.8335, - "step": 11040 - }, - { - "epoch": 0.78, - "learning_rate": 2.7860861289726647e-05, - "loss": 0.8368, - "step": 11050 - }, - { - "epoch": 0.78, - "learning_rate": 2.783981793431637e-05, - "loss": 0.8385, - "step": 11060 - }, - { - "epoch": 0.78, - "learning_rate": 2.7818764319048102e-05, - "loss": 0.8316, - "step": 11070 - }, - { - "epoch": 0.78, - "learning_rate": 2.7797700471474415e-05, - "loss": 0.839, - "step": 11080 - }, - { - "epoch": 0.78, - "learning_rate": 2.7776626419161263e-05, - "loss": 0.8286, - "step": 11090 - }, - { - "epoch": 0.78, - "learning_rate": 2.7755542189687957e-05, - "loss": 0.8357, - "step": 11100 - }, - { - "epoch": 0.78, - "learning_rate": 2.773444781064713e-05, - "loss": 0.8369, - "step": 11110 - }, - { - "epoch": 0.79, - "learning_rate": 2.7713343309644693e-05, - "loss": 0.8322, - "step": 11120 - }, - { - "epoch": 0.79, - "learning_rate": 2.7692228714299814e-05, - "loss": 0.8313, - "step": 11130 - }, - { - "epoch": 0.79, - "learning_rate": 2.767110405224485e-05, - "loss": 0.8384, - "step": 11140 - }, - { - "epoch": 0.79, - "learning_rate": 2.764996935112535e-05, - "loss": 0.8403, - "step": 11150 - }, - { - "epoch": 0.79, - "learning_rate": 2.7628824638600004e-05, - "loss": 0.8576, - "step": 11160 - }, - { - "epoch": 0.79, - "learning_rate": 2.7607669942340588e-05, - "loss": 0.8731, - "step": 11170 - }, - { - "epoch": 0.79, - "learning_rate": 2.7586505290031946e-05, - "loss": 0.8715, - "step": 11180 - }, - { - "epoch": 0.79, - "learning_rate": 2.756533070937197e-05, - "loss": 0.8995, - "step": 11190 - }, - { - "epoch": 0.79, - "learning_rate": 2.7544146228071524e-05, - "loss": 0.8774, - "step": 11200 - }, - { - "epoch": 0.79, - "learning_rate": 2.752295187385444e-05, - "loss": 0.8696, - "step": 11210 - }, - { - "epoch": 0.79, - "learning_rate": 2.7501747674457462e-05, - "loss": 0.8675, - "step": 11220 - }, - { - "epoch": 0.79, - "learning_rate": 2.748053365763023e-05, - "loss": 0.8541, - "step": 11230 - }, - { - "epoch": 0.79, - "learning_rate": 2.745930985113522e-05, - "loss": 0.8546, - "step": 11240 - }, - { - "epoch": 0.79, - "learning_rate": 2.743807628274773e-05, - "loss": 0.8482, - "step": 11250 - }, - { - "epoch": 0.8, - "learning_rate": 2.7416832980255823e-05, - "loss": 0.8488, - "step": 11260 - }, - { - "epoch": 0.8, - "learning_rate": 2.7395579971460313e-05, - "loss": 0.8427, - "step": 11270 - }, - { - "epoch": 0.8, - "learning_rate": 2.7374317284174705e-05, - "loss": 0.8478, - "step": 11280 - }, - { - "epoch": 0.8, - "learning_rate": 2.7353044946225172e-05, - "loss": 0.8459, - "step": 11290 - }, - { - "epoch": 0.8, - "learning_rate": 2.733176298545053e-05, - "loss": 0.8439, - "step": 11300 - }, - { - "epoch": 0.8, - "learning_rate": 2.731047142970216e-05, - "loss": 0.8401, - "step": 11310 - }, - { - "epoch": 0.8, - "learning_rate": 2.728917030684404e-05, - "loss": 0.8487, - "step": 11320 - }, - { - "epoch": 0.8, - "learning_rate": 2.726785964475262e-05, - "loss": 0.848, - "step": 11330 - }, - { - "epoch": 0.8, - "learning_rate": 2.7246539471316885e-05, - "loss": 0.8441, - "step": 11340 - }, - { - "epoch": 0.8, - "learning_rate": 2.722520981443823e-05, - "loss": 0.8377, - "step": 11350 - }, - { - "epoch": 0.8, - "learning_rate": 2.720387070203047e-05, - "loss": 0.8417, - "step": 11360 - }, - { - "epoch": 0.8, - "learning_rate": 2.7182522162019808e-05, - "loss": 0.8355, - "step": 11370 - }, - { - "epoch": 0.8, - "learning_rate": 2.716116422234476e-05, - "loss": 0.8411, - "step": 11380 - }, - { - "epoch": 0.8, - "learning_rate": 2.7139796910956165e-05, - "loss": 0.8437, - "step": 11390 - }, - { - "epoch": 0.81, - "learning_rate": 2.7118420255817122e-05, - "loss": 0.8393, - "step": 11400 - }, - { - "epoch": 0.81, - "learning_rate": 2.7097034284902952e-05, - "loss": 0.8429, - "step": 11410 - }, - { - "epoch": 0.81, - "learning_rate": 2.7075639026201167e-05, - "loss": 0.84, - "step": 11420 - }, - { - "epoch": 0.81, - "learning_rate": 2.705423450771144e-05, - "loss": 0.8401, - "step": 11430 - }, - { - "epoch": 0.81, - "learning_rate": 2.703282075744556e-05, - "loss": 0.8366, - "step": 11440 - }, - { - "epoch": 0.81, - "learning_rate": 2.7011397803427395e-05, - "loss": 0.8404, - "step": 11450 - }, - { - "epoch": 0.81, - "learning_rate": 2.6989965673692857e-05, - "loss": 0.8454, - "step": 11460 - }, - { - "epoch": 0.81, - "learning_rate": 2.6968524396289874e-05, - "loss": 0.8451, - "step": 11470 - }, - { - "epoch": 0.81, - "learning_rate": 2.694707399927834e-05, - "loss": 0.8412, - "step": 11480 - }, - { - "epoch": 0.81, - "learning_rate": 2.692561451073008e-05, - "loss": 0.8341, - "step": 11490 - }, - { - "epoch": 0.81, - "learning_rate": 2.6904145958728826e-05, - "loss": 0.8362, - "step": 11500 - }, - { - "epoch": 0.81, - "learning_rate": 2.6882668371370156e-05, - "loss": 0.8362, - "step": 11510 - }, - { - "epoch": 0.81, - "learning_rate": 2.6861181776761496e-05, - "loss": 0.8395, - "step": 11520 - }, - { - "epoch": 0.81, - "learning_rate": 2.6839686203022032e-05, - "loss": 0.8384, - "step": 11530 - }, - { - "epoch": 0.82, - "learning_rate": 2.6818181678282725e-05, - "loss": 0.8412, - "step": 11540 - }, - { - "epoch": 0.82, - "learning_rate": 2.679666823068624e-05, - "loss": 0.8325, - "step": 11550 - }, - { - "epoch": 0.82, - "learning_rate": 2.677514588838692e-05, - "loss": 0.8353, - "step": 11560 - }, - { - "epoch": 0.82, - "learning_rate": 2.6753614679550738e-05, - "loss": 0.8426, - "step": 11570 - }, - { - "epoch": 0.82, - "learning_rate": 2.6732074632355286e-05, - "loss": 0.8359, - "step": 11580 - }, - { - "epoch": 0.82, - "learning_rate": 2.6710525774989718e-05, - "loss": 0.8344, - "step": 11590 - }, - { - "epoch": 0.82, - "learning_rate": 2.6688968135654712e-05, - "loss": 0.8332, - "step": 11600 - }, - { - "epoch": 0.82, - "learning_rate": 2.6667401742562452e-05, - "loss": 0.8356, - "step": 11610 - }, - { - "epoch": 0.82, - "learning_rate": 2.664582662393656e-05, - "loss": 0.8238, - "step": 11620 - }, - { - "epoch": 0.82, - "learning_rate": 2.6624242808012098e-05, - "loss": 0.8394, - "step": 11630 - }, - { - "epoch": 0.82, - "learning_rate": 2.6602650323035484e-05, - "loss": 0.8319, - "step": 11640 - }, - { - "epoch": 0.82, - "learning_rate": 2.6581049197264503e-05, - "loss": 0.8348, - "step": 11650 - }, - { - "epoch": 0.82, - "learning_rate": 2.6559439458968233e-05, - "loss": 0.8315, - "step": 11660 - }, - { - "epoch": 0.82, - "learning_rate": 2.653782113642705e-05, - "loss": 0.8328, - "step": 11670 - }, - { - "epoch": 0.83, - "learning_rate": 2.6516194257932522e-05, - "loss": 0.8345, - "step": 11680 - }, - { - "epoch": 0.83, - "learning_rate": 2.6494558851787445e-05, - "loss": 0.8422, - "step": 11690 - }, - { - "epoch": 0.83, - "learning_rate": 2.6472914946305768e-05, - "loss": 0.8417, - "step": 11700 - }, - { - "epoch": 0.83, - "learning_rate": 2.6451262569812567e-05, - "loss": 0.8416, - "step": 11710 - }, - { - "epoch": 0.83, - "learning_rate": 2.642960175064399e-05, - "loss": 0.8346, - "step": 11720 - }, - { - "epoch": 0.83, - "learning_rate": 2.6407932517147244e-05, - "loss": 0.8358, - "step": 11730 - }, - { - "epoch": 0.83, - "learning_rate": 2.6386254897680546e-05, - "loss": 0.8367, - "step": 11740 - }, - { - "epoch": 0.83, - "learning_rate": 2.6364568920613094e-05, - "loss": 0.8325, - "step": 11750 - }, - { - "epoch": 0.83, - "learning_rate": 2.6342874614325004e-05, - "loss": 0.8356, - "step": 11760 - }, - { - "epoch": 0.83, - "learning_rate": 2.632117200720732e-05, - "loss": 0.8281, - "step": 11770 - }, - { - "epoch": 0.83, - "learning_rate": 2.629946112766192e-05, - "loss": 0.8346, - "step": 11780 - }, - { - "epoch": 0.83, - "learning_rate": 2.6277742004101536e-05, - "loss": 0.8351, - "step": 11790 - }, - { - "epoch": 0.83, - "learning_rate": 2.625601466494967e-05, - "loss": 0.8401, - "step": 11800 - }, - { - "epoch": 0.83, - "learning_rate": 2.623427913864058e-05, - "loss": 0.8367, - "step": 11810 - }, - { - "epoch": 0.83, - "learning_rate": 2.621253545361924e-05, - "loss": 0.8282, - "step": 11820 - }, - { - "epoch": 0.84, - "learning_rate": 2.6190783638341302e-05, - "loss": 0.8396, - "step": 11830 - }, - { - "epoch": 0.84, - "learning_rate": 2.6169023721273047e-05, - "loss": 0.8293, - "step": 11840 - }, - { - "epoch": 0.84, - "learning_rate": 2.6147255730891384e-05, - "loss": 0.836, - "step": 11850 - }, - { - "epoch": 0.84, - "learning_rate": 2.612547969568377e-05, - "loss": 0.8323, - "step": 11860 - }, - { - "epoch": 0.84, - "learning_rate": 2.6103695644148182e-05, - "loss": 0.8256, - "step": 11870 - }, - { - "epoch": 0.84, - "learning_rate": 2.6081903604793098e-05, - "loss": 0.8355, - "step": 11880 - }, - { - "epoch": 0.84, - "learning_rate": 2.6060103606137458e-05, - "loss": 0.8313, - "step": 11890 - }, - { - "epoch": 0.84, - "learning_rate": 2.6038295676710607e-05, - "loss": 0.8319, - "step": 11900 - }, - { - "epoch": 0.84, - "learning_rate": 2.6016479845052266e-05, - "loss": 0.8282, - "step": 11910 - }, - { - "epoch": 0.84, - "learning_rate": 2.5994656139712504e-05, - "loss": 0.8308, - "step": 11920 - }, - { - "epoch": 0.84, - "learning_rate": 2.59728245892517e-05, - "loss": 0.8287, - "step": 11930 - }, - { - "epoch": 0.84, - "learning_rate": 2.595098522224049e-05, - "loss": 0.8318, - "step": 11940 - }, - { - "epoch": 0.84, - "learning_rate": 2.5929138067259735e-05, - "loss": 0.8346, - "step": 11950 - }, - { - "epoch": 0.84, - "learning_rate": 2.5907283152900508e-05, - "loss": 0.8366, - "step": 11960 - }, - { - "epoch": 0.85, - "learning_rate": 2.588542050776401e-05, - "loss": 0.8334, - "step": 11970 - }, - { - "epoch": 0.85, - "learning_rate": 2.5863550160461587e-05, - "loss": 0.8316, - "step": 11980 - }, - { - "epoch": 0.85, - "learning_rate": 2.584167213961464e-05, - "loss": 0.8317, - "step": 11990 - }, - { - "epoch": 0.85, - "learning_rate": 2.5819786473854627e-05, - "loss": 0.8327, - "step": 12000 - }, - { - "epoch": 0.85, - "learning_rate": 2.5797893191823e-05, - "loss": 0.8366, - "step": 12010 - }, - { - "epoch": 0.85, - "learning_rate": 2.5775992322171207e-05, - "loss": 0.8319, - "step": 12020 - }, - { - "epoch": 0.85, - "learning_rate": 2.575408389356058e-05, - "loss": 0.8367, - "step": 12030 - }, - { - "epoch": 0.85, - "learning_rate": 2.573216793466238e-05, - "loss": 0.8278, - "step": 12040 - }, - { - "epoch": 0.85, - "learning_rate": 2.57102444741577e-05, - "loss": 0.8318, - "step": 12050 - }, - { - "epoch": 0.85, - "learning_rate": 2.5688313540737473e-05, - "loss": 0.83, - "step": 12060 - }, - { - "epoch": 0.85, - "learning_rate": 2.5666375163102388e-05, - "loss": 0.8265, - "step": 12070 - }, - { - "epoch": 0.85, - "learning_rate": 2.5644429369962894e-05, - "loss": 0.8296, - "step": 12080 - }, - { - "epoch": 0.85, - "learning_rate": 2.5622476190039135e-05, - "loss": 0.8326, - "step": 12090 - }, - { - "epoch": 0.85, - "learning_rate": 2.560051565206092e-05, - "loss": 0.8325, - "step": 12100 - }, - { - "epoch": 0.86, - "learning_rate": 2.5578547784767702e-05, - "loss": 0.8348, - "step": 12110 - }, - { - "epoch": 0.86, - "learning_rate": 2.555657261690851e-05, - "loss": 0.8244, - "step": 12120 - }, - { - "epoch": 0.86, - "learning_rate": 2.5534590177241936e-05, - "loss": 0.8249, - "step": 12130 - }, - { - "epoch": 0.86, - "learning_rate": 2.5512600494536077e-05, - "loss": 0.8305, - "step": 12140 - }, - { - "epoch": 0.86, - "learning_rate": 2.5490603597568528e-05, - "loss": 0.8331, - "step": 12150 - }, - { - "epoch": 0.86, - "learning_rate": 2.546859951512631e-05, - "loss": 0.8354, - "step": 12160 - }, - { - "epoch": 0.86, - "learning_rate": 2.5446588276005857e-05, - "loss": 0.8248, - "step": 12170 - }, - { - "epoch": 0.86, - "learning_rate": 2.542456990901295e-05, - "loss": 0.8329, - "step": 12180 - }, - { - "epoch": 0.86, - "learning_rate": 2.5402544442962724e-05, - "loss": 0.8352, - "step": 12190 - }, - { - "epoch": 0.86, - "learning_rate": 2.5380511906679587e-05, - "loss": 0.8318, - "step": 12200 - }, - { - "epoch": 0.86, - "learning_rate": 2.5358472328997214e-05, - "loss": 0.8321, - "step": 12210 - }, - { - "epoch": 0.86, - "learning_rate": 2.533642573875848e-05, - "loss": 0.8292, - "step": 12220 - }, - { - "epoch": 0.86, - "learning_rate": 2.531437216481544e-05, - "loss": 0.8294, - "step": 12230 - }, - { - "epoch": 0.86, - "learning_rate": 2.5292311636029293e-05, - "loss": 0.8348, - "step": 12240 - }, - { - "epoch": 0.87, - "learning_rate": 2.527024418127035e-05, - "loss": 0.8381, - "step": 12250 - }, - { - "epoch": 0.87, - "learning_rate": 2.5248169829417957e-05, - "loss": 0.8278, - "step": 12260 - }, - { - "epoch": 0.87, - "learning_rate": 2.522608860936051e-05, - "loss": 0.8273, - "step": 12270 - }, - { - "epoch": 0.87, - "learning_rate": 2.5204000549995397e-05, - "loss": 0.829, - "step": 12280 - }, - { - "epoch": 0.87, - "learning_rate": 2.5181905680228935e-05, - "loss": 0.8233, - "step": 12290 - }, - { - "epoch": 0.87, - "learning_rate": 2.5159804028976367e-05, - "loss": 0.8286, - "step": 12300 - }, - { - "epoch": 0.87, - "learning_rate": 2.513769562516181e-05, - "loss": 0.8304, - "step": 12310 - }, - { - "epoch": 0.87, - "learning_rate": 2.5115580497718223e-05, - "loss": 0.8371, - "step": 12320 - }, - { - "epoch": 0.87, - "learning_rate": 2.5093458675587344e-05, - "loss": 0.8297, - "step": 12330 - }, - { - "epoch": 0.87, - "learning_rate": 2.50713301877197e-05, - "loss": 0.8313, - "step": 12340 - }, - { - "epoch": 0.87, - "learning_rate": 2.5049195063074522e-05, - "loss": 0.8318, - "step": 12350 - }, - { - "epoch": 0.87, - "learning_rate": 2.5027053330619735e-05, - "loss": 0.8312, - "step": 12360 - }, - { - "epoch": 0.87, - "learning_rate": 2.5004905019331903e-05, - "loss": 0.8293, - "step": 12370 - }, - { - "epoch": 0.87, - "learning_rate": 2.498275015819621e-05, - "loss": 0.8235, - "step": 12380 - }, - { - "epoch": 0.88, - "learning_rate": 2.49605887762064e-05, - "loss": 0.8282, - "step": 12390 - }, - { - "epoch": 0.88, - "learning_rate": 2.4938420902364774e-05, - "loss": 0.8285, - "step": 12400 - }, - { - "epoch": 0.88, - "learning_rate": 2.4916246565682097e-05, - "loss": 0.8278, - "step": 12410 - }, - { - "epoch": 0.88, - "learning_rate": 2.4894065795177607e-05, - "loss": 0.8361, - "step": 12420 - }, - { - "epoch": 0.88, - "learning_rate": 2.4871878619878973e-05, - "loss": 0.833, - "step": 12430 - }, - { - "epoch": 0.88, - "learning_rate": 2.484968506882223e-05, - "loss": 0.8351, - "step": 12440 - }, - { - "epoch": 0.88, - "learning_rate": 2.4827485171051756e-05, - "loss": 0.8288, - "step": 12450 - }, - { - "epoch": 0.88, - "learning_rate": 2.480527895562025e-05, - "loss": 0.8269, - "step": 12460 - }, - { - "epoch": 0.88, - "learning_rate": 2.478306645158867e-05, - "loss": 0.824, - "step": 12470 - }, - { - "epoch": 0.88, - "learning_rate": 2.4760847688026198e-05, - "loss": 0.8285, - "step": 12480 - }, - { - "epoch": 0.88, - "learning_rate": 2.473862269401021e-05, - "loss": 0.8401, - "step": 12490 - }, - { - "epoch": 0.88, - "learning_rate": 2.4716391498626244e-05, - "loss": 0.8317, - "step": 12500 - }, - { - "epoch": 0.88, - "learning_rate": 2.4694154130967945e-05, - "loss": 0.8345, - "step": 12510 - }, - { - "epoch": 0.88, - "learning_rate": 2.4671910620137053e-05, - "loss": 0.8313, - "step": 12520 - }, - { - "epoch": 0.89, - "learning_rate": 2.4649660995243312e-05, - "loss": 0.8277, - "step": 12530 - }, - { - "epoch": 0.89, - "learning_rate": 2.46274052854045e-05, - "loss": 0.8225, - "step": 12540 - }, - { - "epoch": 0.89, - "learning_rate": 2.4605143519746352e-05, - "loss": 0.8263, - "step": 12550 - }, - { - "epoch": 0.89, - "learning_rate": 2.4582875727402516e-05, - "loss": 0.8237, - "step": 12560 - }, - { - "epoch": 0.89, - "learning_rate": 2.4560601937514537e-05, - "loss": 0.8312, - "step": 12570 - }, - { - "epoch": 0.89, - "learning_rate": 2.4538322179231804e-05, - "loss": 0.8357, - "step": 12580 - }, - { - "epoch": 0.89, - "learning_rate": 2.451603648171153e-05, - "loss": 0.8255, - "step": 12590 - }, - { - "epoch": 0.89, - "learning_rate": 2.4493744874118668e-05, - "loss": 0.8312, - "step": 12600 - }, - { - "epoch": 0.89, - "learning_rate": 2.4471447385625945e-05, - "loss": 0.8232, - "step": 12610 - }, - { - "epoch": 0.89, - "learning_rate": 2.4449144045413763e-05, - "loss": 0.8255, - "step": 12620 - }, - { - "epoch": 0.89, - "learning_rate": 2.4426834882670182e-05, - "loss": 0.8213, - "step": 12630 - }, - { - "epoch": 0.89, - "learning_rate": 2.4404519926590888e-05, - "loss": 0.8305, - "step": 12640 - }, - { - "epoch": 0.89, - "learning_rate": 2.4382199206379146e-05, - "loss": 0.8276, - "step": 12650 - }, - { - "epoch": 0.89, - "learning_rate": 2.4359872751245763e-05, - "loss": 0.8259, - "step": 12660 - }, - { - "epoch": 0.89, - "learning_rate": 2.4337540590409054e-05, - "loss": 0.8283, - "step": 12670 - }, - { - "epoch": 0.9, - "learning_rate": 2.4315202753094794e-05, - "loss": 0.8322, - "step": 12680 - }, - { - "epoch": 0.9, - "learning_rate": 2.42928592685362e-05, - "loss": 0.8296, - "step": 12690 - }, - { - "epoch": 0.9, - "learning_rate": 2.4270510165973865e-05, - "loss": 0.8211, - "step": 12700 - }, - { - "epoch": 0.9, - "learning_rate": 2.424815547465575e-05, - "loss": 0.8254, - "step": 12710 - }, - { - "epoch": 0.9, - "learning_rate": 2.4225795223837114e-05, - "loss": 0.8312, - "step": 12720 - }, - { - "epoch": 0.9, - "learning_rate": 2.42034294427805e-05, - "loss": 0.8276, - "step": 12730 - }, - { - "epoch": 0.9, - "learning_rate": 2.4181058160755682e-05, - "loss": 0.8308, - "step": 12740 - }, - { - "epoch": 0.9, - "learning_rate": 2.415868140703965e-05, - "loss": 0.82, - "step": 12750 - }, - { - "epoch": 0.9, - "learning_rate": 2.4136299210916532e-05, - "loss": 0.8272, - "step": 12760 - }, - { - "epoch": 0.9, - "learning_rate": 2.4113911601677592e-05, - "loss": 0.8316, - "step": 12770 - }, - { - "epoch": 0.9, - "learning_rate": 2.4091518608621183e-05, - "loss": 0.8289, - "step": 12780 - }, - { - "epoch": 0.9, - "learning_rate": 2.4069120261052682e-05, - "loss": 0.8267, - "step": 12790 - }, - { - "epoch": 0.9, - "learning_rate": 2.4046716588284492e-05, - "loss": 0.824, - "step": 12800 - }, - { - "epoch": 0.9, - "learning_rate": 2.4024307619635984e-05, - "loss": 0.821, - "step": 12810 - }, - { - "epoch": 0.91, - "learning_rate": 2.400189338443345e-05, - "loss": 0.8322, - "step": 12820 - }, - { - "epoch": 0.91, - "learning_rate": 2.3979473912010094e-05, - "loss": 0.8339, - "step": 12830 - }, - { - "epoch": 0.91, - "learning_rate": 2.3957049231705946e-05, - "loss": 0.8256, - "step": 12840 - }, - { - "epoch": 0.91, - "learning_rate": 2.3934619372867866e-05, - "loss": 0.8233, - "step": 12850 - }, - { - "epoch": 0.91, - "learning_rate": 2.3912184364849492e-05, - "loss": 0.8185, - "step": 12860 - }, - { - "epoch": 0.91, - "learning_rate": 2.388974423701121e-05, - "loss": 0.8257, - "step": 12870 - }, - { - "epoch": 0.91, - "learning_rate": 2.3867299018720084e-05, - "loss": 0.8285, - "step": 12880 - }, - { - "epoch": 0.91, - "learning_rate": 2.3844848739349846e-05, - "loss": 0.8218, - "step": 12890 - }, - { - "epoch": 0.91, - "learning_rate": 2.3822393428280873e-05, - "loss": 0.8207, - "step": 12900 - }, - { - "epoch": 0.91, - "learning_rate": 2.3799933114900096e-05, - "loss": 0.8256, - "step": 12910 - }, - { - "epoch": 0.91, - "learning_rate": 2.3777467828601016e-05, - "loss": 0.8304, - "step": 12920 - }, - { - "epoch": 0.91, - "learning_rate": 2.3754997598783637e-05, - "loss": 0.8314, - "step": 12930 - }, - { - "epoch": 0.91, - "learning_rate": 2.373252245485441e-05, - "loss": 0.8228, - "step": 12940 - }, - { - "epoch": 0.91, - "learning_rate": 2.3710042426226254e-05, - "loss": 0.8339, - "step": 12950 - }, - { - "epoch": 0.92, - "learning_rate": 2.3687557542318447e-05, - "loss": 0.8282, - "step": 12960 - }, - { - "epoch": 0.92, - "learning_rate": 2.366506783255665e-05, - "loss": 0.8255, - "step": 12970 - }, - { - "epoch": 0.92, - "learning_rate": 2.3642573326372825e-05, - "loss": 0.8301, - "step": 12980 - }, - { - "epoch": 0.92, - "learning_rate": 2.3620074053205202e-05, - "loss": 0.8217, - "step": 12990 - }, - { - "epoch": 0.92, - "learning_rate": 2.3597570042498262e-05, - "loss": 0.8214, - "step": 13000 - }, - { - "epoch": 0.92, - "learning_rate": 2.357506132370269e-05, - "loss": 0.8173, - "step": 13010 - }, - { - "epoch": 0.92, - "learning_rate": 2.3552547926275313e-05, - "loss": 0.8229, - "step": 13020 - }, - { - "epoch": 0.92, - "learning_rate": 2.3530029879679104e-05, - "loss": 0.8241, - "step": 13030 - }, - { - "epoch": 0.92, - "learning_rate": 2.3507507213383108e-05, - "loss": 0.8256, - "step": 13040 - }, - { - "epoch": 0.92, - "learning_rate": 2.3484979956862413e-05, - "loss": 0.8285, - "step": 13050 - }, - { - "epoch": 0.92, - "learning_rate": 2.3462448139598123e-05, - "loss": 0.8183, - "step": 13060 - }, - { - "epoch": 0.92, - "learning_rate": 2.3439911791077303e-05, - "loss": 0.8159, - "step": 13070 - }, - { - "epoch": 0.92, - "learning_rate": 2.3417370940792944e-05, - "loss": 0.8265, - "step": 13080 - }, - { - "epoch": 0.92, - "learning_rate": 2.3394825618243954e-05, - "loss": 0.8214, - "step": 13090 - }, - { - "epoch": 0.93, - "learning_rate": 2.337227585293506e-05, - "loss": 0.8272, - "step": 13100 - }, - { - "epoch": 0.93, - "learning_rate": 2.3349721674376826e-05, - "loss": 0.822, - "step": 13110 - }, - { - "epoch": 0.93, - "learning_rate": 2.332716311208558e-05, - "loss": 0.8174, - "step": 13120 - }, - { - "epoch": 0.93, - "learning_rate": 2.3304600195583394e-05, - "loss": 0.8198, - "step": 13130 - }, - { - "epoch": 0.93, - "learning_rate": 2.3282032954398032e-05, - "loss": 0.831, - "step": 13140 - }, - { - "epoch": 0.93, - "learning_rate": 2.3259461418062923e-05, - "loss": 0.8243, - "step": 13150 - }, - { - "epoch": 0.93, - "learning_rate": 2.3236885616117112e-05, - "loss": 0.8253, - "step": 13160 - }, - { - "epoch": 0.93, - "learning_rate": 2.3214305578105237e-05, - "loss": 0.8201, - "step": 13170 - }, - { - "epoch": 0.93, - "learning_rate": 2.319172133357746e-05, - "loss": 0.8184, - "step": 13180 - }, - { - "epoch": 0.93, - "learning_rate": 2.3169132912089468e-05, - "loss": 0.8208, - "step": 13190 - }, - { - "epoch": 0.93, - "learning_rate": 2.3146540343202408e-05, - "loss": 0.8271, - "step": 13200 - }, - { - "epoch": 0.93, - "learning_rate": 2.3123943656482845e-05, - "loss": 0.83, - "step": 13210 - }, - { - "epoch": 0.93, - "learning_rate": 2.310134288150275e-05, - "loss": 0.819, - "step": 13220 - }, - { - "epoch": 0.93, - "learning_rate": 2.3078738047839425e-05, - "loss": 0.8219, - "step": 13230 - }, - { - "epoch": 0.94, - "learning_rate": 2.3056129185075503e-05, - "loss": 0.8263, - "step": 13240 - }, - { - "epoch": 0.94, - "learning_rate": 2.3033516322798875e-05, - "loss": 0.821, - "step": 13250 - }, - { - "epoch": 0.94, - "learning_rate": 2.3010899490602673e-05, - "loss": 0.8181, - "step": 13260 - }, - { - "epoch": 0.94, - "learning_rate": 2.2988278718085223e-05, - "loss": 0.8123, - "step": 13270 - }, - { - "epoch": 0.94, - "learning_rate": 2.296565403485001e-05, - "loss": 0.8258, - "step": 13280 - }, - { - "epoch": 0.94, - "learning_rate": 2.2943025470505625e-05, - "loss": 0.8197, - "step": 13290 - }, - { - "epoch": 0.94, - "learning_rate": 2.2920393054665757e-05, - "loss": 0.8257, - "step": 13300 - }, - { - "epoch": 0.94, - "learning_rate": 2.2897756816949128e-05, - "loss": 0.822, - "step": 13310 - }, - { - "epoch": 0.94, - "learning_rate": 2.2875116786979454e-05, - "loss": 0.8286, - "step": 13320 - }, - { - "epoch": 0.94, - "learning_rate": 2.2852472994385416e-05, - "loss": 0.8185, - "step": 13330 - }, - { - "epoch": 0.94, - "learning_rate": 2.282982546880063e-05, - "loss": 0.8149, - "step": 13340 - }, - { - "epoch": 0.94, - "learning_rate": 2.280717423986359e-05, - "loss": 0.8205, - "step": 13350 - }, - { - "epoch": 0.94, - "learning_rate": 2.2784519337217637e-05, - "loss": 0.8162, - "step": 13360 - }, - { - "epoch": 0.94, - "learning_rate": 2.2761860790510907e-05, - "loss": 0.8217, - "step": 13370 - }, - { - "epoch": 0.95, - "learning_rate": 2.273919862939633e-05, - "loss": 0.8192, - "step": 13380 - }, - { - "epoch": 0.95, - "learning_rate": 2.2716532883531545e-05, - "loss": 0.8173, - "step": 13390 - }, - { - "epoch": 0.95, - "learning_rate": 2.2693863582578905e-05, - "loss": 0.8156, - "step": 13400 - }, - { - "epoch": 0.95, - "learning_rate": 2.2671190756205384e-05, - "loss": 0.8208, - "step": 13410 - }, - { - "epoch": 0.95, - "learning_rate": 2.2648514434082593e-05, - "loss": 0.82, - "step": 13420 - }, - { - "epoch": 0.95, - "learning_rate": 2.262583464588671e-05, - "loss": 0.8197, - "step": 13430 - }, - { - "epoch": 0.95, - "learning_rate": 2.260315142129846e-05, - "loss": 0.8213, - "step": 13440 - }, - { - "epoch": 0.95, - "learning_rate": 2.2580464790003046e-05, - "loss": 0.8208, - "step": 13450 - }, - { - "epoch": 0.95, - "learning_rate": 2.255777478169014e-05, - "loss": 0.8168, - "step": 13460 - }, - { - "epoch": 0.95, - "learning_rate": 2.2535081426053834e-05, - "loss": 0.825, - "step": 13470 - }, - { - "epoch": 0.95, - "learning_rate": 2.2512384752792605e-05, - "loss": 0.8188, - "step": 13480 - }, - { - "epoch": 0.95, - "learning_rate": 2.248968479160925e-05, - "loss": 0.8199, - "step": 13490 - }, - { - "epoch": 0.95, - "learning_rate": 2.24669815722109e-05, - "loss": 0.8231, - "step": 13500 - }, - { - "epoch": 0.95, - "learning_rate": 2.244427512430893e-05, - "loss": 0.8239, - "step": 13510 - }, - { - "epoch": 0.96, - "learning_rate": 2.2421565477618938e-05, - "loss": 0.8165, - "step": 13520 - }, - { - "epoch": 0.96, - "learning_rate": 2.2398852661860725e-05, - "loss": 0.8146, - "step": 13530 - }, - { - "epoch": 0.96, - "learning_rate": 2.2376136706758222e-05, - "loss": 0.8142, - "step": 13540 - }, - { - "epoch": 0.96, - "learning_rate": 2.2353417642039483e-05, - "loss": 0.8221, - "step": 13550 - }, - { - "epoch": 0.96, - "learning_rate": 2.2330695497436618e-05, - "loss": 0.8191, - "step": 13560 - }, - { - "epoch": 0.96, - "learning_rate": 2.2307970302685775e-05, - "loss": 0.8216, - "step": 13570 - }, - { - "epoch": 0.96, - "learning_rate": 2.2285242087527092e-05, - "loss": 0.8183, - "step": 13580 - }, - { - "epoch": 0.96, - "learning_rate": 2.2262510881704662e-05, - "loss": 0.8228, - "step": 13590 - }, - { - "epoch": 0.96, - "learning_rate": 2.2239776714966492e-05, - "loss": 0.8149, - "step": 13600 - }, - { - "epoch": 0.96, - "learning_rate": 2.221703961706446e-05, - "loss": 0.8148, - "step": 13610 - }, - { - "epoch": 0.96, - "learning_rate": 2.2194299617754274e-05, - "loss": 0.8123, - "step": 13620 - }, - { - "epoch": 0.96, - "learning_rate": 2.217155674679546e-05, - "loss": 0.814, - "step": 13630 - }, - { - "epoch": 0.96, - "learning_rate": 2.2148811033951283e-05, - "loss": 0.8214, - "step": 13640 - }, - { - "epoch": 0.96, - "learning_rate": 2.2126062508988736e-05, - "loss": 0.8275, - "step": 13650 - }, - { - "epoch": 0.96, - "learning_rate": 2.210331120167848e-05, - "loss": 0.8457, - "step": 13660 - }, - { - "epoch": 0.97, - "learning_rate": 2.2080557141794836e-05, - "loss": 0.8277, - "step": 13670 - }, - { - "epoch": 0.97, - "learning_rate": 2.2057800359115716e-05, - "loss": 0.8376, - "step": 13680 - }, - { - "epoch": 0.97, - "learning_rate": 2.2035040883422595e-05, - "loss": 0.9098, - "step": 13690 - }, - { - "epoch": 0.97, - "learning_rate": 2.2012278744500482e-05, - "loss": 0.8815, - "step": 13700 - }, - { - "epoch": 0.97, - "learning_rate": 2.1989513972137852e-05, - "loss": 0.8618, - "step": 13710 - }, - { - "epoch": 0.97, - "learning_rate": 2.1966746596126643e-05, - "loss": 0.8511, - "step": 13720 - }, - { - "epoch": 0.97, - "learning_rate": 2.1943976646262198e-05, - "loss": 0.847, - "step": 13730 - }, - { - "epoch": 0.97, - "learning_rate": 2.1921204152343233e-05, - "loss": 0.8395, - "step": 13740 - }, - { - "epoch": 0.97, - "learning_rate": 2.1898429144171763e-05, - "loss": 0.838, - "step": 13750 - }, - { - "epoch": 0.97, - "learning_rate": 2.187565165155314e-05, - "loss": 0.832, - "step": 13760 - }, - { - "epoch": 0.97, - "learning_rate": 2.185287170429593e-05, - "loss": 0.8328, - "step": 13770 - }, - { - "epoch": 0.97, - "learning_rate": 2.1830089332211936e-05, - "loss": 0.8363, - "step": 13780 - }, - { - "epoch": 0.97, - "learning_rate": 2.180730456511611e-05, - "loss": 0.8291, - "step": 13790 - }, - { - "epoch": 0.97, - "learning_rate": 2.1784517432826563e-05, - "loss": 0.8242, - "step": 13800 - }, - { - "epoch": 0.98, - "learning_rate": 2.1761727965164488e-05, - "loss": 0.8305, - "step": 13810 - }, - { - "epoch": 0.98, - "learning_rate": 2.1738936191954134e-05, - "loss": 0.8369, - "step": 13820 - }, - { - "epoch": 0.98, - "learning_rate": 2.1716142143022772e-05, - "loss": 0.8263, - "step": 13830 - }, - { - "epoch": 0.98, - "learning_rate": 2.1693345848200647e-05, - "loss": 0.8374, - "step": 13840 - }, - { - "epoch": 0.98, - "learning_rate": 2.1670547337320948e-05, - "loss": 0.8276, - "step": 13850 - }, - { - "epoch": 0.98, - "learning_rate": 2.1647746640219762e-05, - "loss": 0.8313, - "step": 13860 - }, - { - "epoch": 0.98, - "learning_rate": 2.162494378673603e-05, - "loss": 0.8276, - "step": 13870 - }, - { - "epoch": 0.98, - "learning_rate": 2.1602138806711524e-05, - "loss": 0.8181, - "step": 13880 - }, - { - "epoch": 0.98, - "learning_rate": 2.1579331729990795e-05, - "loss": 0.8233, - "step": 13890 - }, - { - "epoch": 0.98, - "learning_rate": 2.155652258642115e-05, - "loss": 0.8214, - "step": 13900 - }, - { - "epoch": 0.98, - "learning_rate": 2.1533711405852578e-05, - "loss": 0.8231, - "step": 13910 - }, - { - "epoch": 0.98, - "learning_rate": 2.151089821813775e-05, - "loss": 0.8134, - "step": 13920 - }, - { - "epoch": 0.98, - "learning_rate": 2.1488083053131956e-05, - "loss": 0.8192, - "step": 13930 - }, - { - "epoch": 0.98, - "learning_rate": 2.1465265940693084e-05, - "loss": 0.8182, - "step": 13940 - }, - { - "epoch": 0.99, - "learning_rate": 2.1442446910681557e-05, - "loss": 0.8205, - "step": 13950 - }, - { - "epoch": 0.99, - "learning_rate": 2.141962599296032e-05, - "loss": 0.8181, - "step": 13960 - }, - { - "epoch": 0.99, - "learning_rate": 2.1396803217394777e-05, - "loss": 0.8177, - "step": 13970 - }, - { - "epoch": 0.99, - "learning_rate": 2.137397861385278e-05, - "loss": 0.8257, - "step": 13980 - }, - { - "epoch": 0.99, - "learning_rate": 2.135115221220455e-05, - "loss": 0.8191, - "step": 13990 - }, - { - "epoch": 0.99, - "learning_rate": 2.1328324042322678e-05, - "loss": 0.8218, - "step": 14000 - }, - { - "epoch": 0.99, - "learning_rate": 2.1305494134082068e-05, - "loss": 0.814, - "step": 14010 - }, - { - "epoch": 0.99, - "learning_rate": 2.1282662517359885e-05, - "loss": 0.8173, - "step": 14020 - }, - { - "epoch": 0.99, - "learning_rate": 2.1259829222035554e-05, - "loss": 0.8182, - "step": 14030 - }, - { - "epoch": 0.99, - "learning_rate": 2.123699427799067e-05, - "loss": 0.8199, - "step": 14040 - }, - { - "epoch": 0.99, - "learning_rate": 2.121415771510902e-05, - "loss": 0.8215, - "step": 14050 - }, - { - "epoch": 0.99, - "learning_rate": 2.119131956327646e-05, - "loss": 0.8149, - "step": 14060 - }, - { - "epoch": 0.99, - "learning_rate": 2.1168479852380973e-05, - "loss": 0.8138, - "step": 14070 - }, - { - "epoch": 0.99, - "learning_rate": 2.114563861231256e-05, - "loss": 0.8089, - "step": 14080 - }, - { - "epoch": 1.0, - "learning_rate": 2.112279587296322e-05, - "loss": 0.8176, - "step": 14090 - }, - { - "epoch": 1.0, - "learning_rate": 2.1099951664226927e-05, - "loss": 0.8194, - "step": 14100 - }, - { - "epoch": 1.0, - "learning_rate": 2.1077106015999566e-05, - "loss": 0.8233, - "step": 14110 - }, - { - "epoch": 1.0, - "learning_rate": 2.1054258958178914e-05, - "loss": 0.823, - "step": 14120 - }, - { - "epoch": 1.0, - "learning_rate": 2.1031410520664597e-05, - "loss": 0.8173, - "step": 14130 - }, - { - "epoch": 1.0, - "learning_rate": 2.1008560733358027e-05, - "loss": 0.8159, - "step": 14140 - }, - { - "epoch": 1.0, - "learning_rate": 2.0985709626162404e-05, - "loss": 0.8173, - "step": 14150 - }, - { - "epoch": 1.0, - "learning_rate": 2.0962857228982636e-05, - "loss": 0.8057, - "step": 14160 - }, - { - "epoch": 1.0, - "learning_rate": 2.0940003571725346e-05, - "loss": 0.8023, - "step": 14170 - }, - { - "epoch": 1.0, - "learning_rate": 2.0917148684298773e-05, - "loss": 0.7996, - "step": 14180 - }, - { - "epoch": 1.0, - "learning_rate": 2.089429259661279e-05, - "loss": 0.7989, - "step": 14190 - }, - { - "epoch": 1.0, - "learning_rate": 2.0871435338578833e-05, - "loss": 0.8005, - "step": 14200 - }, - { - "epoch": 1.0, - "learning_rate": 2.084857694010987e-05, - "loss": 0.8064, - "step": 14210 - }, - { - "epoch": 1.0, - "learning_rate": 2.0825717431120362e-05, - "loss": 0.8033, - "step": 14220 - }, - { - "epoch": 1.01, - "learning_rate": 2.0802856841526217e-05, - "loss": 0.8011, - "step": 14230 - }, - { - "epoch": 1.01, - "learning_rate": 2.0779995201244773e-05, - "loss": 0.7976, - "step": 14240 - }, - { - "epoch": 1.01, - "learning_rate": 2.0757132540194722e-05, - "loss": 0.7957, - "step": 14250 - }, - { - "epoch": 1.01, - "learning_rate": 2.0734268888296105e-05, - "loss": 0.7972, - "step": 14260 - }, - { - "epoch": 1.01, - "learning_rate": 2.071140427547026e-05, - "loss": 0.8002, - "step": 14270 - }, - { - "epoch": 1.01, - "learning_rate": 2.068853873163979e-05, - "loss": 0.8021, - "step": 14280 - }, - { - "epoch": 1.01, - "learning_rate": 2.0665672286728484e-05, - "loss": 0.7892, - "step": 14290 - }, - { - "epoch": 1.01, - "learning_rate": 2.064280497066135e-05, - "loss": 0.799, - "step": 14300 - }, - { - "epoch": 1.01, - "learning_rate": 2.0619936813364506e-05, - "loss": 0.7956, - "step": 14310 - }, - { - "epoch": 1.01, - "learning_rate": 2.0597067844765202e-05, - "loss": 0.7962, - "step": 14320 - }, - { - "epoch": 1.01, - "learning_rate": 2.0574198094791713e-05, - "loss": 0.8008, - "step": 14330 - }, - { - "epoch": 1.01, - "learning_rate": 2.0551327593373357e-05, - "loss": 0.7955, - "step": 14340 - }, - { - "epoch": 1.01, - "learning_rate": 2.0528456370440445e-05, - "loss": 0.799, - "step": 14350 - }, - { - "epoch": 1.01, - "learning_rate": 2.0505584455924214e-05, - "loss": 0.7979, - "step": 14360 - }, - { - "epoch": 1.02, - "learning_rate": 2.0482711879756808e-05, - "loss": 0.7982, - "step": 14370 - }, - { - "epoch": 1.02, - "learning_rate": 2.0459838671871247e-05, - "loss": 0.8023, - "step": 14380 - }, - { - "epoch": 1.02, - "learning_rate": 2.0436964862201365e-05, - "loss": 0.7957, - "step": 14390 - }, - { - "epoch": 1.02, - "learning_rate": 2.04140904806818e-05, - "loss": 0.7962, - "step": 14400 - }, - { - "epoch": 1.02, - "learning_rate": 2.0391215557247933e-05, - "loss": 0.7968, - "step": 14410 - }, - { - "epoch": 1.02, - "learning_rate": 2.036834012183583e-05, - "loss": 0.7918, - "step": 14420 - }, - { - "epoch": 1.02, - "learning_rate": 2.0345464204382262e-05, - "loss": 0.7985, - "step": 14430 - }, - { - "epoch": 1.02, - "learning_rate": 2.032258783482462e-05, - "loss": 0.7925, - "step": 14440 - }, - { - "epoch": 1.02, - "learning_rate": 2.0299711043100867e-05, - "loss": 0.7933, - "step": 14450 - }, - { - "epoch": 1.02, - "learning_rate": 2.0276833859149553e-05, - "loss": 0.7961, - "step": 14460 - }, - { - "epoch": 1.02, - "learning_rate": 2.025395631290971e-05, - "loss": 0.7971, - "step": 14470 - }, - { - "epoch": 1.02, - "learning_rate": 2.023107843432086e-05, - "loss": 0.7971, - "step": 14480 - }, - { - "epoch": 1.02, - "learning_rate": 2.0208200253322957e-05, - "loss": 0.7985, - "step": 14490 - }, - { - "epoch": 1.02, - "learning_rate": 2.0185321799856345e-05, - "loss": 0.7994, - "step": 14500 - }, - { - "epoch": 1.02, - "learning_rate": 2.0162443103861746e-05, - "loss": 0.7939, - "step": 14510 - }, - { - "epoch": 1.03, - "learning_rate": 2.0139564195280164e-05, - "loss": 0.7873, - "step": 14520 - }, - { - "epoch": 1.03, - "learning_rate": 2.0116685104052908e-05, - "loss": 0.7951, - "step": 14530 - }, - { - "epoch": 1.03, - "learning_rate": 2.009380586012152e-05, - "loss": 0.7948, - "step": 14540 - }, - { - "epoch": 1.03, - "learning_rate": 2.007092649342775e-05, - "loss": 0.7902, - "step": 14550 - }, - { - "epoch": 1.03, - "learning_rate": 2.0048047033913475e-05, - "loss": 0.7946, - "step": 14560 - }, - { - "epoch": 1.03, - "learning_rate": 2.002516751152074e-05, - "loss": 0.7934, - "step": 14570 - }, - { - "epoch": 1.03, - "learning_rate": 2.000228795619164e-05, - "loss": 0.7947, - "step": 14580 - }, - { - "epoch": 1.03, - "learning_rate": 1.9979408397868325e-05, - "loss": 0.7976, - "step": 14590 - }, - { - "epoch": 1.03, - "learning_rate": 1.9956528866492944e-05, - "loss": 0.7934, - "step": 14600 - }, - { - "epoch": 1.03, - "learning_rate": 1.9933649392007616e-05, - "loss": 0.7982, - "step": 14610 - }, - { - "epoch": 1.03, - "learning_rate": 1.991077000435438e-05, - "loss": 0.7887, - "step": 14620 - }, - { - "epoch": 1.03, - "learning_rate": 1.988789073347517e-05, - "loss": 0.7873, - "step": 14630 - }, - { - "epoch": 1.03, - "learning_rate": 1.986501160931176e-05, - "loss": 0.7951, - "step": 14640 - }, - { - "epoch": 1.03, - "learning_rate": 1.984213266180574e-05, - "loss": 0.7933, - "step": 14650 - }, - { - "epoch": 1.04, - "learning_rate": 1.981925392089845e-05, - "loss": 0.7912, - "step": 14660 - }, - { - "epoch": 1.04, - "learning_rate": 1.979637541653097e-05, - "loss": 0.7907, - "step": 14670 - }, - { - "epoch": 1.04, - "learning_rate": 1.9773497178644085e-05, - "loss": 0.791, - "step": 14680 - }, - { - "epoch": 1.04, - "learning_rate": 1.9750619237178216e-05, - "loss": 0.7937, - "step": 14690 - }, - { - "epoch": 1.04, - "learning_rate": 1.9727741622073393e-05, - "loss": 0.7979, - "step": 14700 - }, - { - "epoch": 1.04, - "learning_rate": 1.9704864363269232e-05, - "loss": 0.7989, - "step": 14710 - }, - { - "epoch": 1.04, - "learning_rate": 1.9681987490704875e-05, - "loss": 0.7939, - "step": 14720 - }, - { - "epoch": 1.04, - "learning_rate": 1.9659111034318952e-05, - "loss": 0.7917, - "step": 14730 - }, - { - "epoch": 1.04, - "learning_rate": 1.9636235024049562e-05, - "loss": 0.7988, - "step": 14740 - }, - { - "epoch": 1.04, - "learning_rate": 1.961335948983421e-05, - "loss": 0.794, - "step": 14750 - }, - { - "epoch": 1.04, - "learning_rate": 1.959048446160978e-05, - "loss": 0.7931, - "step": 14760 - }, - { - "epoch": 1.04, - "learning_rate": 1.9567609969312497e-05, - "loss": 0.7941, - "step": 14770 - }, - { - "epoch": 1.04, - "learning_rate": 1.9544736042877886e-05, - "loss": 0.7937, - "step": 14780 - }, - { - "epoch": 1.04, - "learning_rate": 1.9521862712240728e-05, - "loss": 0.7994, - "step": 14790 - }, - { - "epoch": 1.05, - "learning_rate": 1.949899000733503e-05, - "loss": 0.7856, - "step": 14800 - }, - { - "epoch": 1.05, - "learning_rate": 1.947611795809396e-05, - "loss": 0.7982, - "step": 14810 - }, - { - "epoch": 1.05, - "learning_rate": 1.945324659444985e-05, - "loss": 0.7924, - "step": 14820 - }, - { - "epoch": 1.05, - "learning_rate": 1.9430375946334134e-05, - "loss": 0.7956, - "step": 14830 - }, - { - "epoch": 1.05, - "learning_rate": 1.9407506043677294e-05, - "loss": 0.7907, - "step": 14840 - }, - { - "epoch": 1.05, - "learning_rate": 1.938463691640885e-05, - "loss": 0.7919, - "step": 14850 - }, - { - "epoch": 1.05, - "learning_rate": 1.9361768594457305e-05, - "loss": 0.7909, - "step": 14860 - }, - { - "epoch": 1.05, - "learning_rate": 1.93389011077501e-05, - "loss": 0.7928, - "step": 14870 - }, - { - "epoch": 1.05, - "learning_rate": 1.9316034486213604e-05, - "loss": 0.7941, - "step": 14880 - }, - { - "epoch": 1.05, - "learning_rate": 1.9293168759773015e-05, - "loss": 0.7874, - "step": 14890 - }, - { - "epoch": 1.05, - "learning_rate": 1.9270303958352398e-05, - "loss": 0.7867, - "step": 14900 - }, - { - "epoch": 1.05, - "learning_rate": 1.9247440111874588e-05, - "loss": 0.7871, - "step": 14910 - }, - { - "epoch": 1.05, - "learning_rate": 1.922457725026118e-05, - "loss": 0.7918, - "step": 14920 - }, - { - "epoch": 1.05, - "learning_rate": 1.920171540343247e-05, - "loss": 0.7964, - "step": 14930 - }, - { - "epoch": 1.06, - "learning_rate": 1.9178854601307433e-05, - "loss": 0.7904, - "step": 14940 - }, - { - "epoch": 1.06, - "learning_rate": 1.915599487380369e-05, - "loss": 0.7914, - "step": 14950 - }, - { - "epoch": 1.06, - "learning_rate": 1.913313625083741e-05, - "loss": 0.7967, - "step": 14960 - }, - { - "epoch": 1.06, - "learning_rate": 1.9110278762323366e-05, - "loss": 0.8, - "step": 14970 - }, - { - "epoch": 1.06, - "learning_rate": 1.9087422438174822e-05, - "loss": 0.7965, - "step": 14980 - }, - { - "epoch": 1.06, - "learning_rate": 1.9064567308303527e-05, - "loss": 0.7912, - "step": 14990 - }, - { - "epoch": 1.06, - "learning_rate": 1.9041713402619662e-05, - "loss": 0.7936, - "step": 15000 - }, - { - "epoch": 1.06, - "learning_rate": 1.901886075103181e-05, - "loss": 0.791, - "step": 15010 - }, - { - "epoch": 1.06, - "learning_rate": 1.899600938344691e-05, - "loss": 0.7937, - "step": 15020 - }, - { - "epoch": 1.06, - "learning_rate": 1.8973159329770226e-05, - "loss": 0.797, - "step": 15030 - }, - { - "epoch": 1.06, - "learning_rate": 1.8950310619905282e-05, - "loss": 0.7926, - "step": 15040 - }, - { - "epoch": 1.06, - "learning_rate": 1.8927463283753872e-05, - "loss": 0.7952, - "step": 15050 - }, - { - "epoch": 1.06, - "learning_rate": 1.890461735121597e-05, - "loss": 0.7869, - "step": 15060 - }, - { - "epoch": 1.06, - "learning_rate": 1.888177285218973e-05, - "loss": 0.7847, - "step": 15070 - }, - { - "epoch": 1.07, - "learning_rate": 1.885892981657142e-05, - "loss": 0.7882, - "step": 15080 - }, - { - "epoch": 1.07, - "learning_rate": 1.8836088274255395e-05, - "loss": 0.7913, - "step": 15090 - }, - { - "epoch": 1.07, - "learning_rate": 1.8813248255134055e-05, - "loss": 0.7918, - "step": 15100 - }, - { - "epoch": 1.07, - "learning_rate": 1.8790409789097815e-05, - "loss": 0.7948, - "step": 15110 - }, - { - "epoch": 1.07, - "learning_rate": 1.8767572906035036e-05, - "loss": 0.7938, - "step": 15120 - }, - { - "epoch": 1.07, - "learning_rate": 1.8744737635832036e-05, - "loss": 0.7946, - "step": 15130 - }, - { - "epoch": 1.07, - "learning_rate": 1.8721904008372994e-05, - "loss": 0.7965, - "step": 15140 - }, - { - "epoch": 1.07, - "learning_rate": 1.8699072053539962e-05, - "loss": 0.7981, - "step": 15150 - }, - { - "epoch": 1.07, - "learning_rate": 1.8676241801212786e-05, - "loss": 0.8019, - "step": 15160 - }, - { - "epoch": 1.07, - "learning_rate": 1.86534132812691e-05, - "loss": 0.7994, - "step": 15170 - }, - { - "epoch": 1.07, - "learning_rate": 1.863058652358426e-05, - "loss": 0.7903, - "step": 15180 - }, - { - "epoch": 1.07, - "learning_rate": 1.8607761558031325e-05, - "loss": 0.7847, - "step": 15190 - }, - { - "epoch": 1.07, - "learning_rate": 1.8584938414480983e-05, - "loss": 0.7934, - "step": 15200 - }, - { - "epoch": 1.07, - "learning_rate": 1.8562117122801576e-05, - "loss": 0.8028, - "step": 15210 - }, - { - "epoch": 1.08, - "learning_rate": 1.853929771285899e-05, - "loss": 0.7888, - "step": 15220 - }, - { - "epoch": 1.08, - "learning_rate": 1.8516480214516674e-05, - "loss": 0.7946, - "step": 15230 - }, - { - "epoch": 1.08, - "learning_rate": 1.8493664657635554e-05, - "loss": 0.7962, - "step": 15240 - }, - { - "epoch": 1.08, - "learning_rate": 1.8470851072074026e-05, - "loss": 0.7919, - "step": 15250 - }, - { - "epoch": 1.08, - "learning_rate": 1.8448039487687907e-05, - "loss": 0.7944, - "step": 15260 - }, - { - "epoch": 1.08, - "learning_rate": 1.8425229934330386e-05, - "loss": 0.7953, - "step": 15270 - }, - { - "epoch": 1.08, - "learning_rate": 1.8402422441852005e-05, - "loss": 0.7964, - "step": 15280 - }, - { - "epoch": 1.08, - "learning_rate": 1.8379617040100602e-05, - "loss": 0.7941, - "step": 15290 - }, - { - "epoch": 1.08, - "learning_rate": 1.8356813758921282e-05, - "loss": 0.7925, - "step": 15300 - }, - { - "epoch": 1.08, - "learning_rate": 1.8334012628156378e-05, - "loss": 0.7921, - "step": 15310 - }, - { - "epoch": 1.08, - "learning_rate": 1.83112136776454e-05, - "loss": 0.7871, - "step": 15320 - }, - { - "epoch": 1.08, - "learning_rate": 1.8288416937225015e-05, - "loss": 0.7935, - "step": 15330 - }, - { - "epoch": 1.08, - "learning_rate": 1.826562243672899e-05, - "loss": 0.798, - "step": 15340 - }, - { - "epoch": 1.08, - "learning_rate": 1.8242830205988157e-05, - "loss": 0.7912, - "step": 15350 - }, - { - "epoch": 1.08, - "learning_rate": 1.8220040274830385e-05, - "loss": 0.7991, - "step": 15360 - }, - { - "epoch": 1.09, - "learning_rate": 1.819725267308054e-05, - "loss": 0.7856, - "step": 15370 - }, - { - "epoch": 1.09, - "learning_rate": 1.8174467430560423e-05, - "loss": 0.8025, - "step": 15380 - }, - { - "epoch": 1.09, - "learning_rate": 1.8151684577088762e-05, - "loss": 0.8004, - "step": 15390 - }, - { - "epoch": 1.09, - "learning_rate": 1.812890414248115e-05, - "loss": 0.7877, - "step": 15400 - }, - { - "epoch": 1.09, - "learning_rate": 1.8106126156550016e-05, - "loss": 0.7864, - "step": 15410 - }, - { - "epoch": 1.09, - "learning_rate": 1.8083350649104597e-05, - "loss": 0.7816, - "step": 15420 - }, - { - "epoch": 1.09, - "learning_rate": 1.8060577649950856e-05, - "loss": 0.7897, - "step": 15430 - }, - { - "epoch": 1.09, - "learning_rate": 1.80378071888915e-05, - "loss": 0.7888, - "step": 15440 - }, - { - "epoch": 1.09, - "learning_rate": 1.801503929572591e-05, - "loss": 0.7932, - "step": 15450 - }, - { - "epoch": 1.09, - "learning_rate": 1.7992274000250098e-05, - "loss": 0.7967, - "step": 15460 - }, - { - "epoch": 1.09, - "learning_rate": 1.7969511332256688e-05, - "loss": 0.7956, - "step": 15470 - }, - { - "epoch": 1.09, - "learning_rate": 1.7946751321534857e-05, - "loss": 0.7927, - "step": 15480 - }, - { - "epoch": 1.09, - "learning_rate": 1.7923993997870312e-05, - "loss": 0.7871, - "step": 15490 - }, - { - "epoch": 1.09, - "learning_rate": 1.7901239391045226e-05, - "loss": 0.7951, - "step": 15500 - }, - { - "epoch": 1.1, - "learning_rate": 1.7878487530838234e-05, - "loss": 0.7905, - "step": 15510 - }, - { - "epoch": 1.1, - "learning_rate": 1.7855738447024372e-05, - "loss": 0.7808, - "step": 15520 - }, - { - "epoch": 1.1, - "learning_rate": 1.783299216937504e-05, - "loss": 0.7829, - "step": 15530 - }, - { - "epoch": 1.1, - "learning_rate": 1.781024872765797e-05, - "loss": 0.7944, - "step": 15540 - }, - { - "epoch": 1.1, - "learning_rate": 1.778750815163718e-05, - "loss": 0.7938, - "step": 15550 - }, - { - "epoch": 1.1, - "learning_rate": 1.7764770471072936e-05, - "loss": 0.7893, - "step": 15560 - }, - { - "epoch": 1.1, - "learning_rate": 1.7742035715721725e-05, - "loss": 0.7819, - "step": 15570 - }, - { - "epoch": 1.1, - "learning_rate": 1.771930391533618e-05, - "loss": 0.7923, - "step": 15580 - }, - { - "epoch": 1.1, - "learning_rate": 1.7696575099665096e-05, - "loss": 0.7904, - "step": 15590 - }, - { - "epoch": 1.1, - "learning_rate": 1.7673849298453347e-05, - "loss": 0.7904, - "step": 15600 - }, - { - "epoch": 1.1, - "learning_rate": 1.7651126541441866e-05, - "loss": 0.7931, - "step": 15610 - }, - { - "epoch": 1.1, - "learning_rate": 1.76284068583676e-05, - "loss": 0.7949, - "step": 15620 - }, - { - "epoch": 1.1, - "learning_rate": 1.7605690278963473e-05, - "loss": 0.7868, - "step": 15630 - }, - { - "epoch": 1.1, - "learning_rate": 1.7582976832958355e-05, - "loss": 0.7861, - "step": 15640 - }, - { - "epoch": 1.11, - "learning_rate": 1.7560266550077015e-05, - "loss": 0.7889, - "step": 15650 - }, - { - "epoch": 1.11, - "learning_rate": 1.7537559460040054e-05, - "loss": 0.7905, - "step": 15660 - }, - { - "epoch": 1.11, - "learning_rate": 1.7514855592563932e-05, - "loss": 0.7918, - "step": 15670 - }, - { - "epoch": 1.11, - "learning_rate": 1.7492154977360875e-05, - "loss": 0.7877, - "step": 15680 - }, - { - "epoch": 1.11, - "learning_rate": 1.746945764413885e-05, - "loss": 0.7881, - "step": 15690 - }, - { - "epoch": 1.11, - "learning_rate": 1.744676362260154e-05, - "loss": 0.7921, - "step": 15700 - }, - { - "epoch": 1.11, - "learning_rate": 1.7424072942448286e-05, - "loss": 0.7937, - "step": 15710 - }, - { - "epoch": 1.11, - "learning_rate": 1.7401385633374065e-05, - "loss": 0.7783, - "step": 15720 - }, - { - "epoch": 1.11, - "learning_rate": 1.7378701725069418e-05, - "loss": 0.7909, - "step": 15730 - }, - { - "epoch": 1.11, - "learning_rate": 1.7356021247220462e-05, - "loss": 0.7827, - "step": 15740 - }, - { - "epoch": 1.11, - "learning_rate": 1.7333344229508814e-05, - "loss": 0.7882, - "step": 15750 - }, - { - "epoch": 1.11, - "learning_rate": 1.7310670701611564e-05, - "loss": 0.7846, - "step": 15760 - }, - { - "epoch": 1.11, - "learning_rate": 1.7288000693201235e-05, - "loss": 0.7869, - "step": 15770 - }, - { - "epoch": 1.11, - "learning_rate": 1.726533423394574e-05, - "loss": 0.7937, - "step": 15780 - }, - { - "epoch": 1.12, - "learning_rate": 1.724267135350836e-05, - "loss": 0.7895, - "step": 15790 - }, - { - "epoch": 1.12, - "learning_rate": 1.7220012081547688e-05, - "loss": 0.7871, - "step": 15800 - }, - { - "epoch": 1.12, - "learning_rate": 1.719735644771757e-05, - "loss": 0.7858, - "step": 15810 - }, - { - "epoch": 1.12, - "learning_rate": 1.7174704481667123e-05, - "loss": 0.7838, - "step": 15820 - }, - { - "epoch": 1.12, - "learning_rate": 1.715205621304065e-05, - "loss": 0.7846, - "step": 15830 - }, - { - "epoch": 1.12, - "learning_rate": 1.712941167147762e-05, - "loss": 0.7829, - "step": 15840 - }, - { - "epoch": 1.12, - "learning_rate": 1.7106770886612616e-05, - "loss": 0.7894, - "step": 15850 - }, - { - "epoch": 1.12, - "learning_rate": 1.7084133888075317e-05, - "loss": 0.7889, - "step": 15860 - }, - { - "epoch": 1.12, - "learning_rate": 1.7061500705490438e-05, - "loss": 0.7868, - "step": 15870 - }, - { - "epoch": 1.12, - "learning_rate": 1.7038871368477707e-05, - "loss": 0.7835, - "step": 15880 - }, - { - "epoch": 1.12, - "learning_rate": 1.7016245906651806e-05, - "loss": 0.7901, - "step": 15890 - }, - { - "epoch": 1.12, - "learning_rate": 1.6993624349622365e-05, - "loss": 0.7892, - "step": 15900 - }, - { - "epoch": 1.12, - "learning_rate": 1.6971006726993878e-05, - "loss": 0.7878, - "step": 15910 - }, - { - "epoch": 1.12, - "learning_rate": 1.694839306836572e-05, - "loss": 0.7886, - "step": 15920 - }, - { - "epoch": 1.13, - "learning_rate": 1.692578340333205e-05, - "loss": 0.7834, - "step": 15930 - }, - { - "epoch": 1.13, - "learning_rate": 1.690317776148183e-05, - "loss": 0.7907, - "step": 15940 - }, - { - "epoch": 1.13, - "learning_rate": 1.6880576172398733e-05, - "loss": 0.7897, - "step": 15950 - }, - { - "epoch": 1.13, - "learning_rate": 1.6857978665661137e-05, - "loss": 0.7907, - "step": 15960 - }, - { - "epoch": 1.13, - "learning_rate": 1.6835385270842075e-05, - "loss": 0.7827, - "step": 15970 - }, - { - "epoch": 1.13, - "learning_rate": 1.6812796017509203e-05, - "loss": 0.7916, - "step": 15980 - }, - { - "epoch": 1.13, - "learning_rate": 1.6790210935224752e-05, - "loss": 0.7851, - "step": 15990 - }, - { - "epoch": 1.13, - "learning_rate": 1.676763005354551e-05, - "loss": 0.7883, - "step": 16000 - }, - { - "epoch": 1.13, - "learning_rate": 1.6745053402022736e-05, - "loss": 0.7907, - "step": 16010 - }, - { - "epoch": 1.13, - "learning_rate": 1.6722481010202182e-05, - "loss": 0.7848, - "step": 16020 - }, - { - "epoch": 1.13, - "learning_rate": 1.6699912907624018e-05, - "loss": 0.7879, - "step": 16030 - }, - { - "epoch": 1.13, - "learning_rate": 1.667734912382279e-05, - "loss": 0.7878, - "step": 16040 - }, - { - "epoch": 1.13, - "learning_rate": 1.665478968832741e-05, - "loss": 0.7869, - "step": 16050 - }, - { - "epoch": 1.13, - "learning_rate": 1.663223463066108e-05, - "loss": 0.7861, - "step": 16060 - }, - { - "epoch": 1.14, - "learning_rate": 1.6609683980341288e-05, - "loss": 0.7858, - "step": 16070 - }, - { - "epoch": 1.14, - "learning_rate": 1.6587137766879748e-05, - "loss": 0.7858, - "step": 16080 - }, - { - "epoch": 1.14, - "learning_rate": 1.6564596019782373e-05, - "loss": 0.795, - "step": 16090 - }, - { - "epoch": 1.14, - "learning_rate": 1.6542058768549216e-05, - "loss": 0.7886, - "step": 16100 - }, - { - "epoch": 1.14, - "learning_rate": 1.6519526042674466e-05, - "loss": 0.7834, - "step": 16110 - }, - { - "epoch": 1.14, - "learning_rate": 1.649699787164637e-05, - "loss": 0.7832, - "step": 16120 - }, - { - "epoch": 1.14, - "learning_rate": 1.6474474284947232e-05, - "loss": 0.7863, - "step": 16130 - }, - { - "epoch": 1.14, - "learning_rate": 1.6451955312053346e-05, - "loss": 0.7867, - "step": 16140 - }, - { - "epoch": 1.14, - "learning_rate": 1.642944098243497e-05, - "loss": 0.7882, - "step": 16150 - }, - { - "epoch": 1.14, - "learning_rate": 1.640693132555629e-05, - "loss": 0.7855, - "step": 16160 - }, - { - "epoch": 1.14, - "learning_rate": 1.638442637087537e-05, - "loss": 0.7836, - "step": 16170 - }, - { - "epoch": 1.14, - "learning_rate": 1.6361926147844137e-05, - "loss": 0.7834, - "step": 16180 - }, - { - "epoch": 1.14, - "learning_rate": 1.6339430685908287e-05, - "loss": 0.7902, - "step": 16190 - }, - { - "epoch": 1.14, - "learning_rate": 1.6316940014507325e-05, - "loss": 0.7874, - "step": 16200 - }, - { - "epoch": 1.15, - "learning_rate": 1.629445416307447e-05, - "loss": 0.7877, - "step": 16210 - }, - { - "epoch": 1.15, - "learning_rate": 1.6271973161036636e-05, - "loss": 0.7844, - "step": 16220 - }, - { - "epoch": 1.15, - "learning_rate": 1.624949703781439e-05, - "loss": 0.7867, - "step": 16230 - }, - { - "epoch": 1.15, - "learning_rate": 1.622702582282191e-05, - "loss": 0.7911, - "step": 16240 - }, - { - "epoch": 1.15, - "learning_rate": 1.6204559545466963e-05, - "loss": 0.7912, - "step": 16250 - }, - { - "epoch": 1.15, - "learning_rate": 1.6182098235150847e-05, - "loss": 0.7861, - "step": 16260 - }, - { - "epoch": 1.15, - "learning_rate": 1.6159641921268347e-05, - "loss": 0.786, - "step": 16270 - }, - { - "epoch": 1.15, - "learning_rate": 1.613719063320772e-05, - "loss": 0.7801, - "step": 16280 - }, - { - "epoch": 1.15, - "learning_rate": 1.611474440035066e-05, - "loss": 0.7904, - "step": 16290 - }, - { - "epoch": 1.15, - "learning_rate": 1.6092303252072224e-05, - "loss": 0.781, - "step": 16300 - }, - { - "epoch": 1.15, - "learning_rate": 1.6069867217740824e-05, - "loss": 0.791, - "step": 16310 - }, - { - "epoch": 1.15, - "learning_rate": 1.604743632671818e-05, - "loss": 0.7813, - "step": 16320 - }, - { - "epoch": 1.15, - "learning_rate": 1.6025010608359277e-05, - "loss": 0.7837, - "step": 16330 - }, - { - "epoch": 1.15, - "learning_rate": 1.6002590092012343e-05, - "loss": 0.7894, - "step": 16340 - }, - { - "epoch": 1.15, - "learning_rate": 1.598017480701877e-05, - "loss": 0.7827, - "step": 16350 - }, - { - "epoch": 1.16, - "learning_rate": 1.595776478271313e-05, - "loss": 0.7891, - "step": 16360 - }, - { - "epoch": 1.16, - "learning_rate": 1.593536004842311e-05, - "loss": 0.7884, - "step": 16370 - }, - { - "epoch": 1.16, - "learning_rate": 1.5912960633469455e-05, - "loss": 0.7927, - "step": 16380 - }, - { - "epoch": 1.16, - "learning_rate": 1.5890566567165967e-05, - "loss": 0.7881, - "step": 16390 - }, - { - "epoch": 1.16, - "learning_rate": 1.5868177878819436e-05, - "loss": 0.7984, - "step": 16400 - }, - { - "epoch": 1.16, - "learning_rate": 1.5845794597729636e-05, - "loss": 0.8095, - "step": 16410 - }, - { - "epoch": 1.16, - "learning_rate": 1.5823416753189224e-05, - "loss": 0.7979, - "step": 16420 - }, - { - "epoch": 1.16, - "learning_rate": 1.5801044374483776e-05, - "loss": 0.7975, - "step": 16430 - }, - { - "epoch": 1.16, - "learning_rate": 1.5778677490891706e-05, - "loss": 0.7938, - "step": 16440 - }, - { - "epoch": 1.16, - "learning_rate": 1.575631613168424e-05, - "loss": 0.7905, - "step": 16450 - }, - { - "epoch": 1.16, - "learning_rate": 1.5733960326125363e-05, - "loss": 0.7916, - "step": 16460 - }, - { - "epoch": 1.16, - "learning_rate": 1.57116101034718e-05, - "loss": 0.7958, - "step": 16470 - }, - { - "epoch": 1.16, - "learning_rate": 1.5689265492972975e-05, - "loss": 0.7927, - "step": 16480 - }, - { - "epoch": 1.16, - "learning_rate": 1.5666926523870964e-05, - "loss": 0.7879, - "step": 16490 - }, - { - "epoch": 1.17, - "learning_rate": 1.5644593225400445e-05, - "loss": 0.7854, - "step": 16500 - }, - { - "epoch": 1.17, - "learning_rate": 1.562226562678869e-05, - "loss": 0.7952, - "step": 16510 - }, - { - "epoch": 1.17, - "learning_rate": 1.559994375725551e-05, - "loss": 0.7925, - "step": 16520 - }, - { - "epoch": 1.17, - "learning_rate": 1.5577627646013223e-05, - "loss": 0.7793, - "step": 16530 - }, - { - "epoch": 1.17, - "learning_rate": 1.55553173222666e-05, - "loss": 0.7753, - "step": 16540 - }, - { - "epoch": 1.17, - "learning_rate": 1.5533012815212848e-05, - "loss": 0.7837, - "step": 16550 - }, - { - "epoch": 1.17, - "learning_rate": 1.5510714154041553e-05, - "loss": 0.7857, - "step": 16560 - }, - { - "epoch": 1.17, - "learning_rate": 1.5488421367934666e-05, - "loss": 0.7915, - "step": 16570 - }, - { - "epoch": 1.17, - "learning_rate": 1.5466134486066425e-05, - "loss": 0.7869, - "step": 16580 - }, - { - "epoch": 1.17, - "learning_rate": 1.5443853537603356e-05, - "loss": 0.789, - "step": 16590 - }, - { - "epoch": 1.17, - "learning_rate": 1.5421578551704222e-05, - "loss": 0.7842, - "step": 16600 - }, - { - "epoch": 1.17, - "learning_rate": 1.539930955751998e-05, - "loss": 0.7865, - "step": 16610 - }, - { - "epoch": 1.17, - "learning_rate": 1.537704658419375e-05, - "loss": 0.7831, - "step": 16620 - }, - { - "epoch": 1.17, - "learning_rate": 1.5354789660860758e-05, - "loss": 0.7936, - "step": 16630 - }, - { - "epoch": 1.18, - "learning_rate": 1.5332538816648327e-05, - "loss": 0.7854, - "step": 16640 - }, - { - "epoch": 1.18, - "learning_rate": 1.5310294080675828e-05, - "loss": 0.7841, - "step": 16650 - }, - { - "epoch": 1.18, - "learning_rate": 1.528805548205462e-05, - "loss": 0.7887, - "step": 16660 - }, - { - "epoch": 1.18, - "learning_rate": 1.5265823049888037e-05, - "loss": 0.7822, - "step": 16670 - }, - { - "epoch": 1.18, - "learning_rate": 1.5243596813271349e-05, - "loss": 0.7826, - "step": 16680 - }, - { - "epoch": 1.18, - "learning_rate": 1.5221376801291719e-05, - "loss": 0.7818, - "step": 16690 - }, - { - "epoch": 1.18, - "learning_rate": 1.5199163043028158e-05, - "loss": 0.7868, - "step": 16700 - }, - { - "epoch": 1.18, - "learning_rate": 1.5176955567551495e-05, - "loss": 0.788, - "step": 16710 - }, - { - "epoch": 1.18, - "learning_rate": 1.515475440392434e-05, - "loss": 0.7819, - "step": 16720 - }, - { - "epoch": 1.18, - "learning_rate": 1.5132559581201031e-05, - "loss": 0.7826, - "step": 16730 - }, - { - "epoch": 1.18, - "learning_rate": 1.5110371128427623e-05, - "loss": 0.7797, - "step": 16740 - }, - { - "epoch": 1.18, - "learning_rate": 1.5088189074641826e-05, - "loss": 0.7831, - "step": 16750 - }, - { - "epoch": 1.18, - "learning_rate": 1.5066013448872981e-05, - "loss": 0.7848, - "step": 16760 - }, - { - "epoch": 1.18, - "learning_rate": 1.5043844280142005e-05, - "loss": 0.777, - "step": 16770 - }, - { - "epoch": 1.19, - "learning_rate": 1.502168159746138e-05, - "loss": 0.786, - "step": 16780 - }, - { - "epoch": 1.19, - "learning_rate": 1.499952542983509e-05, - "loss": 0.7905, - "step": 16790 - }, - { - "epoch": 1.19, - "learning_rate": 1.4977375806258599e-05, - "loss": 0.7827, - "step": 16800 - }, - { - "epoch": 1.19, - "learning_rate": 1.4955232755718795e-05, - "loss": 0.7797, - "step": 16810 - }, - { - "epoch": 1.19, - "learning_rate": 1.4933096307193986e-05, - "loss": 0.7855, - "step": 16820 - }, - { - "epoch": 1.19, - "learning_rate": 1.4910966489653814e-05, - "loss": 0.7775, - "step": 16830 - }, - { - "epoch": 1.19, - "learning_rate": 1.4888843332059267e-05, - "loss": 0.7747, - "step": 16840 - }, - { - "epoch": 1.19, - "learning_rate": 1.4866726863362595e-05, - "loss": 0.7908, - "step": 16850 - }, - { - "epoch": 1.19, - "learning_rate": 1.4844617112507317e-05, - "loss": 0.7838, - "step": 16860 - }, - { - "epoch": 1.19, - "learning_rate": 1.482251410842814e-05, - "loss": 0.7815, - "step": 16870 - }, - { - "epoch": 1.19, - "learning_rate": 1.4800417880050955e-05, - "loss": 0.777, - "step": 16880 - }, - { - "epoch": 1.19, - "learning_rate": 1.4778328456292776e-05, - "loss": 0.7814, - "step": 16890 - }, - { - "epoch": 1.19, - "learning_rate": 1.475624586606172e-05, - "loss": 0.7774, - "step": 16900 - }, - { - "epoch": 1.19, - "learning_rate": 1.4734170138256958e-05, - "loss": 0.776, - "step": 16910 - }, - { - "epoch": 1.2, - "learning_rate": 1.4712101301768681e-05, - "loss": 0.7724, - "step": 16920 - }, - { - "epoch": 1.2, - "learning_rate": 1.469003938547806e-05, - "loss": 0.7892, - "step": 16930 - }, - { - "epoch": 1.2, - "learning_rate": 1.4667984418257211e-05, - "loss": 0.7826, - "step": 16940 - }, - { - "epoch": 1.2, - "learning_rate": 1.4645936428969165e-05, - "loss": 0.7769, - "step": 16950 - }, - { - "epoch": 1.2, - "learning_rate": 1.4623895446467789e-05, - "loss": 0.7726, - "step": 16960 - }, - { - "epoch": 1.2, - "learning_rate": 1.4601861499597818e-05, - "loss": 0.7739, - "step": 16970 - }, - { - "epoch": 1.2, - "learning_rate": 1.4579834617194762e-05, - "loss": 0.7842, - "step": 16980 - }, - { - "epoch": 1.2, - "learning_rate": 1.4557814828084884e-05, - "loss": 0.78, - "step": 16990 - }, - { - "epoch": 1.2, - "learning_rate": 1.4535802161085175e-05, - "loss": 0.7809, - "step": 17000 - }, - { - "epoch": 1.2, - "learning_rate": 1.4513796645003293e-05, - "loss": 0.7807, - "step": 17010 - }, - { - "epoch": 1.2, - "learning_rate": 1.4491798308637544e-05, - "loss": 0.7799, - "step": 17020 - }, - { - "epoch": 1.2, - "learning_rate": 1.4469807180776849e-05, - "loss": 0.7815, - "step": 17030 - }, - { - "epoch": 1.2, - "learning_rate": 1.4447823290200664e-05, - "loss": 0.7719, - "step": 17040 - }, - { - "epoch": 1.2, - "learning_rate": 1.4425846665679001e-05, - "loss": 0.7802, - "step": 17050 - }, - { - "epoch": 1.21, - "learning_rate": 1.4403877335972359e-05, - "loss": 0.7795, - "step": 17060 - }, - { - "epoch": 1.21, - "learning_rate": 1.4381915329831682e-05, - "loss": 0.7861, - "step": 17070 - }, - { - "epoch": 1.21, - "learning_rate": 1.4359960675998337e-05, - "loss": 0.7819, - "step": 17080 - }, - { - "epoch": 1.21, - "learning_rate": 1.4338013403204067e-05, - "loss": 0.7751, - "step": 17090 - }, - { - "epoch": 1.21, - "learning_rate": 1.4316073540170953e-05, - "loss": 0.788, - "step": 17100 - }, - { - "epoch": 1.21, - "learning_rate": 1.4294141115611393e-05, - "loss": 0.7822, - "step": 17110 - }, - { - "epoch": 1.21, - "learning_rate": 1.4272216158228017e-05, - "loss": 0.7749, - "step": 17120 - }, - { - "epoch": 1.21, - "learning_rate": 1.4250298696713716e-05, - "loss": 0.7748, - "step": 17130 - }, - { - "epoch": 1.21, - "learning_rate": 1.4228388759751557e-05, - "loss": 0.7839, - "step": 17140 - }, - { - "epoch": 1.21, - "learning_rate": 1.4206486376014765e-05, - "loss": 0.7822, - "step": 17150 - }, - { - "epoch": 1.21, - "learning_rate": 1.4184591574166677e-05, - "loss": 0.787, - "step": 17160 - }, - { - "epoch": 1.21, - "learning_rate": 1.4162704382860707e-05, - "loss": 0.7797, - "step": 17170 - }, - { - "epoch": 1.21, - "learning_rate": 1.4140824830740317e-05, - "loss": 0.7831, - "step": 17180 - }, - { - "epoch": 1.21, - "learning_rate": 1.411895294643895e-05, - "loss": 0.7822, - "step": 17190 - }, - { - "epoch": 1.21, - "learning_rate": 1.4097088758580037e-05, - "loss": 0.7835, - "step": 17200 - }, - { - "epoch": 1.22, - "learning_rate": 1.4075232295776932e-05, - "loss": 0.7766, - "step": 17210 - }, - { - "epoch": 1.22, - "learning_rate": 1.4053383586632871e-05, - "loss": 0.7819, - "step": 17220 - }, - { - "epoch": 1.22, - "learning_rate": 1.4031542659740955e-05, - "loss": 0.7788, - "step": 17230 - }, - { - "epoch": 1.22, - "learning_rate": 1.400970954368409e-05, - "loss": 0.7751, - "step": 17240 - }, - { - "epoch": 1.22, - "learning_rate": 1.3987884267034961e-05, - "loss": 0.7889, - "step": 17250 - }, - { - "epoch": 1.22, - "learning_rate": 1.396606685835601e-05, - "loss": 0.7736, - "step": 17260 - }, - { - "epoch": 1.22, - "learning_rate": 1.3944257346199347e-05, - "loss": 0.7837, - "step": 17270 - }, - { - "epoch": 1.22, - "learning_rate": 1.3922455759106783e-05, - "loss": 0.7826, - "step": 17280 - }, - { - "epoch": 1.22, - "learning_rate": 1.3900662125609738e-05, - "loss": 0.7753, - "step": 17290 - }, - { - "epoch": 1.22, - "learning_rate": 1.3878876474229239e-05, - "loss": 0.7858, - "step": 17300 - }, - { - "epoch": 1.22, - "learning_rate": 1.3857098833475849e-05, - "loss": 0.7784, - "step": 17310 - }, - { - "epoch": 1.22, - "learning_rate": 1.383532923184966e-05, - "loss": 0.7766, - "step": 17320 - }, - { - "epoch": 1.22, - "learning_rate": 1.3813567697840237e-05, - "loss": 0.7798, - "step": 17330 - }, - { - "epoch": 1.22, - "learning_rate": 1.37918142599266e-05, - "loss": 0.7743, - "step": 17340 - }, - { - "epoch": 1.23, - "learning_rate": 1.3770068946577147e-05, - "loss": 0.7714, - "step": 17350 - }, - { - "epoch": 1.23, - "learning_rate": 1.3748331786249665e-05, - "loss": 0.7757, - "step": 17360 - }, - { - "epoch": 1.23, - "learning_rate": 1.3726602807391267e-05, - "loss": 0.7759, - "step": 17370 - }, - { - "epoch": 1.23, - "learning_rate": 1.3704882038438361e-05, - "loss": 0.7728, - "step": 17380 - }, - { - "epoch": 1.23, - "learning_rate": 1.3683169507816603e-05, - "loss": 0.7767, - "step": 17390 - }, - { - "epoch": 1.23, - "learning_rate": 1.3661465243940875e-05, - "loss": 0.7778, - "step": 17400 - }, - { - "epoch": 1.23, - "learning_rate": 1.3639769275215238e-05, - "loss": 0.7761, - "step": 17410 - }, - { - "epoch": 1.23, - "learning_rate": 1.3618081630032895e-05, - "loss": 0.7779, - "step": 17420 - }, - { - "epoch": 1.23, - "learning_rate": 1.3596402336776164e-05, - "loss": 0.7819, - "step": 17430 - }, - { - "epoch": 1.23, - "learning_rate": 1.3574731423816419e-05, - "loss": 0.7729, - "step": 17440 - }, - { - "epoch": 1.23, - "learning_rate": 1.3553068919514076e-05, - "loss": 0.7782, - "step": 17450 - }, - { - "epoch": 1.23, - "learning_rate": 1.353141485221855e-05, - "loss": 0.7793, - "step": 17460 - }, - { - "epoch": 1.23, - "learning_rate": 1.3509769250268208e-05, - "loss": 0.779, - "step": 17470 - }, - { - "epoch": 1.23, - "learning_rate": 1.348813214199034e-05, - "loss": 0.7814, - "step": 17480 - }, - { - "epoch": 1.24, - "learning_rate": 1.3466503555701126e-05, - "loss": 0.7801, - "step": 17490 - }, - { - "epoch": 1.24, - "learning_rate": 1.3444883519705583e-05, - "loss": 0.7762, - "step": 17500 - }, - { - "epoch": 1.24, - "learning_rate": 1.3423272062297543e-05, - "loss": 0.7824, - "step": 17510 - }, - { - "epoch": 1.24, - "learning_rate": 1.3401669211759615e-05, - "loss": 0.7857, - "step": 17520 - }, - { - "epoch": 1.24, - "learning_rate": 1.3380074996363148e-05, - "loss": 0.7791, - "step": 17530 - }, - { - "epoch": 1.24, - "learning_rate": 1.3358489444368173e-05, - "loss": 0.7753, - "step": 17540 - }, - { - "epoch": 1.24, - "learning_rate": 1.3336912584023395e-05, - "loss": 0.776, - "step": 17550 - }, - { - "epoch": 1.24, - "learning_rate": 1.3315344443566152e-05, - "loss": 0.7805, - "step": 17560 - }, - { - "epoch": 1.24, - "learning_rate": 1.329378505122236e-05, - "loss": 0.779, - "step": 17570 - }, - { - "epoch": 1.24, - "learning_rate": 1.327223443520648e-05, - "loss": 0.7764, - "step": 17580 - }, - { - "epoch": 1.24, - "learning_rate": 1.3250692623721504e-05, - "loss": 0.7822, - "step": 17590 - }, - { - "epoch": 1.24, - "learning_rate": 1.322915964495889e-05, - "loss": 0.7789, - "step": 17600 - }, - { - "epoch": 1.24, - "learning_rate": 1.3207635527098543e-05, - "loss": 0.7817, - "step": 17610 - }, - { - "epoch": 1.24, - "learning_rate": 1.3186120298308767e-05, - "loss": 0.7808, - "step": 17620 - }, - { - "epoch": 1.25, - "learning_rate": 1.316461398674624e-05, - "loss": 0.7765, - "step": 17630 - }, - { - "epoch": 1.25, - "learning_rate": 1.3143116620555963e-05, - "loss": 0.7795, - "step": 17640 - }, - { - "epoch": 1.25, - "learning_rate": 1.3121628227871227e-05, - "loss": 0.7727, - "step": 17650 - }, - { - "epoch": 1.25, - "learning_rate": 1.3100148836813591e-05, - "loss": 0.7779, - "step": 17660 - }, - { - "epoch": 1.25, - "learning_rate": 1.3078678475492823e-05, - "loss": 0.7786, - "step": 17670 - }, - { - "epoch": 1.25, - "learning_rate": 1.3057217172006887e-05, - "loss": 0.7838, - "step": 17680 - }, - { - "epoch": 1.25, - "learning_rate": 1.3035764954441876e-05, - "loss": 0.778, - "step": 17690 - }, - { - "epoch": 1.25, - "learning_rate": 1.301432185087201e-05, - "loss": 0.7826, - "step": 17700 - }, - { - "epoch": 1.25, - "learning_rate": 1.2992887889359565e-05, - "loss": 0.7774, - "step": 17710 - }, - { - "epoch": 1.25, - "learning_rate": 1.2971463097954874e-05, - "loss": 0.7824, - "step": 17720 - }, - { - "epoch": 1.25, - "learning_rate": 1.295004750469624e-05, - "loss": 0.7718, - "step": 17730 - }, - { - "epoch": 1.25, - "learning_rate": 1.2928641137609952e-05, - "loss": 0.776, - "step": 17740 - }, - { - "epoch": 1.25, - "learning_rate": 1.2907244024710215e-05, - "loss": 0.783, - "step": 17750 - }, - { - "epoch": 1.25, - "learning_rate": 1.2885856193999128e-05, - "loss": 0.788, - "step": 17760 - }, - { - "epoch": 1.26, - "learning_rate": 1.286447767346664e-05, - "loss": 0.7777, - "step": 17770 - }, - { - "epoch": 1.26, - "learning_rate": 1.2843108491090517e-05, - "loss": 0.7823, - "step": 17780 - }, - { - "epoch": 1.26, - "learning_rate": 1.2821748674836303e-05, - "loss": 0.7774, - "step": 17790 - }, - { - "epoch": 1.26, - "learning_rate": 1.2800398252657292e-05, - "loss": 0.781, - "step": 17800 - }, - { - "epoch": 1.26, - "learning_rate": 1.277905725249446e-05, - "loss": 0.7709, - "step": 17810 - }, - { - "epoch": 1.26, - "learning_rate": 1.275772570227648e-05, - "loss": 0.774, - "step": 17820 - }, - { - "epoch": 1.26, - "learning_rate": 1.273640362991965e-05, - "loss": 0.7757, - "step": 17830 - }, - { - "epoch": 1.26, - "learning_rate": 1.2715091063327854e-05, - "loss": 0.7785, - "step": 17840 - }, - { - "epoch": 1.26, - "learning_rate": 1.2693788030392553e-05, - "loss": 0.7691, - "step": 17850 - }, - { - "epoch": 1.26, - "learning_rate": 1.2672494558992719e-05, - "loss": 0.7802, - "step": 17860 - }, - { - "epoch": 1.26, - "learning_rate": 1.2651210676994821e-05, - "loss": 0.7814, - "step": 17870 - }, - { - "epoch": 1.26, - "learning_rate": 1.262993641225276e-05, - "loss": 0.7712, - "step": 17880 - }, - { - "epoch": 1.26, - "learning_rate": 1.2608671792607866e-05, - "loss": 0.7732, - "step": 17890 - }, - { - "epoch": 1.26, - "learning_rate": 1.2587416845888844e-05, - "loss": 0.7754, - "step": 17900 - }, - { - "epoch": 1.27, - "learning_rate": 1.2566171599911748e-05, - "loss": 0.7761, - "step": 17910 - }, - { - "epoch": 1.27, - "learning_rate": 1.2544936082479917e-05, - "loss": 0.7728, - "step": 17920 - }, - { - "epoch": 1.27, - "learning_rate": 1.252371032138398e-05, - "loss": 0.7741, - "step": 17930 - }, - { - "epoch": 1.27, - "learning_rate": 1.2502494344401783e-05, - "loss": 0.7775, - "step": 17940 - }, - { - "epoch": 1.27, - "learning_rate": 1.2481288179298378e-05, - "loss": 0.7684, - "step": 17950 - }, - { - "epoch": 1.27, - "learning_rate": 1.2460091853825962e-05, - "loss": 0.7746, - "step": 17960 - }, - { - "epoch": 1.27, - "learning_rate": 1.2438905395723867e-05, - "loss": 0.7686, - "step": 17970 - }, - { - "epoch": 1.27, - "learning_rate": 1.241772883271851e-05, - "loss": 0.7773, - "step": 17980 - }, - { - "epoch": 1.27, - "learning_rate": 1.2396562192523356e-05, - "loss": 0.769, - "step": 17990 - }, - { - "epoch": 1.27, - "learning_rate": 1.2375405502838889e-05, - "loss": 0.7747, - "step": 18000 - }, - { - "epoch": 1.27, - "learning_rate": 1.2354258791352562e-05, - "loss": 0.7804, - "step": 18010 - }, - { - "epoch": 1.27, - "learning_rate": 1.233312208573878e-05, - "loss": 0.774, - "step": 18020 - }, - { - "epoch": 1.27, - "learning_rate": 1.2311995413658853e-05, - "loss": 0.7788, - "step": 18030 - }, - { - "epoch": 1.27, - "learning_rate": 1.2290878802760942e-05, - "loss": 0.775, - "step": 18040 - }, - { - "epoch": 1.27, - "learning_rate": 1.2269772280680058e-05, - "loss": 0.7803, - "step": 18050 - }, - { - "epoch": 1.28, - "learning_rate": 1.2248675875038015e-05, - "loss": 0.7619, - "step": 18060 - }, - { - "epoch": 1.28, - "learning_rate": 1.2227589613443367e-05, - "loss": 0.7816, - "step": 18070 - }, - { - "epoch": 1.28, - "learning_rate": 1.2206513523491413e-05, - "loss": 0.7729, - "step": 18080 - }, - { - "epoch": 1.28, - "learning_rate": 1.2185447632764124e-05, - "loss": 0.7765, - "step": 18090 - }, - { - "epoch": 1.28, - "learning_rate": 1.2164391968830138e-05, - "loss": 0.7675, - "step": 18100 - }, - { - "epoch": 1.28, - "learning_rate": 1.2143346559244705e-05, - "loss": 0.7817, - "step": 18110 - }, - { - "epoch": 1.28, - "learning_rate": 1.212231143154964e-05, - "loss": 0.7745, - "step": 18120 - }, - { - "epoch": 1.28, - "learning_rate": 1.2101286613273322e-05, - "loss": 0.7725, - "step": 18130 - }, - { - "epoch": 1.28, - "learning_rate": 1.2080272131930628e-05, - "loss": 0.7739, - "step": 18140 - }, - { - "epoch": 1.28, - "learning_rate": 1.2059268015022916e-05, - "loss": 0.7725, - "step": 18150 - }, - { - "epoch": 1.28, - "learning_rate": 1.2038274290037971e-05, - "loss": 0.763, - "step": 18160 - }, - { - "epoch": 1.28, - "learning_rate": 1.2017290984449979e-05, - "loss": 0.7796, - "step": 18170 - }, - { - "epoch": 1.28, - "learning_rate": 1.19963181257195e-05, - "loss": 0.7732, - "step": 18180 - }, - { - "epoch": 1.28, - "learning_rate": 1.1975355741293412e-05, - "loss": 0.7786, - "step": 18190 - }, - { - "epoch": 1.29, - "learning_rate": 1.1954403858604885e-05, - "loss": 0.7692, - "step": 18200 - }, - { - "epoch": 1.29, - "learning_rate": 1.193346250507335e-05, - "loss": 0.773, - "step": 18210 - }, - { - "epoch": 1.29, - "learning_rate": 1.191253170810446e-05, - "loss": 0.7784, - "step": 18220 - }, - { - "epoch": 1.29, - "learning_rate": 1.1891611495090051e-05, - "loss": 0.7644, - "step": 18230 - }, - { - "epoch": 1.29, - "learning_rate": 1.1870701893408105e-05, - "loss": 0.778, - "step": 18240 - }, - { - "epoch": 1.29, - "learning_rate": 1.1849802930422726e-05, - "loss": 0.771, - "step": 18250 - }, - { - "epoch": 1.29, - "learning_rate": 1.1828914633484087e-05, - "loss": 0.7774, - "step": 18260 - }, - { - "epoch": 1.29, - "learning_rate": 1.1808037029928397e-05, - "loss": 0.77, - "step": 18270 - }, - { - "epoch": 1.29, - "learning_rate": 1.1787170147077884e-05, - "loss": 0.769, - "step": 18280 - }, - { - "epoch": 1.29, - "learning_rate": 1.1766314012240745e-05, - "loss": 0.7719, - "step": 18290 - }, - { - "epoch": 1.29, - "learning_rate": 1.1745468652711094e-05, - "loss": 0.7754, - "step": 18300 - }, - { - "epoch": 1.29, - "learning_rate": 1.1724634095768965e-05, - "loss": 0.7764, - "step": 18310 - }, - { - "epoch": 1.29, - "learning_rate": 1.1703810368680244e-05, - "loss": 0.7751, - "step": 18320 - }, - { - "epoch": 1.29, - "learning_rate": 1.1682997498696645e-05, - "loss": 0.78, - "step": 18330 - }, - { - "epoch": 1.3, - "learning_rate": 1.1662195513055679e-05, - "loss": 0.7641, - "step": 18340 - }, - { - "epoch": 1.3, - "learning_rate": 1.1641404438980604e-05, - "loss": 0.7689, - "step": 18350 - }, - { - "epoch": 1.3, - "learning_rate": 1.1620624303680393e-05, - "loss": 0.7726, - "step": 18360 - }, - { - "epoch": 1.3, - "learning_rate": 1.1599855134349731e-05, - "loss": 0.7788, - "step": 18370 - }, - { - "epoch": 1.3, - "learning_rate": 1.1579096958168914e-05, - "loss": 0.7699, - "step": 18380 - }, - { - "epoch": 1.3, - "learning_rate": 1.1558349802303895e-05, - "loss": 0.7722, - "step": 18390 - }, - { - "epoch": 1.3, - "learning_rate": 1.1537613693906158e-05, - "loss": 0.7759, - "step": 18400 - }, - { - "epoch": 1.3, - "learning_rate": 1.151688866011278e-05, - "loss": 0.7749, - "step": 18410 - }, - { - "epoch": 1.3, - "learning_rate": 1.1496174728046281e-05, - "loss": 0.7713, - "step": 18420 - }, - { - "epoch": 1.3, - "learning_rate": 1.1475471924814717e-05, - "loss": 0.7802, - "step": 18430 - }, - { - "epoch": 1.3, - "learning_rate": 1.1454780277511537e-05, - "loss": 0.7752, - "step": 18440 - }, - { - "epoch": 1.3, - "learning_rate": 1.1434099813215616e-05, - "loss": 0.7708, - "step": 18450 - }, - { - "epoch": 1.3, - "learning_rate": 1.1413430558991169e-05, - "loss": 0.7765, - "step": 18460 - }, - { - "epoch": 1.3, - "learning_rate": 1.1392772541887772e-05, - "loss": 0.769, - "step": 18470 - }, - { - "epoch": 1.31, - "learning_rate": 1.1372125788940257e-05, - "loss": 0.7721, - "step": 18480 - }, - { - "epoch": 1.31, - "learning_rate": 1.1351490327168765e-05, - "loss": 0.7872, - "step": 18490 - }, - { - "epoch": 1.31, - "learning_rate": 1.1330866183578596e-05, - "loss": 0.7731, - "step": 18500 - }, - { - "epoch": 1.31, - "learning_rate": 1.1310253385160296e-05, - "loss": 0.7751, - "step": 18510 - }, - { - "epoch": 1.31, - "learning_rate": 1.1289651958889527e-05, - "loss": 0.7734, - "step": 18520 - }, - { - "epoch": 1.31, - "learning_rate": 1.1269061931727094e-05, - "loss": 0.7737, - "step": 18530 - }, - { - "epoch": 1.31, - "learning_rate": 1.1248483330618858e-05, - "loss": 0.779, - "step": 18540 - }, - { - "epoch": 1.31, - "learning_rate": 1.122791618249576e-05, - "loss": 0.7709, - "step": 18550 - }, - { - "epoch": 1.31, - "learning_rate": 1.1207360514273714e-05, - "loss": 0.7696, - "step": 18560 - }, - { - "epoch": 1.31, - "learning_rate": 1.1186816352853644e-05, - "loss": 0.7815, - "step": 18570 - }, - { - "epoch": 1.31, - "learning_rate": 1.1166283725121398e-05, - "loss": 0.7693, - "step": 18580 - }, - { - "epoch": 1.31, - "learning_rate": 1.114576265794773e-05, - "loss": 0.7704, - "step": 18590 - }, - { - "epoch": 1.31, - "learning_rate": 1.1125253178188263e-05, - "loss": 0.7757, - "step": 18600 - }, - { - "epoch": 1.31, - "learning_rate": 1.1104755312683476e-05, - "loss": 0.7717, - "step": 18610 - }, - { - "epoch": 1.32, - "learning_rate": 1.1084269088258614e-05, - "loss": 0.7692, - "step": 18620 - }, - { - "epoch": 1.32, - "learning_rate": 1.1063794531723729e-05, - "loss": 0.7643, - "step": 18630 - }, - { - "epoch": 1.32, - "learning_rate": 1.1043331669873566e-05, - "loss": 0.7724, - "step": 18640 - }, - { - "epoch": 1.32, - "learning_rate": 1.1022880529487581e-05, - "loss": 0.7716, - "step": 18650 - }, - { - "epoch": 1.32, - "learning_rate": 1.1002441137329898e-05, - "loss": 0.7751, - "step": 18660 - }, - { - "epoch": 1.32, - "learning_rate": 1.0982013520149248e-05, - "loss": 0.7748, - "step": 18670 - }, - { - "epoch": 1.32, - "learning_rate": 1.0961597704678974e-05, - "loss": 0.7717, - "step": 18680 - }, - { - "epoch": 1.32, - "learning_rate": 1.094119371763696e-05, - "loss": 0.7685, - "step": 18690 - }, - { - "epoch": 1.32, - "learning_rate": 1.0920801585725597e-05, - "loss": 0.7703, - "step": 18700 - }, - { - "epoch": 1.32, - "learning_rate": 1.0900421335631802e-05, - "loss": 0.7715, - "step": 18710 - }, - { - "epoch": 1.32, - "learning_rate": 1.0880052994026906e-05, - "loss": 0.762, - "step": 18720 - }, - { - "epoch": 1.32, - "learning_rate": 1.0859696587566657e-05, - "loss": 0.7681, - "step": 18730 - }, - { - "epoch": 1.32, - "learning_rate": 1.0839352142891214e-05, - "loss": 0.771, - "step": 18740 - }, - { - "epoch": 1.32, - "learning_rate": 1.0819019686625046e-05, - "loss": 0.7667, - "step": 18750 - }, - { - "epoch": 1.33, - "learning_rate": 1.0798699245376959e-05, - "loss": 0.7657, - "step": 18760 - }, - { - "epoch": 1.33, - "learning_rate": 1.0778390845740017e-05, - "loss": 0.7642, - "step": 18770 - }, - { - "epoch": 1.33, - "learning_rate": 1.0758094514291541e-05, - "loss": 0.7644, - "step": 18780 - }, - { - "epoch": 1.33, - "learning_rate": 1.0737810277593046e-05, - "loss": 0.7765, - "step": 18790 - }, - { - "epoch": 1.33, - "learning_rate": 1.0717538162190227e-05, - "loss": 0.7701, - "step": 18800 - }, - { - "epoch": 1.33, - "learning_rate": 1.0697278194612899e-05, - "loss": 0.767, - "step": 18810 - }, - { - "epoch": 1.33, - "learning_rate": 1.0677030401375012e-05, - "loss": 0.7637, - "step": 18820 - }, - { - "epoch": 1.33, - "learning_rate": 1.065679480897455e-05, - "loss": 0.7754, - "step": 18830 - }, - { - "epoch": 1.33, - "learning_rate": 1.0636571443893555e-05, - "loss": 0.7745, - "step": 18840 - }, - { - "epoch": 1.33, - "learning_rate": 1.0616360332598044e-05, - "loss": 0.7705, - "step": 18850 - }, - { - "epoch": 1.33, - "learning_rate": 1.0596161501538027e-05, - "loss": 0.7735, - "step": 18860 - }, - { - "epoch": 1.33, - "learning_rate": 1.0575974977147419e-05, - "loss": 0.7734, - "step": 18870 - }, - { - "epoch": 1.33, - "learning_rate": 1.055580078584403e-05, - "loss": 0.7665, - "step": 18880 - }, - { - "epoch": 1.33, - "learning_rate": 1.0535638954029536e-05, - "loss": 0.7688, - "step": 18890 - }, - { - "epoch": 1.34, - "learning_rate": 1.0515489508089453e-05, - "loss": 0.7694, - "step": 18900 - }, - { - "epoch": 1.34, - "learning_rate": 1.0495352474393057e-05, - "loss": 0.7779, - "step": 18910 - }, - { - "epoch": 1.34, - "learning_rate": 1.0475227879293413e-05, - "loss": 0.7756, - "step": 18920 - }, - { - "epoch": 1.34, - "learning_rate": 1.0455115749127276e-05, - "loss": 0.7666, - "step": 18930 - }, - { - "epoch": 1.34, - "learning_rate": 1.0435016110215122e-05, - "loss": 0.7694, - "step": 18940 - }, - { - "epoch": 1.34, - "learning_rate": 1.0414928988861051e-05, - "loss": 0.7677, - "step": 18950 - }, - { - "epoch": 1.34, - "learning_rate": 1.0394854411352791e-05, - "loss": 0.7609, - "step": 18960 - }, - { - "epoch": 1.34, - "learning_rate": 1.0374792403961652e-05, - "loss": 0.7709, - "step": 18970 - }, - { - "epoch": 1.34, - "learning_rate": 1.035474299294251e-05, - "loss": 0.7764, - "step": 18980 - }, - { - "epoch": 1.34, - "learning_rate": 1.033470620453373e-05, - "loss": 0.7632, - "step": 18990 - }, - { - "epoch": 1.34, - "learning_rate": 1.0314682064957182e-05, - "loss": 0.7778, - "step": 19000 - }, - { - "epoch": 1.34, - "learning_rate": 1.0294670600418164e-05, - "loss": 0.7681, - "step": 19010 - }, - { - "epoch": 1.34, - "learning_rate": 1.0274671837105403e-05, - "loss": 0.7676, - "step": 19020 - }, - { - "epoch": 1.34, - "learning_rate": 1.0254685801190993e-05, - "loss": 0.7673, - "step": 19030 - }, - { - "epoch": 1.34, - "learning_rate": 1.023471251883037e-05, - "loss": 0.7622, - "step": 19040 - }, - { - "epoch": 1.35, - "learning_rate": 1.0214752016162281e-05, - "loss": 0.7703, - "step": 19050 - }, - { - "epoch": 1.35, - "learning_rate": 1.0194804319308762e-05, - "loss": 0.7665, - "step": 19060 - }, - { - "epoch": 1.35, - "learning_rate": 1.017486945437507e-05, - "loss": 0.7621, - "step": 19070 - }, - { - "epoch": 1.35, - "learning_rate": 1.0154947447449686e-05, - "loss": 0.7698, - "step": 19080 - }, - { - "epoch": 1.35, - "learning_rate": 1.0135038324604248e-05, - "loss": 0.7778, - "step": 19090 - }, - { - "epoch": 1.35, - "learning_rate": 1.011514211189356e-05, - "loss": 0.7713, - "step": 19100 - }, - { - "epoch": 1.35, - "learning_rate": 1.0095258835355482e-05, - "loss": 0.767, - "step": 19110 - }, - { - "epoch": 1.35, - "learning_rate": 1.0075388521010998e-05, - "loss": 0.7692, - "step": 19120 - }, - { - "epoch": 1.35, - "learning_rate": 1.0055531194864092e-05, - "loss": 0.7625, - "step": 19130 - }, - { - "epoch": 1.35, - "learning_rate": 1.0035686882901778e-05, - "loss": 0.7691, - "step": 19140 - }, - { - "epoch": 1.35, - "learning_rate": 1.0015855611094007e-05, - "loss": 0.7696, - "step": 19150 - }, - { - "epoch": 1.35, - "learning_rate": 9.996037405393702e-06, - "loss": 0.7806, - "step": 19160 - }, - { - "epoch": 1.35, - "learning_rate": 9.976232291736645e-06, - "loss": 0.7691, - "step": 19170 - }, - { - "epoch": 1.35, - "learning_rate": 9.956440296041531e-06, - "loss": 0.7702, - "step": 19180 - }, - { - "epoch": 1.36, - "learning_rate": 9.936661444209832e-06, - "loss": 0.7712, - "step": 19190 - }, - { - "epoch": 1.36, - "learning_rate": 9.916895762125873e-06, - "loss": 0.7694, - "step": 19200 - }, - { - "epoch": 1.36, - "learning_rate": 9.897143275656706e-06, - "loss": 0.7669, - "step": 19210 - }, - { - "epoch": 1.36, - "learning_rate": 9.877404010652143e-06, - "loss": 0.7648, - "step": 19220 - }, - { - "epoch": 1.36, - "learning_rate": 9.857677992944663e-06, - "loss": 0.7595, - "step": 19230 - }, - { - "epoch": 1.36, - "learning_rate": 9.837965248349439e-06, - "loss": 0.7695, - "step": 19240 - }, - { - "epoch": 1.36, - "learning_rate": 9.818265802664245e-06, - "loss": 0.76, - "step": 19250 - }, - { - "epoch": 1.36, - "learning_rate": 9.798579681669484e-06, - "loss": 0.7691, - "step": 19260 - }, - { - "epoch": 1.36, - "learning_rate": 9.778906911128078e-06, - "loss": 0.7775, - "step": 19270 - }, - { - "epoch": 1.36, - "learning_rate": 9.75924751678552e-06, - "loss": 0.7691, - "step": 19280 - }, - { - "epoch": 1.36, - "learning_rate": 9.739601524369765e-06, - "loss": 0.7622, - "step": 19290 - }, - { - "epoch": 1.36, - "learning_rate": 9.719968959591257e-06, - "loss": 0.76, - "step": 19300 - }, - { - "epoch": 1.36, - "learning_rate": 9.70034984814284e-06, - "loss": 0.759, - "step": 19310 - }, - { - "epoch": 1.36, - "learning_rate": 9.68074421569978e-06, - "loss": 0.7698, - "step": 19320 - }, - { - "epoch": 1.37, - "learning_rate": 9.661152087919682e-06, - "loss": 0.7735, - "step": 19330 - }, - { - "epoch": 1.37, - "learning_rate": 9.641573490442484e-06, - "loss": 0.7696, - "step": 19340 - }, - { - "epoch": 1.37, - "learning_rate": 9.62200844889041e-06, - "loss": 0.7689, - "step": 19350 - }, - { - "epoch": 1.37, - "learning_rate": 9.602456988867965e-06, - "loss": 0.7662, - "step": 19360 - }, - { - "epoch": 1.37, - "learning_rate": 9.582919135961852e-06, - "loss": 0.7684, - "step": 19370 - }, - { - "epoch": 1.37, - "learning_rate": 9.563394915740996e-06, - "loss": 0.7652, - "step": 19380 - }, - { - "epoch": 1.37, - "learning_rate": 9.54388435375645e-06, - "loss": 0.769, - "step": 19390 - }, - { - "epoch": 1.37, - "learning_rate": 9.524387475541423e-06, - "loss": 0.7684, - "step": 19400 - }, - { - "epoch": 1.37, - "learning_rate": 9.504904306611196e-06, - "loss": 0.7692, - "step": 19410 - }, - { - "epoch": 1.37, - "learning_rate": 9.485434872463104e-06, - "loss": 0.7589, - "step": 19420 - }, - { - "epoch": 1.37, - "learning_rate": 9.465979198576535e-06, - "loss": 0.7709, - "step": 19430 - }, - { - "epoch": 1.37, - "learning_rate": 9.446537310412845e-06, - "loss": 0.7696, - "step": 19440 - }, - { - "epoch": 1.37, - "learning_rate": 9.427109233415348e-06, - "loss": 0.7601, - "step": 19450 - }, - { - "epoch": 1.37, - "learning_rate": 9.407694993009304e-06, - "loss": 0.7661, - "step": 19460 - }, - { - "epoch": 1.38, - "learning_rate": 9.388294614601843e-06, - "loss": 0.7671, - "step": 19470 - }, - { - "epoch": 1.38, - "learning_rate": 9.368908123581974e-06, - "loss": 0.7629, - "step": 19480 - }, - { - "epoch": 1.38, - "learning_rate": 9.349535545320516e-06, - "loss": 0.7619, - "step": 19490 - }, - { - "epoch": 1.38, - "learning_rate": 9.330176905170077e-06, - "loss": 0.7654, - "step": 19500 - }, - { - "epoch": 1.38, - "learning_rate": 9.31083222846505e-06, - "loss": 0.7666, - "step": 19510 - }, - { - "epoch": 1.38, - "learning_rate": 9.291501540521524e-06, - "loss": 0.7641, - "step": 19520 - }, - { - "epoch": 1.38, - "learning_rate": 9.272184866637307e-06, - "loss": 0.7625, - "step": 19530 - }, - { - "epoch": 1.38, - "learning_rate": 9.252882232091847e-06, - "loss": 0.7728, - "step": 19540 - }, - { - "epoch": 1.38, - "learning_rate": 9.23359366214622e-06, - "loss": 0.7611, - "step": 19550 - }, - { - "epoch": 1.38, - "learning_rate": 9.21431918204312e-06, - "loss": 0.7709, - "step": 19560 - }, - { - "epoch": 1.38, - "learning_rate": 9.195058817006772e-06, - "loss": 0.7546, - "step": 19570 - }, - { - "epoch": 1.38, - "learning_rate": 9.175812592242939e-06, - "loss": 0.7669, - "step": 19580 - }, - { - "epoch": 1.38, - "learning_rate": 9.156580532938895e-06, - "loss": 0.7693, - "step": 19590 - }, - { - "epoch": 1.38, - "learning_rate": 9.137362664263343e-06, - "loss": 0.7698, - "step": 19600 - }, - { - "epoch": 1.39, - "learning_rate": 9.118159011366452e-06, - "loss": 0.7722, - "step": 19610 - }, - { - "epoch": 1.39, - "learning_rate": 9.098969599379754e-06, - "loss": 0.7673, - "step": 19620 - }, - { - "epoch": 1.39, - "learning_rate": 9.079794453416174e-06, - "loss": 0.7636, - "step": 19630 - }, - { - "epoch": 1.39, - "learning_rate": 9.060633598569942e-06, - "loss": 0.7657, - "step": 19640 - }, - { - "epoch": 1.39, - "learning_rate": 9.041487059916596e-06, - "loss": 0.7651, - "step": 19650 - }, - { - "epoch": 1.39, - "learning_rate": 9.022354862512935e-06, - "loss": 0.7616, - "step": 19660 - }, - { - "epoch": 1.39, - "learning_rate": 9.003237031397002e-06, - "loss": 0.7649, - "step": 19670 - }, - { - "epoch": 1.39, - "learning_rate": 8.984133591588022e-06, - "loss": 0.7676, - "step": 19680 - }, - { - "epoch": 1.39, - "learning_rate": 8.965044568086405e-06, - "loss": 0.7708, - "step": 19690 - }, - { - "epoch": 1.39, - "learning_rate": 8.94596998587367e-06, - "loss": 0.7642, - "step": 19700 - }, - { - "epoch": 1.39, - "learning_rate": 8.926909869912463e-06, - "loss": 0.7674, - "step": 19710 - }, - { - "epoch": 1.39, - "learning_rate": 8.907864245146485e-06, - "loss": 0.7593, - "step": 19720 - }, - { - "epoch": 1.39, - "learning_rate": 8.888833136500468e-06, - "loss": 0.7691, - "step": 19730 - }, - { - "epoch": 1.39, - "learning_rate": 8.869816568880144e-06, - "loss": 0.7614, - "step": 19740 - }, - { - "epoch": 1.4, - "learning_rate": 8.850814567172245e-06, - "loss": 0.7645, - "step": 19750 - }, - { - "epoch": 1.4, - "learning_rate": 8.831827156244403e-06, - "loss": 0.7625, - "step": 19760 - }, - { - "epoch": 1.4, - "learning_rate": 8.812854360945185e-06, - "loss": 0.7614, - "step": 19770 - }, - { - "epoch": 1.4, - "learning_rate": 8.793896206104002e-06, - "loss": 0.7694, - "step": 19780 - }, - { - "epoch": 1.4, - "learning_rate": 8.77495271653114e-06, - "loss": 0.7626, - "step": 19790 - }, - { - "epoch": 1.4, - "learning_rate": 8.756023917017662e-06, - "loss": 0.7673, - "step": 19800 - }, - { - "epoch": 1.4, - "learning_rate": 8.737109832335419e-06, - "loss": 0.763, - "step": 19810 - }, - { - "epoch": 1.4, - "learning_rate": 8.718210487237e-06, - "loss": 0.7682, - "step": 19820 - }, - { - "epoch": 1.4, - "learning_rate": 8.69932590645572e-06, - "loss": 0.7612, - "step": 19830 - }, - { - "epoch": 1.4, - "learning_rate": 8.680456114705546e-06, - "loss": 0.7552, - "step": 19840 - }, - { - "epoch": 1.4, - "learning_rate": 8.661601136681124e-06, - "loss": 0.7688, - "step": 19850 - }, - { - "epoch": 1.4, - "learning_rate": 8.642760997057675e-06, - "loss": 0.7632, - "step": 19860 - }, - { - "epoch": 1.4, - "learning_rate": 8.623935720491046e-06, - "loss": 0.7629, - "step": 19870 - }, - { - "epoch": 1.4, - "learning_rate": 8.605125331617578e-06, - "loss": 0.7661, - "step": 19880 - }, - { - "epoch": 1.4, - "learning_rate": 8.586329855054184e-06, - "loss": 0.7627, - "step": 19890 - }, - { - "epoch": 1.41, - "learning_rate": 8.567549315398216e-06, - "loss": 0.763, - "step": 19900 - }, - { - "epoch": 1.41, - "learning_rate": 8.548783737227518e-06, - "loss": 0.7632, - "step": 19910 - }, - { - "epoch": 1.41, - "learning_rate": 8.530033145100318e-06, - "loss": 0.7639, - "step": 19920 - }, - { - "epoch": 1.41, - "learning_rate": 8.511297563555263e-06, - "loss": 0.7601, - "step": 19930 - }, - { - "epoch": 1.41, - "learning_rate": 8.492577017111327e-06, - "loss": 0.7529, - "step": 19940 - }, - { - "epoch": 1.41, - "learning_rate": 8.473871530267846e-06, - "loss": 0.7673, - "step": 19950 - }, - { - "epoch": 1.41, - "learning_rate": 8.455181127504391e-06, - "loss": 0.7625, - "step": 19960 - }, - { - "epoch": 1.41, - "learning_rate": 8.43650583328085e-06, - "loss": 0.764, - "step": 19970 - }, - { - "epoch": 1.41, - "learning_rate": 8.417845672037301e-06, - "loss": 0.7681, - "step": 19980 - }, - { - "epoch": 1.41, - "learning_rate": 8.399200668194045e-06, - "loss": 0.7608, - "step": 19990 - }, - { - "epoch": 1.41, - "learning_rate": 8.380570846151517e-06, - "loss": 0.7686, - "step": 20000 - }, - { - "epoch": 1.41, - "learning_rate": 8.361956230290313e-06, - "loss": 0.7665, - "step": 20010 - }, - { - "epoch": 1.41, - "learning_rate": 8.343356844971105e-06, - "loss": 0.7683, - "step": 20020 - }, - { - "epoch": 1.41, - "learning_rate": 8.324772714534662e-06, - "loss": 0.7626, - "step": 20030 - }, - { - "epoch": 1.42, - "learning_rate": 8.306203863301743e-06, - "loss": 0.7639, - "step": 20040 - }, - { - "epoch": 1.42, - "learning_rate": 8.28765031557316e-06, - "loss": 0.7715, - "step": 20050 - }, - { - "epoch": 1.42, - "learning_rate": 8.269112095629662e-06, - "loss": 0.7636, - "step": 20060 - }, - { - "epoch": 1.42, - "learning_rate": 8.250589227731967e-06, - "loss": 0.7688, - "step": 20070 - }, - { - "epoch": 1.42, - "learning_rate": 8.232081736120676e-06, - "loss": 0.7656, - "step": 20080 - }, - { - "epoch": 1.42, - "learning_rate": 8.213589645016291e-06, - "loss": 0.7715, - "step": 20090 - }, - { - "epoch": 1.42, - "learning_rate": 8.195112978619145e-06, - "loss": 0.7648, - "step": 20100 - }, - { - "epoch": 1.42, - "learning_rate": 8.176651761109381e-06, - "loss": 0.7658, - "step": 20110 - }, - { - "epoch": 1.42, - "learning_rate": 8.15820601664693e-06, - "loss": 0.7699, - "step": 20120 - }, - { - "epoch": 1.42, - "learning_rate": 8.13977576937149e-06, - "loss": 0.7604, - "step": 20130 - }, - { - "epoch": 1.42, - "learning_rate": 8.121361043402442e-06, - "loss": 0.7697, - "step": 20140 - }, - { - "epoch": 1.42, - "learning_rate": 8.102961862838899e-06, - "loss": 0.7677, - "step": 20150 - }, - { - "epoch": 1.42, - "learning_rate": 8.084578251759583e-06, - "loss": 0.7661, - "step": 20160 - }, - { - "epoch": 1.42, - "learning_rate": 8.066210234222882e-06, - "loss": 0.7636, - "step": 20170 - }, - { - "epoch": 1.43, - "learning_rate": 8.047857834266755e-06, - "loss": 0.7559, - "step": 20180 - }, - { - "epoch": 1.43, - "learning_rate": 8.029521075908713e-06, - "loss": 0.7571, - "step": 20190 - }, - { - "epoch": 1.43, - "learning_rate": 8.011199983145827e-06, - "loss": 0.7604, - "step": 20200 - }, - { - "epoch": 1.43, - "learning_rate": 7.992894579954644e-06, - "loss": 0.7662, - "step": 20210 - }, - { - "epoch": 1.43, - "learning_rate": 7.974604890291175e-06, - "loss": 0.7659, - "step": 20220 - }, - { - "epoch": 1.43, - "learning_rate": 7.956330938090892e-06, - "loss": 0.7634, - "step": 20230 - }, - { - "epoch": 1.43, - "learning_rate": 7.938072747268644e-06, - "loss": 0.7661, - "step": 20240 - }, - { - "epoch": 1.43, - "learning_rate": 7.919830341718673e-06, - "loss": 0.7678, - "step": 20250 - }, - { - "epoch": 1.43, - "learning_rate": 7.901603745314552e-06, - "loss": 0.7629, - "step": 20260 - }, - { - "epoch": 1.43, - "learning_rate": 7.88339298190916e-06, - "loss": 0.763, - "step": 20270 - }, - { - "epoch": 1.43, - "learning_rate": 7.865198075334682e-06, - "loss": 0.7637, - "step": 20280 - }, - { - "epoch": 1.43, - "learning_rate": 7.84701904940251e-06, - "loss": 0.758, - "step": 20290 - }, - { - "epoch": 1.43, - "learning_rate": 7.828855927903298e-06, - "loss": 0.7609, - "step": 20300 - }, - { - "epoch": 1.43, - "learning_rate": 7.810708734606854e-06, - "loss": 0.7663, - "step": 20310 - }, - { - "epoch": 1.44, - "learning_rate": 7.792577493262143e-06, - "loss": 0.7644, - "step": 20320 - }, - { - "epoch": 1.44, - "learning_rate": 7.774462227597278e-06, - "loss": 0.7542, - "step": 20330 - }, - { - "epoch": 1.44, - "learning_rate": 7.756362961319442e-06, - "loss": 0.7616, - "step": 20340 - }, - { - "epoch": 1.44, - "learning_rate": 7.738279718114878e-06, - "loss": 0.7644, - "step": 20350 - }, - { - "epoch": 1.44, - "learning_rate": 7.720212521648885e-06, - "loss": 0.7574, - "step": 20360 - }, - { - "epoch": 1.44, - "learning_rate": 7.70216139556573e-06, - "loss": 0.7637, - "step": 20370 - }, - { - "epoch": 1.44, - "learning_rate": 7.68412636348868e-06, - "loss": 0.7657, - "step": 20380 - }, - { - "epoch": 1.44, - "learning_rate": 7.666107449019909e-06, - "loss": 0.7583, - "step": 20390 - }, - { - "epoch": 1.44, - "learning_rate": 7.648104675740527e-06, - "loss": 0.7612, - "step": 20400 - }, - { - "epoch": 1.44, - "learning_rate": 7.630118067210498e-06, - "loss": 0.7544, - "step": 20410 - }, - { - "epoch": 1.44, - "learning_rate": 7.612147646968646e-06, - "loss": 0.7626, - "step": 20420 - }, - { - "epoch": 1.44, - "learning_rate": 7.594193438532596e-06, - "loss": 0.7672, - "step": 20430 - }, - { - "epoch": 1.44, - "learning_rate": 7.576255465398772e-06, - "loss": 0.756, - "step": 20440 - }, - { - "epoch": 1.44, - "learning_rate": 7.558333751042335e-06, - "loss": 0.7611, - "step": 20450 - }, - { - "epoch": 1.45, - "learning_rate": 7.540428318917192e-06, - "loss": 0.7595, - "step": 20460 - }, - { - "epoch": 1.45, - "learning_rate": 7.522539192455909e-06, - "loss": 0.7578, - "step": 20470 - }, - { - "epoch": 1.45, - "learning_rate": 7.504666395069749e-06, - "loss": 0.7634, - "step": 20480 - }, - { - "epoch": 1.45, - "learning_rate": 7.486809950148575e-06, - "loss": 0.7602, - "step": 20490 - }, - { - "epoch": 1.45, - "learning_rate": 7.468969881060868e-06, - "loss": 0.7594, - "step": 20500 - }, - { - "epoch": 1.45, - "learning_rate": 7.451146211153659e-06, - "loss": 0.7609, - "step": 20510 - }, - { - "epoch": 1.45, - "learning_rate": 7.43333896375255e-06, - "loss": 0.7702, - "step": 20520 - }, - { - "epoch": 1.45, - "learning_rate": 7.415548162161617e-06, - "loss": 0.7633, - "step": 20530 - }, - { - "epoch": 1.45, - "learning_rate": 7.39777382966344e-06, - "loss": 0.7584, - "step": 20540 - }, - { - "epoch": 1.45, - "learning_rate": 7.38001598951902e-06, - "loss": 0.7572, - "step": 20550 - }, - { - "epoch": 1.45, - "learning_rate": 7.362274664967814e-06, - "loss": 0.7625, - "step": 20560 - }, - { - "epoch": 1.45, - "learning_rate": 7.3445498792276095e-06, - "loss": 0.7584, - "step": 20570 - }, - { - "epoch": 1.45, - "learning_rate": 7.326841655494605e-06, - "loss": 0.7545, - "step": 20580 - }, - { - "epoch": 1.45, - "learning_rate": 7.309150016943282e-06, - "loss": 0.7573, - "step": 20590 - }, - { - "epoch": 1.46, - "learning_rate": 7.2914749867264545e-06, - "loss": 0.7566, - "step": 20600 - }, - { - "epoch": 1.46, - "learning_rate": 7.273816587975167e-06, - "loss": 0.7626, - "step": 20610 - }, - { - "epoch": 1.46, - "learning_rate": 7.256174843798727e-06, - "loss": 0.7629, - "step": 20620 - }, - { - "epoch": 1.46, - "learning_rate": 7.238549777284618e-06, - "loss": 0.7531, - "step": 20630 - }, - { - "epoch": 1.46, - "learning_rate": 7.220941411498539e-06, - "loss": 0.7574, - "step": 20640 - }, - { - "epoch": 1.46, - "learning_rate": 7.203349769484278e-06, - "loss": 0.7552, - "step": 20650 - }, - { - "epoch": 1.46, - "learning_rate": 7.185774874263785e-06, - "loss": 0.7556, - "step": 20660 - }, - { - "epoch": 1.46, - "learning_rate": 7.16821674883706e-06, - "loss": 0.755, - "step": 20670 - }, - { - "epoch": 1.46, - "learning_rate": 7.1506754161821915e-06, - "loss": 0.7573, - "step": 20680 - }, - { - "epoch": 1.46, - "learning_rate": 7.1331508992552504e-06, - "loss": 0.7631, - "step": 20690 - }, - { - "epoch": 1.46, - "learning_rate": 7.1156432209903405e-06, - "loss": 0.7516, - "step": 20700 - }, - { - "epoch": 1.46, - "learning_rate": 7.098152404299496e-06, - "loss": 0.7592, - "step": 20710 - }, - { - "epoch": 1.46, - "learning_rate": 7.080678472072719e-06, - "loss": 0.7655, - "step": 20720 - }, - { - "epoch": 1.46, - "learning_rate": 7.06322144717787e-06, - "loss": 0.7659, - "step": 20730 - }, - { - "epoch": 1.47, - "learning_rate": 7.045781352460728e-06, - "loss": 0.7642, - "step": 20740 - }, - { - "epoch": 1.47, - "learning_rate": 7.028358210744881e-06, - "loss": 0.7553, - "step": 20750 - }, - { - "epoch": 1.47, - "learning_rate": 7.010952044831765e-06, - "loss": 0.7536, - "step": 20760 - }, - { - "epoch": 1.47, - "learning_rate": 6.993562877500562e-06, - "loss": 0.7636, - "step": 20770 - }, - { - "epoch": 1.47, - "learning_rate": 6.976190731508243e-06, - "loss": 0.7579, - "step": 20780 - }, - { - "epoch": 1.47, - "learning_rate": 6.958835629589482e-06, - "loss": 0.7663, - "step": 20790 - }, - { - "epoch": 1.47, - "learning_rate": 6.941497594456654e-06, - "loss": 0.7531, - "step": 20800 - }, - { - "epoch": 1.47, - "learning_rate": 6.924176648799789e-06, - "loss": 0.761, - "step": 20810 - }, - { - "epoch": 1.47, - "learning_rate": 6.906872815286578e-06, - "loss": 0.755, - "step": 20820 - }, - { - "epoch": 1.47, - "learning_rate": 6.889586116562288e-06, - "loss": 0.7614, - "step": 20830 - }, - { - "epoch": 1.47, - "learning_rate": 6.872316575249785e-06, - "loss": 0.754, - "step": 20840 - }, - { - "epoch": 1.47, - "learning_rate": 6.855064213949461e-06, - "loss": 0.7567, - "step": 20850 - }, - { - "epoch": 1.47, - "learning_rate": 6.837829055239249e-06, - "loss": 0.762, - "step": 20860 - }, - { - "epoch": 1.47, - "learning_rate": 6.820611121674548e-06, - "loss": 0.7569, - "step": 20870 - }, - { - "epoch": 1.47, - "learning_rate": 6.80341043578822e-06, - "loss": 0.7598, - "step": 20880 - }, - { - "epoch": 1.48, - "learning_rate": 6.786227020090554e-06, - "loss": 0.762, - "step": 20890 - }, - { - "epoch": 1.48, - "learning_rate": 6.769060897069255e-06, - "loss": 0.7578, - "step": 20900 - }, - { - "epoch": 1.48, - "learning_rate": 6.75191208918937e-06, - "loss": 0.7582, - "step": 20910 - }, - { - "epoch": 1.48, - "learning_rate": 6.734780618893308e-06, - "loss": 0.7628, - "step": 20920 - }, - { - "epoch": 1.48, - "learning_rate": 6.7176665086007735e-06, - "loss": 0.7613, - "step": 20930 - }, - { - "epoch": 1.48, - "learning_rate": 6.700569780708766e-06, - "loss": 0.7644, - "step": 20940 - }, - { - "epoch": 1.48, - "learning_rate": 6.683490457591528e-06, - "loss": 0.7628, - "step": 20950 - }, - { - "epoch": 1.48, - "learning_rate": 6.666428561600515e-06, - "loss": 0.7623, - "step": 20960 - }, - { - "epoch": 1.48, - "learning_rate": 6.649384115064405e-06, - "loss": 0.7609, - "step": 20970 - }, - { - "epoch": 1.48, - "learning_rate": 6.632357140289012e-06, - "loss": 0.7645, - "step": 20980 - }, - { - "epoch": 1.48, - "learning_rate": 6.6153476595572894e-06, - "loss": 0.7595, - "step": 20990 - }, - { - "epoch": 1.48, - "learning_rate": 6.598355695129317e-06, - "loss": 0.7635, - "step": 21000 - }, - { - "epoch": 1.48, - "learning_rate": 6.581381269242222e-06, - "loss": 0.7535, - "step": 21010 - }, - { - "epoch": 1.48, - "learning_rate": 6.564424404110206e-06, - "loss": 0.7519, - "step": 21020 - }, - { - "epoch": 1.49, - "learning_rate": 6.547485121924473e-06, - "loss": 0.7651, - "step": 21030 - }, - { - "epoch": 1.49, - "learning_rate": 6.530563444853211e-06, - "loss": 0.7534, - "step": 21040 - }, - { - "epoch": 1.49, - "learning_rate": 6.5136593950415895e-06, - "loss": 0.7519, - "step": 21050 - }, - { - "epoch": 1.49, - "learning_rate": 6.4967729946116885e-06, - "loss": 0.7578, - "step": 21060 - }, - { - "epoch": 1.49, - "learning_rate": 6.479904265662509e-06, - "loss": 0.7548, - "step": 21070 - }, - { - "epoch": 1.49, - "learning_rate": 6.463053230269911e-06, - "loss": 0.7487, - "step": 21080 - }, - { - "epoch": 1.49, - "learning_rate": 6.4462199104866e-06, - "loss": 0.7577, - "step": 21090 - }, - { - "epoch": 1.49, - "learning_rate": 6.42940432834211e-06, - "loss": 0.7521, - "step": 21100 - }, - { - "epoch": 1.49, - "learning_rate": 6.412606505842751e-06, - "loss": 0.7612, - "step": 21110 - }, - { - "epoch": 1.49, - "learning_rate": 6.395826464971586e-06, - "loss": 0.7577, - "step": 21120 - }, - { - "epoch": 1.49, - "learning_rate": 6.379064227688427e-06, - "loss": 0.7522, - "step": 21130 - }, - { - "epoch": 1.49, - "learning_rate": 6.362319815929765e-06, - "loss": 0.758, - "step": 21140 - }, - { - "epoch": 1.49, - "learning_rate": 6.345593251608784e-06, - "loss": 0.759, - "step": 21150 - }, - { - "epoch": 1.49, - "learning_rate": 6.328884556615289e-06, - "loss": 0.7535, - "step": 21160 - }, - { - "epoch": 1.5, - "learning_rate": 6.31219375281572e-06, - "loss": 0.7531, - "step": 21170 - }, - { - "epoch": 1.5, - "learning_rate": 6.295520862053093e-06, - "loss": 0.753, - "step": 21180 - }, - { - "epoch": 1.5, - "learning_rate": 6.278865906146978e-06, - "loss": 0.7554, - "step": 21190 - }, - { - "epoch": 1.5, - "learning_rate": 6.262228906893475e-06, - "loss": 0.7584, - "step": 21200 - }, - { - "epoch": 1.5, - "learning_rate": 6.2456098860652e-06, - "loss": 0.7546, - "step": 21210 - }, - { - "epoch": 1.5, - "learning_rate": 6.2290088654112144e-06, - "loss": 0.7531, - "step": 21220 - }, - { - "epoch": 1.5, - "learning_rate": 6.212425866657051e-06, - "loss": 0.7481, - "step": 21230 - }, - { - "epoch": 1.5, - "learning_rate": 6.19586091150463e-06, - "loss": 0.7578, - "step": 21240 - }, - { - "epoch": 1.5, - "learning_rate": 6.179314021632286e-06, - "loss": 0.7568, - "step": 21250 - }, - { - "epoch": 1.5, - "learning_rate": 6.162785218694693e-06, - "loss": 0.7618, - "step": 21260 - }, - { - "epoch": 1.5, - "learning_rate": 6.146274524322857e-06, - "loss": 0.7497, - "step": 21270 - }, - { - "epoch": 1.5, - "learning_rate": 6.1297819601240835e-06, - "loss": 0.7547, - "step": 21280 - }, - { - "epoch": 1.5, - "learning_rate": 6.113307547681971e-06, - "loss": 0.7524, - "step": 21290 - }, - { - "epoch": 1.5, - "learning_rate": 6.09685130855633e-06, - "loss": 0.7544, - "step": 21300 - }, - { - "epoch": 1.51, - "learning_rate": 6.080413264283225e-06, - "loss": 0.7559, - "step": 21310 - }, - { - "epoch": 1.51, - "learning_rate": 6.063993436374873e-06, - "loss": 0.7596, - "step": 21320 - }, - { - "epoch": 1.51, - "learning_rate": 6.0475918463196895e-06, - "loss": 0.755, - "step": 21330 - }, - { - "epoch": 1.51, - "learning_rate": 6.031208515582176e-06, - "loss": 0.7565, - "step": 21340 - }, - { - "epoch": 1.51, - "learning_rate": 6.014843465602984e-06, - "loss": 0.7614, - "step": 21350 - }, - { - "epoch": 1.51, - "learning_rate": 5.998496717798807e-06, - "loss": 0.7482, - "step": 21360 - }, - { - "epoch": 1.51, - "learning_rate": 5.982168293562416e-06, - "loss": 0.7538, - "step": 21370 - }, - { - "epoch": 1.51, - "learning_rate": 5.96585821426257e-06, - "loss": 0.7568, - "step": 21380 - }, - { - "epoch": 1.51, - "learning_rate": 5.949566501244053e-06, - "loss": 0.7567, - "step": 21390 - }, - { - "epoch": 1.51, - "learning_rate": 5.933293175827586e-06, - "loss": 0.7613, - "step": 21400 - }, - { - "epoch": 1.51, - "learning_rate": 5.917038259309853e-06, - "loss": 0.7526, - "step": 21410 - }, - { - "epoch": 1.51, - "learning_rate": 5.900801772963409e-06, - "loss": 0.755, - "step": 21420 - }, - { - "epoch": 1.51, - "learning_rate": 5.884583738036733e-06, - "loss": 0.7552, - "step": 21430 - }, - { - "epoch": 1.51, - "learning_rate": 5.8683841757541205e-06, - "loss": 0.7619, - "step": 21440 - }, - { - "epoch": 1.52, - "learning_rate": 5.852203107315721e-06, - "loss": 0.759, - "step": 21450 - }, - { - "epoch": 1.52, - "learning_rate": 5.836040553897455e-06, - "loss": 0.7568, - "step": 21460 - }, - { - "epoch": 1.52, - "learning_rate": 5.8198965366510415e-06, - "loss": 0.7597, - "step": 21470 - }, - { - "epoch": 1.52, - "learning_rate": 5.8037710767039106e-06, - "loss": 0.7506, - "step": 21480 - }, - { - "epoch": 1.52, - "learning_rate": 5.787664195159239e-06, - "loss": 0.7603, - "step": 21490 - }, - { - "epoch": 1.52, - "learning_rate": 5.771575913095853e-06, - "loss": 0.7516, - "step": 21500 - }, - { - "epoch": 1.52, - "learning_rate": 5.75550625156827e-06, - "loss": 0.753, - "step": 21510 - }, - { - "epoch": 1.52, - "learning_rate": 5.739455231606621e-06, - "loss": 0.7581, - "step": 21520 - }, - { - "epoch": 1.52, - "learning_rate": 5.723422874216656e-06, - "loss": 0.7536, - "step": 21530 - }, - { - "epoch": 1.52, - "learning_rate": 5.707409200379681e-06, - "loss": 0.7588, - "step": 21540 - }, - { - "epoch": 1.52, - "learning_rate": 5.691414231052577e-06, - "loss": 0.7596, - "step": 21550 - }, - { - "epoch": 1.52, - "learning_rate": 5.67543798716772e-06, - "loss": 0.7553, - "step": 21560 - }, - { - "epoch": 1.52, - "learning_rate": 5.659480489633e-06, - "loss": 0.759, - "step": 21570 - }, - { - "epoch": 1.52, - "learning_rate": 5.643541759331756e-06, - "loss": 0.7557, - "step": 21580 - }, - { - "epoch": 1.53, - "learning_rate": 5.627621817122793e-06, - "loss": 0.7551, - "step": 21590 - }, - { - "epoch": 1.53, - "learning_rate": 5.611720683840296e-06, - "loss": 0.7595, - "step": 21600 - }, - { - "epoch": 1.53, - "learning_rate": 5.595838380293865e-06, - "loss": 0.7495, - "step": 21610 - }, - { - "epoch": 1.53, - "learning_rate": 5.579974927268434e-06, - "loss": 0.7483, - "step": 21620 - }, - { - "epoch": 1.53, - "learning_rate": 5.56413034552429e-06, - "loss": 0.7447, - "step": 21630 - }, - { - "epoch": 1.53, - "learning_rate": 5.548304655797003e-06, - "loss": 0.7487, - "step": 21640 - }, - { - "epoch": 1.53, - "learning_rate": 5.532497878797429e-06, - "loss": 0.7528, - "step": 21650 - }, - { - "epoch": 1.53, - "learning_rate": 5.516710035211665e-06, - "loss": 0.7585, - "step": 21660 - }, - { - "epoch": 1.53, - "learning_rate": 5.500941145701048e-06, - "loss": 0.7533, - "step": 21670 - }, - { - "epoch": 1.53, - "learning_rate": 5.485191230902089e-06, - "loss": 0.7564, - "step": 21680 - }, - { - "epoch": 1.53, - "learning_rate": 5.469460311426489e-06, - "loss": 0.7536, - "step": 21690 - }, - { - "epoch": 1.53, - "learning_rate": 5.453748407861066e-06, - "loss": 0.7473, - "step": 21700 - }, - { - "epoch": 1.53, - "learning_rate": 5.438055540767772e-06, - "loss": 0.7541, - "step": 21710 - }, - { - "epoch": 1.53, - "learning_rate": 5.422381730683639e-06, - "loss": 0.7522, - "step": 21720 - }, - { - "epoch": 1.53, - "learning_rate": 5.406726998120751e-06, - "loss": 0.7535, - "step": 21730 - }, - { - "epoch": 1.54, - "learning_rate": 5.391091363566241e-06, - "loss": 0.7538, - "step": 21740 - }, - { - "epoch": 1.54, - "learning_rate": 5.375474847482241e-06, - "loss": 0.7602, - "step": 21750 - }, - { - "epoch": 1.54, - "learning_rate": 5.359877470305852e-06, - "loss": 0.7511, - "step": 21760 - }, - { - "epoch": 1.54, - "learning_rate": 5.344299252449156e-06, - "loss": 0.7533, - "step": 21770 - }, - { - "epoch": 1.54, - "learning_rate": 5.328740214299129e-06, - "loss": 0.7595, - "step": 21780 - }, - { - "epoch": 1.54, - "learning_rate": 5.3132003762176755e-06, - "loss": 0.7527, - "step": 21790 - }, - { - "epoch": 1.54, - "learning_rate": 5.297679758541554e-06, - "loss": 0.7514, - "step": 21800 - }, - { - "epoch": 1.54, - "learning_rate": 5.28217838158237e-06, - "loss": 0.7525, - "step": 21810 - }, - { - "epoch": 1.54, - "learning_rate": 5.2666962656265674e-06, - "loss": 0.7539, - "step": 21820 - }, - { - "epoch": 1.54, - "learning_rate": 5.251233430935358e-06, - "loss": 0.7488, - "step": 21830 - }, - { - "epoch": 1.54, - "learning_rate": 5.235789897744743e-06, - "loss": 0.759, - "step": 21840 - }, - { - "epoch": 1.54, - "learning_rate": 5.220365686265452e-06, - "loss": 0.7564, - "step": 21850 - }, - { - "epoch": 1.54, - "learning_rate": 5.204960816682922e-06, - "loss": 0.7494, - "step": 21860 - }, - { - "epoch": 1.54, - "learning_rate": 5.1895753091572995e-06, - "loss": 0.7569, - "step": 21870 - }, - { - "epoch": 1.55, - "learning_rate": 5.174209183823373e-06, - "loss": 0.7549, - "step": 21880 - }, - { - "epoch": 1.55, - "learning_rate": 5.1588624607905634e-06, - "loss": 0.7524, - "step": 21890 - }, - { - "epoch": 1.55, - "learning_rate": 5.143535160142923e-06, - "loss": 0.7531, - "step": 21900 - }, - { - "epoch": 1.55, - "learning_rate": 5.128227301939059e-06, - "loss": 0.7551, - "step": 21910 - }, - { - "epoch": 1.55, - "learning_rate": 5.1129389062121615e-06, - "loss": 0.755, - "step": 21920 - }, - { - "epoch": 1.55, - "learning_rate": 5.097669992969918e-06, - "loss": 0.754, - "step": 21930 - }, - { - "epoch": 1.55, - "learning_rate": 5.082420582194556e-06, - "loss": 0.7501, - "step": 21940 - }, - { - "epoch": 1.55, - "learning_rate": 5.067190693842752e-06, - "loss": 0.7556, - "step": 21950 - }, - { - "epoch": 1.55, - "learning_rate": 5.051980347845644e-06, - "loss": 0.7575, - "step": 21960 - }, - { - "epoch": 1.55, - "learning_rate": 5.0367895641087925e-06, - "loss": 0.7593, - "step": 21970 - }, - { - "epoch": 1.55, - "learning_rate": 5.0216183625121685e-06, - "loss": 0.7473, - "step": 21980 - }, - { - "epoch": 1.55, - "learning_rate": 5.006466762910096e-06, - "loss": 0.7568, - "step": 21990 - }, - { - "epoch": 1.55, - "learning_rate": 4.99133478513127e-06, - "loss": 0.7501, - "step": 22000 - }, - { - "epoch": 1.55, - "learning_rate": 4.9762224489786805e-06, - "loss": 0.7537, - "step": 22010 - }, - { - "epoch": 1.56, - "learning_rate": 4.961129774229645e-06, - "loss": 0.7477, - "step": 22020 - }, - { - "epoch": 1.56, - "learning_rate": 4.94605678063571e-06, - "loss": 0.7489, - "step": 22030 - }, - { - "epoch": 1.56, - "learning_rate": 4.931003487922703e-06, - "loss": 0.7607, - "step": 22040 - }, - { - "epoch": 1.56, - "learning_rate": 4.915969915790644e-06, - "loss": 0.7579, - "step": 22050 - }, - { - "epoch": 1.56, - "learning_rate": 4.900956083913764e-06, - "loss": 0.7555, - "step": 22060 - }, - { - "epoch": 1.56, - "learning_rate": 4.885962011940439e-06, - "loss": 0.7547, - "step": 22070 - }, - { - "epoch": 1.56, - "learning_rate": 4.870987719493212e-06, - "loss": 0.7514, - "step": 22080 - }, - { - "epoch": 1.56, - "learning_rate": 4.85603322616871e-06, - "loss": 0.7536, - "step": 22090 - }, - { - "epoch": 1.56, - "learning_rate": 4.841098551537686e-06, - "loss": 0.7564, - "step": 22100 - }, - { - "epoch": 1.56, - "learning_rate": 4.826183715144912e-06, - "loss": 0.7546, - "step": 22110 - }, - { - "epoch": 1.56, - "learning_rate": 4.8112887365092385e-06, - "loss": 0.7494, - "step": 22120 - }, - { - "epoch": 1.56, - "learning_rate": 4.7964136351235024e-06, - "loss": 0.7507, - "step": 22130 - }, - { - "epoch": 1.56, - "learning_rate": 4.781558430454544e-06, - "loss": 0.7543, - "step": 22140 - }, - { - "epoch": 1.56, - "learning_rate": 4.7667231419431505e-06, - "loss": 0.7511, - "step": 22150 - }, - { - "epoch": 1.57, - "learning_rate": 4.75190778900406e-06, - "loss": 0.7466, - "step": 22160 - }, - { - "epoch": 1.57, - "learning_rate": 4.737112391025906e-06, - "loss": 0.7514, - "step": 22170 - }, - { - "epoch": 1.57, - "learning_rate": 4.722336967371233e-06, - "loss": 0.747, - "step": 22180 - }, - { - "epoch": 1.57, - "learning_rate": 4.7075815373764e-06, - "loss": 0.7498, - "step": 22190 - }, - { - "epoch": 1.57, - "learning_rate": 4.692846120351646e-06, - "loss": 0.7574, - "step": 22200 - }, - { - "epoch": 1.57, - "learning_rate": 4.678130735580995e-06, - "loss": 0.7525, - "step": 22210 - }, - { - "epoch": 1.57, - "learning_rate": 4.663435402322265e-06, - "loss": 0.744, - "step": 22220 - }, - { - "epoch": 1.57, - "learning_rate": 4.648760139807025e-06, - "loss": 0.747, - "step": 22230 - }, - { - "epoch": 1.57, - "learning_rate": 4.634104967240587e-06, - "loss": 0.7524, - "step": 22240 - }, - { - "epoch": 1.57, - "learning_rate": 4.619469903801965e-06, - "loss": 0.7509, - "step": 22250 - }, - { - "epoch": 1.57, - "learning_rate": 4.604854968643857e-06, - "loss": 0.7456, - "step": 22260 - }, - { - "epoch": 1.57, - "learning_rate": 4.590260180892614e-06, - "loss": 0.7488, - "step": 22270 - }, - { - "epoch": 1.57, - "learning_rate": 4.575685559648238e-06, - "loss": 0.7551, - "step": 22280 - }, - { - "epoch": 1.57, - "learning_rate": 4.561131123984315e-06, - "loss": 0.7491, - "step": 22290 - }, - { - "epoch": 1.58, - "learning_rate": 4.546596892948043e-06, - "loss": 0.7442, - "step": 22300 - }, - { - "epoch": 1.58, - "learning_rate": 4.5320828855601495e-06, - "loss": 0.7534, - "step": 22310 - }, - { - "epoch": 1.58, - "learning_rate": 4.51758912081492e-06, - "loss": 0.7554, - "step": 22320 - }, - { - "epoch": 1.58, - "learning_rate": 4.503115617680134e-06, - "loss": 0.7552, - "step": 22330 - }, - { - "epoch": 1.58, - "learning_rate": 4.488662395097056e-06, - "loss": 0.7464, - "step": 22340 - }, - { - "epoch": 1.58, - "learning_rate": 4.4742294719804095e-06, - "loss": 0.7507, - "step": 22350 - }, - { - "epoch": 1.58, - "learning_rate": 4.4598168672183655e-06, - "loss": 0.7532, - "step": 22360 - }, - { - "epoch": 1.58, - "learning_rate": 4.445424599672481e-06, - "loss": 0.7538, - "step": 22370 - }, - { - "epoch": 1.58, - "learning_rate": 4.431052688177724e-06, - "loss": 0.7575, - "step": 22380 - }, - { - "epoch": 1.58, - "learning_rate": 4.416701151542402e-06, - "loss": 0.7544, - "step": 22390 - }, - { - "epoch": 1.58, - "learning_rate": 4.4023700085481735e-06, - "loss": 0.7555, - "step": 22400 - }, - { - "epoch": 1.58, - "learning_rate": 4.3880592779499965e-06, - "loss": 0.7504, - "step": 22410 - }, - { - "epoch": 1.58, - "learning_rate": 4.373768978476123e-06, - "loss": 0.7582, - "step": 22420 - }, - { - "epoch": 1.58, - "learning_rate": 4.359499128828055e-06, - "loss": 0.7472, - "step": 22430 - }, - { - "epoch": 1.59, - "learning_rate": 4.345249747680553e-06, - "loss": 0.7517, - "step": 22440 - }, - { - "epoch": 1.59, - "learning_rate": 4.331020853681571e-06, - "loss": 0.7523, - "step": 22450 - }, - { - "epoch": 1.59, - "learning_rate": 4.316812465452267e-06, - "loss": 0.75, - "step": 22460 - }, - { - "epoch": 1.59, - "learning_rate": 4.302624601586946e-06, - "loss": 0.7565, - "step": 22470 - }, - { - "epoch": 1.59, - "learning_rate": 4.288457280653077e-06, - "loss": 0.7487, - "step": 22480 - }, - { - "epoch": 1.59, - "learning_rate": 4.274310521191225e-06, - "loss": 0.7505, - "step": 22490 - }, - { - "epoch": 1.59, - "learning_rate": 4.260184341715042e-06, - "loss": 0.7546, - "step": 22500 - }, - { - "epoch": 1.59, - "learning_rate": 4.246078760711274e-06, - "loss": 0.7602, - "step": 22510 - }, - { - "epoch": 1.59, - "learning_rate": 4.231993796639686e-06, - "loss": 0.7505, - "step": 22520 - }, - { - "epoch": 1.59, - "learning_rate": 4.217929467933064e-06, - "loss": 0.7526, - "step": 22530 - }, - { - "epoch": 1.59, - "learning_rate": 4.203885792997207e-06, - "loss": 0.7463, - "step": 22540 - }, - { - "epoch": 1.59, - "learning_rate": 4.1898627902108615e-06, - "loss": 0.7544, - "step": 22550 - }, - { - "epoch": 1.59, - "learning_rate": 4.175860477925739e-06, - "loss": 0.7487, - "step": 22560 - }, - { - "epoch": 1.59, - "learning_rate": 4.1618788744664654e-06, - "loss": 0.7534, - "step": 22570 - }, - { - "epoch": 1.59, - "learning_rate": 4.147917998130555e-06, - "loss": 0.752, - "step": 22580 - }, - { - "epoch": 1.6, - "learning_rate": 4.133977867188423e-06, - "loss": 0.7478, - "step": 22590 - }, - { - "epoch": 1.6, - "learning_rate": 4.120058499883308e-06, - "loss": 0.7511, - "step": 22600 - }, - { - "epoch": 1.6, - "learning_rate": 4.106159914431298e-06, - "loss": 0.7515, - "step": 22610 - }, - { - "epoch": 1.6, - "learning_rate": 4.092282129021268e-06, - "loss": 0.752, - "step": 22620 - }, - { - "epoch": 1.6, - "learning_rate": 4.078425161814874e-06, - "loss": 0.7467, - "step": 22630 - }, - { - "epoch": 1.6, - "learning_rate": 4.06458903094654e-06, - "loss": 0.7492, - "step": 22640 - }, - { - "epoch": 1.6, - "learning_rate": 4.050773754523407e-06, - "loss": 0.7421, - "step": 22650 - }, - { - "epoch": 1.6, - "learning_rate": 4.03697935062533e-06, - "loss": 0.7538, - "step": 22660 - }, - { - "epoch": 1.6, - "learning_rate": 4.023205837304855e-06, - "loss": 0.7488, - "step": 22670 - }, - { - "epoch": 1.6, - "learning_rate": 4.009453232587175e-06, - "loss": 0.7499, - "step": 22680 - }, - { - "epoch": 1.6, - "learning_rate": 3.995721554470135e-06, - "loss": 0.7542, - "step": 22690 - }, - { - "epoch": 1.6, - "learning_rate": 3.982010820924178e-06, - "loss": 0.7567, - "step": 22700 - }, - { - "epoch": 1.6, - "learning_rate": 3.968321049892356e-06, - "loss": 0.7429, - "step": 22710 - }, - { - "epoch": 1.6, - "learning_rate": 3.954652259290272e-06, - "loss": 0.752, - "step": 22720 - }, - { - "epoch": 1.61, - "learning_rate": 3.941004467006075e-06, - "loss": 0.7506, - "step": 22730 - }, - { - "epoch": 1.61, - "learning_rate": 3.927377690900436e-06, - "loss": 0.7478, - "step": 22740 - }, - { - "epoch": 1.61, - "learning_rate": 3.913771948806529e-06, - "loss": 0.7505, - "step": 22750 - }, - { - "epoch": 1.61, - "learning_rate": 3.900187258529986e-06, - "loss": 0.7535, - "step": 22760 - }, - { - "epoch": 1.61, - "learning_rate": 3.886623637848908e-06, - "loss": 0.7531, - "step": 22770 - }, - { - "epoch": 1.61, - "learning_rate": 3.8730811045138e-06, - "loss": 0.7439, - "step": 22780 - }, - { - "epoch": 1.61, - "learning_rate": 3.8595596762476e-06, - "loss": 0.7466, - "step": 22790 - }, - { - "epoch": 1.61, - "learning_rate": 3.846059370745585e-06, - "loss": 0.745, - "step": 22800 - }, - { - "epoch": 1.61, - "learning_rate": 3.832580205675431e-06, - "loss": 0.7564, - "step": 22810 - }, - { - "epoch": 1.61, - "learning_rate": 3.819122198677119e-06, - "loss": 0.7542, - "step": 22820 - }, - { - "epoch": 1.61, - "learning_rate": 3.805685367362957e-06, - "loss": 0.7505, - "step": 22830 - }, - { - "epoch": 1.61, - "learning_rate": 3.792269729317528e-06, - "loss": 0.7534, - "step": 22840 - }, - { - "epoch": 1.61, - "learning_rate": 3.7788753020976975e-06, - "loss": 0.7504, - "step": 22850 - }, - { - "epoch": 1.61, - "learning_rate": 3.7655021032325477e-06, - "loss": 0.7537, - "step": 22860 - }, - { - "epoch": 1.62, - "learning_rate": 3.7521501502234105e-06, - "loss": 0.7492, - "step": 22870 - }, - { - "epoch": 1.62, - "learning_rate": 3.7388194605437765e-06, - "loss": 0.755, - "step": 22880 - }, - { - "epoch": 1.62, - "learning_rate": 3.725510051639345e-06, - "loss": 0.7457, - "step": 22890 - }, - { - "epoch": 1.62, - "learning_rate": 3.712221940927938e-06, - "loss": 0.7531, - "step": 22900 - }, - { - "epoch": 1.62, - "learning_rate": 3.698955145799528e-06, - "loss": 0.7467, - "step": 22910 - }, - { - "epoch": 1.62, - "learning_rate": 3.6857096836161676e-06, - "loss": 0.7501, - "step": 22920 - }, - { - "epoch": 1.62, - "learning_rate": 3.6724855717120187e-06, - "loss": 0.744, - "step": 22930 - }, - { - "epoch": 1.62, - "learning_rate": 3.6592828273932756e-06, - "loss": 0.7464, - "step": 22940 - }, - { - "epoch": 1.62, - "learning_rate": 3.646101467938199e-06, - "loss": 0.7469, - "step": 22950 - }, - { - "epoch": 1.62, - "learning_rate": 3.632941510597021e-06, - "loss": 0.7519, - "step": 22960 - }, - { - "epoch": 1.62, - "learning_rate": 3.61980297259201e-06, - "loss": 0.7477, - "step": 22970 - }, - { - "epoch": 1.62, - "learning_rate": 3.6066858711173723e-06, - "loss": 0.7529, - "step": 22980 - }, - { - "epoch": 1.62, - "learning_rate": 3.593590223339283e-06, - "loss": 0.7442, - "step": 22990 - }, - { - "epoch": 1.62, - "learning_rate": 3.5805160463958145e-06, - "loss": 0.7517, - "step": 23000 - }, - { - "epoch": 1.63, - "learning_rate": 3.567463357396972e-06, - "loss": 0.7467, - "step": 23010 - }, - { - "epoch": 1.63, - "learning_rate": 3.5544321734246133e-06, - "loss": 0.7475, - "step": 23020 - }, - { - "epoch": 1.63, - "learning_rate": 3.541422511532466e-06, - "loss": 0.7469, - "step": 23030 - }, - { - "epoch": 1.63, - "learning_rate": 3.5284343887460846e-06, - "loss": 0.752, - "step": 23040 - }, - { - "epoch": 1.63, - "learning_rate": 3.5154678220628503e-06, - "loss": 0.7487, - "step": 23050 - }, - { - "epoch": 1.63, - "learning_rate": 3.502522828451913e-06, - "loss": 0.752, - "step": 23060 - }, - { - "epoch": 1.63, - "learning_rate": 3.4895994248542107e-06, - "loss": 0.7544, - "step": 23070 - }, - { - "epoch": 1.63, - "learning_rate": 3.4766976281824106e-06, - "loss": 0.7512, - "step": 23080 - }, - { - "epoch": 1.63, - "learning_rate": 3.4638174553209146e-06, - "loss": 0.7582, - "step": 23090 - }, - { - "epoch": 1.63, - "learning_rate": 3.4509589231258223e-06, - "loss": 0.7454, - "step": 23100 - }, - { - "epoch": 1.63, - "learning_rate": 3.438122048424908e-06, - "loss": 0.7476, - "step": 23110 - }, - { - "epoch": 1.63, - "learning_rate": 3.425306848017602e-06, - "loss": 0.7484, - "step": 23120 - }, - { - "epoch": 1.63, - "learning_rate": 3.4125133386749856e-06, - "loss": 0.7507, - "step": 23130 - }, - { - "epoch": 1.63, - "learning_rate": 3.3997415371397313e-06, - "loss": 0.7465, - "step": 23140 - }, - { - "epoch": 1.64, - "learning_rate": 3.3869914601261235e-06, - "loss": 0.7483, - "step": 23150 - }, - { - "epoch": 1.64, - "learning_rate": 3.3742631243199964e-06, - "loss": 0.744, - "step": 23160 - }, - { - "epoch": 1.64, - "learning_rate": 3.3615565463787523e-06, - "loss": 0.743, - "step": 23170 - }, - { - "epoch": 1.64, - "learning_rate": 3.3488717429313034e-06, - "loss": 0.744, - "step": 23180 - }, - { - "epoch": 1.64, - "learning_rate": 3.336208730578072e-06, - "loss": 0.7441, - "step": 23190 - }, - { - "epoch": 1.64, - "learning_rate": 3.323567525890954e-06, - "loss": 0.7523, - "step": 23200 - }, - { - "epoch": 1.64, - "learning_rate": 3.3109481454133262e-06, - "loss": 0.7553, - "step": 23210 - }, - { - "epoch": 1.64, - "learning_rate": 3.2983506056599834e-06, - "loss": 0.7473, - "step": 23220 - }, - { - "epoch": 1.64, - "learning_rate": 3.285774923117151e-06, - "loss": 0.7498, - "step": 23230 - }, - { - "epoch": 1.64, - "learning_rate": 3.273221114242442e-06, - "loss": 0.753, - "step": 23240 - }, - { - "epoch": 1.64, - "learning_rate": 3.2606891954648524e-06, - "loss": 0.7515, - "step": 23250 - }, - { - "epoch": 1.64, - "learning_rate": 3.248179183184723e-06, - "loss": 0.7489, - "step": 23260 - }, - { - "epoch": 1.64, - "learning_rate": 3.2356910937737273e-06, - "loss": 0.7497, - "step": 23270 - }, - { - "epoch": 1.64, - "learning_rate": 3.223224943574845e-06, - "loss": 0.7461, - "step": 23280 - }, - { - "epoch": 1.65, - "learning_rate": 3.21078074890236e-06, - "loss": 0.7511, - "step": 23290 - }, - { - "epoch": 1.65, - "learning_rate": 3.1983585260418026e-06, - "loss": 0.7473, - "step": 23300 - }, - { - "epoch": 1.65, - "learning_rate": 3.1859582912499663e-06, - "loss": 0.742, - "step": 23310 - }, - { - "epoch": 1.65, - "learning_rate": 3.173580060754857e-06, - "loss": 0.7606, - "step": 23320 - }, - { - "epoch": 1.65, - "learning_rate": 3.1612238507556925e-06, - "loss": 0.7481, - "step": 23330 - }, - { - "epoch": 1.65, - "learning_rate": 3.1488896774228682e-06, - "loss": 0.7473, - "step": 23340 - }, - { - "epoch": 1.65, - "learning_rate": 3.136577556897933e-06, - "loss": 0.753, - "step": 23350 - }, - { - "epoch": 1.65, - "learning_rate": 3.124287505293595e-06, - "loss": 0.7524, - "step": 23360 - }, - { - "epoch": 1.65, - "learning_rate": 3.112019538693665e-06, - "loss": 0.7407, - "step": 23370 - }, - { - "epoch": 1.65, - "learning_rate": 3.0997736731530504e-06, - "loss": 0.7406, - "step": 23380 - }, - { - "epoch": 1.65, - "learning_rate": 3.087549924697748e-06, - "loss": 0.7502, - "step": 23390 - }, - { - "epoch": 1.65, - "learning_rate": 3.0753483093247993e-06, - "loss": 0.7511, - "step": 23400 - }, - { - "epoch": 1.65, - "learning_rate": 3.0631688430022886e-06, - "loss": 0.7491, - "step": 23410 - }, - { - "epoch": 1.65, - "learning_rate": 3.051011541669309e-06, - "loss": 0.7446, - "step": 23420 - }, - { - "epoch": 1.66, - "learning_rate": 3.038876421235939e-06, - "loss": 0.7499, - "step": 23430 - }, - { - "epoch": 1.66, - "learning_rate": 3.0267634975832517e-06, - "loss": 0.7489, - "step": 23440 - }, - { - "epoch": 1.66, - "learning_rate": 3.014672786563244e-06, - "loss": 0.7445, - "step": 23450 - }, - { - "epoch": 1.66, - "learning_rate": 3.0026043039988707e-06, - "loss": 0.7436, - "step": 23460 - }, - { - "epoch": 1.66, - "learning_rate": 2.9905580656839728e-06, - "loss": 0.748, - "step": 23470 - }, - { - "epoch": 1.66, - "learning_rate": 2.9785340873832956e-06, - "loss": 0.7476, - "step": 23480 - }, - { - "epoch": 1.66, - "learning_rate": 2.966532384832441e-06, - "loss": 0.7489, - "step": 23490 - }, - { - "epoch": 1.66, - "learning_rate": 2.954552973737874e-06, - "loss": 0.7518, - "step": 23500 - }, - { - "epoch": 1.66, - "learning_rate": 2.942595869776874e-06, - "loss": 0.7474, - "step": 23510 - }, - { - "epoch": 1.66, - "learning_rate": 2.9306610885975394e-06, - "loss": 0.7465, - "step": 23520 - }, - { - "epoch": 1.66, - "learning_rate": 2.9187486458187385e-06, - "loss": 0.7451, - "step": 23530 - }, - { - "epoch": 1.66, - "learning_rate": 2.9068585570301256e-06, - "loss": 0.7474, - "step": 23540 - }, - { - "epoch": 1.66, - "learning_rate": 2.894990837792082e-06, - "loss": 0.7497, - "step": 23550 - }, - { - "epoch": 1.66, - "learning_rate": 2.8831455036357338e-06, - "loss": 0.7509, - "step": 23560 - }, - { - "epoch": 1.66, - "learning_rate": 2.8713225700628844e-06, - "loss": 0.7473, - "step": 23570 - }, - { - "epoch": 1.67, - "learning_rate": 2.859522052546051e-06, - "loss": 0.7461, - "step": 23580 - }, - { - "epoch": 1.67, - "learning_rate": 2.8477439665283956e-06, - "loss": 0.7453, - "step": 23590 - }, - { - "epoch": 1.67, - "learning_rate": 2.83598832742374e-06, - "loss": 0.7396, - "step": 23600 - }, - { - "epoch": 1.67, - "learning_rate": 2.8242551506165108e-06, - "loss": 0.7526, - "step": 23610 - }, - { - "epoch": 1.67, - "learning_rate": 2.8125444514617606e-06, - "loss": 0.7451, - "step": 23620 - }, - { - "epoch": 1.67, - "learning_rate": 2.8008562452851042e-06, - "loss": 0.7437, - "step": 23630 - }, - { - "epoch": 1.67, - "learning_rate": 2.789190547382743e-06, - "loss": 0.7421, - "step": 23640 - }, - { - "epoch": 1.67, - "learning_rate": 2.7775473730213942e-06, - "loss": 0.7453, - "step": 23650 - }, - { - "epoch": 1.67, - "learning_rate": 2.7659267374383247e-06, - "loss": 0.7381, - "step": 23660 - }, - { - "epoch": 1.67, - "learning_rate": 2.7543286558412874e-06, - "loss": 0.7502, - "step": 23670 - }, - { - "epoch": 1.67, - "learning_rate": 2.7427531434085365e-06, - "loss": 0.7403, - "step": 23680 - }, - { - "epoch": 1.67, - "learning_rate": 2.7312002152887673e-06, - "loss": 0.7411, - "step": 23690 - }, - { - "epoch": 1.67, - "learning_rate": 2.7196698866011437e-06, - "loss": 0.7453, - "step": 23700 - }, - { - "epoch": 1.67, - "learning_rate": 2.708162172435238e-06, - "loss": 0.7438, - "step": 23710 - }, - { - "epoch": 1.68, - "learning_rate": 2.6966770878510317e-06, - "loss": 0.7451, - "step": 23720 - }, - { - "epoch": 1.68, - "learning_rate": 2.6852146478788886e-06, - "loss": 0.7396, - "step": 23730 - }, - { - "epoch": 1.68, - "learning_rate": 2.673774867519545e-06, - "loss": 0.745, - "step": 23740 - }, - { - "epoch": 1.68, - "learning_rate": 2.662357761744072e-06, - "loss": 0.7442, - "step": 23750 - }, - { - "epoch": 1.68, - "learning_rate": 2.650963345493882e-06, - "loss": 0.7468, - "step": 23760 - }, - { - "epoch": 1.68, - "learning_rate": 2.6395916336806784e-06, - "loss": 0.752, - "step": 23770 - }, - { - "epoch": 1.68, - "learning_rate": 2.6282426411864625e-06, - "loss": 0.7467, - "step": 23780 - }, - { - "epoch": 1.68, - "learning_rate": 2.616916382863499e-06, - "loss": 0.7407, - "step": 23790 - }, - { - "epoch": 1.68, - "learning_rate": 2.605612873534298e-06, - "loss": 0.7452, - "step": 23800 - }, - { - "epoch": 1.68, - "learning_rate": 2.594332127991599e-06, - "loss": 0.7442, - "step": 23810 - }, - { - "epoch": 1.68, - "learning_rate": 2.5830741609983577e-06, - "loss": 0.7419, - "step": 23820 - }, - { - "epoch": 1.68, - "learning_rate": 2.5718389872877114e-06, - "loss": 0.7486, - "step": 23830 - }, - { - "epoch": 1.68, - "learning_rate": 2.5606266215629783e-06, - "loss": 0.7488, - "step": 23840 - }, - { - "epoch": 1.68, - "learning_rate": 2.5494370784976095e-06, - "loss": 0.7454, - "step": 23850 - }, - { - "epoch": 1.69, - "learning_rate": 2.5382703727352144e-06, - "loss": 0.7504, - "step": 23860 - }, - { - "epoch": 1.69, - "learning_rate": 2.5271265188894957e-06, - "loss": 0.7443, - "step": 23870 - }, - { - "epoch": 1.69, - "learning_rate": 2.5160055315442542e-06, - "loss": 0.749, - "step": 23880 - }, - { - "epoch": 1.69, - "learning_rate": 2.5049074252533667e-06, - "loss": 0.7426, - "step": 23890 - }, - { - "epoch": 1.69, - "learning_rate": 2.49383221454077e-06, - "loss": 0.7457, - "step": 23900 - }, - { - "epoch": 1.69, - "learning_rate": 2.482779913900433e-06, - "loss": 0.7461, - "step": 23910 - }, - { - "epoch": 1.69, - "learning_rate": 2.4717505377963467e-06, - "loss": 0.7436, - "step": 23920 - }, - { - "epoch": 1.69, - "learning_rate": 2.460744100662491e-06, - "loss": 0.7469, - "step": 23930 - }, - { - "epoch": 1.69, - "learning_rate": 2.4497606169028474e-06, - "loss": 0.7501, - "step": 23940 - }, - { - "epoch": 1.69, - "learning_rate": 2.4388001008913296e-06, - "loss": 0.7466, - "step": 23950 - }, - { - "epoch": 1.69, - "learning_rate": 2.427862566971817e-06, - "loss": 0.7483, - "step": 23960 - }, - { - "epoch": 1.69, - "learning_rate": 2.416948029458097e-06, - "loss": 0.7462, - "step": 23970 - }, - { - "epoch": 1.69, - "learning_rate": 2.40605650263388e-06, - "loss": 0.7428, - "step": 23980 - }, - { - "epoch": 1.69, - "learning_rate": 2.3951880007527417e-06, - "loss": 0.7382, - "step": 23990 - }, - { - "epoch": 1.7, - "learning_rate": 2.3843425380381446e-06, - "loss": 0.7405, - "step": 24000 - }, - { - "epoch": 1.7, - "learning_rate": 2.373520128683382e-06, - "loss": 0.7453, - "step": 24010 - }, - { - "epoch": 1.7, - "learning_rate": 2.3627207868515956e-06, - "loss": 0.745, - "step": 24020 - }, - { - "epoch": 1.7, - "learning_rate": 2.3519445266757267e-06, - "loss": 0.7444, - "step": 24030 - }, - { - "epoch": 1.7, - "learning_rate": 2.3411913622585127e-06, - "loss": 0.7512, - "step": 24040 - }, - { - "epoch": 1.7, - "learning_rate": 2.3304613076724623e-06, - "loss": 0.7424, - "step": 24050 - }, - { - "epoch": 1.7, - "learning_rate": 2.319754376959853e-06, - "loss": 0.7418, - "step": 24060 - }, - { - "epoch": 1.7, - "learning_rate": 2.3090705841326844e-06, - "loss": 0.7506, - "step": 24070 - }, - { - "epoch": 1.7, - "learning_rate": 2.2984099431726927e-06, - "loss": 0.7404, - "step": 24080 - }, - { - "epoch": 1.7, - "learning_rate": 2.2877724680312996e-06, - "loss": 0.7451, - "step": 24090 - }, - { - "epoch": 1.7, - "learning_rate": 2.277158172629621e-06, - "loss": 0.7411, - "step": 24100 - }, - { - "epoch": 1.7, - "learning_rate": 2.2665670708584364e-06, - "loss": 0.7466, - "step": 24110 - }, - { - "epoch": 1.7, - "learning_rate": 2.2559991765781606e-06, - "loss": 0.7421, - "step": 24120 - }, - { - "epoch": 1.7, - "learning_rate": 2.2454545036188556e-06, - "loss": 0.744, - "step": 24130 - }, - { - "epoch": 1.71, - "learning_rate": 2.2349330657801804e-06, - "loss": 0.742, - "step": 24140 - }, - { - "epoch": 1.71, - "learning_rate": 2.224434876831387e-06, - "loss": 0.7427, - "step": 24150 - }, - { - "epoch": 1.71, - "learning_rate": 2.2139599505113153e-06, - "loss": 0.7437, - "step": 24160 - }, - { - "epoch": 1.71, - "learning_rate": 2.203508300528341e-06, - "loss": 0.7487, - "step": 24170 - }, - { - "epoch": 1.71, - "learning_rate": 2.1930799405604007e-06, - "loss": 0.7491, - "step": 24180 - }, - { - "epoch": 1.71, - "learning_rate": 2.182674884254934e-06, - "loss": 0.7418, - "step": 24190 - }, - { - "epoch": 1.71, - "learning_rate": 2.1722931452288877e-06, - "loss": 0.735, - "step": 24200 - }, - { - "epoch": 1.71, - "learning_rate": 2.1619347370687004e-06, - "loss": 0.7446, - "step": 24210 - }, - { - "epoch": 1.71, - "learning_rate": 2.151599673330269e-06, - "loss": 0.7482, - "step": 24220 - }, - { - "epoch": 1.71, - "learning_rate": 2.14128796753895e-06, - "loss": 0.7475, - "step": 24230 - }, - { - "epoch": 1.71, - "learning_rate": 2.1309996331895235e-06, - "loss": 0.7507, - "step": 24240 - }, - { - "epoch": 1.71, - "learning_rate": 2.120734683746184e-06, - "loss": 0.7439, - "step": 24250 - }, - { - "epoch": 1.71, - "learning_rate": 2.1104931326425194e-06, - "loss": 0.7481, - "step": 24260 - }, - { - "epoch": 1.71, - "learning_rate": 2.1002749932815146e-06, - "loss": 0.7429, - "step": 24270 - }, - { - "epoch": 1.72, - "learning_rate": 2.0900802790354914e-06, - "loss": 0.7458, - "step": 24280 - }, - { - "epoch": 1.72, - "learning_rate": 2.0799090032461346e-06, - "loss": 0.752, - "step": 24290 - }, - { - "epoch": 1.72, - "learning_rate": 2.069761179224441e-06, - "loss": 0.7465, - "step": 24300 - }, - { - "epoch": 1.72, - "learning_rate": 2.05963682025073e-06, - "loss": 0.7504, - "step": 24310 - }, - { - "epoch": 1.72, - "learning_rate": 2.0495359395746005e-06, - "loss": 0.7482, - "step": 24320 - }, - { - "epoch": 1.72, - "learning_rate": 2.039458550414941e-06, - "loss": 0.7421, - "step": 24330 - }, - { - "epoch": 1.72, - "learning_rate": 2.029404665959871e-06, - "loss": 0.7472, - "step": 24340 - }, - { - "epoch": 1.72, - "learning_rate": 2.0193742993667807e-06, - "loss": 0.7486, - "step": 24350 - }, - { - "epoch": 1.72, - "learning_rate": 2.009367463762255e-06, - "loss": 0.7429, - "step": 24360 - }, - { - "epoch": 1.72, - "learning_rate": 1.999384172242109e-06, - "loss": 0.7438, - "step": 24370 - }, - { - "epoch": 1.72, - "learning_rate": 1.9894244378713233e-06, - "loss": 0.7448, - "step": 24380 - }, - { - "epoch": 1.72, - "learning_rate": 1.9794882736840714e-06, - "loss": 0.7365, - "step": 24390 - }, - { - "epoch": 1.72, - "learning_rate": 1.9695756926836586e-06, - "loss": 0.7396, - "step": 24400 - }, - { - "epoch": 1.72, - "learning_rate": 1.959686707842554e-06, - "loss": 0.7328, - "step": 24410 - }, - { - "epoch": 1.72, - "learning_rate": 1.949821332102311e-06, - "loss": 0.7504, - "step": 24420 - }, - { - "epoch": 1.73, - "learning_rate": 1.9399795783736185e-06, - "loss": 0.742, - "step": 24430 - }, - { - "epoch": 1.73, - "learning_rate": 1.930161459536235e-06, - "loss": 0.7398, - "step": 24440 - }, - { - "epoch": 1.73, - "learning_rate": 1.9203669884389998e-06, - "loss": 0.7429, - "step": 24450 - }, - { - "epoch": 1.73, - "learning_rate": 1.910596177899786e-06, - "loss": 0.7433, - "step": 24460 - }, - { - "epoch": 1.73, - "learning_rate": 1.9008490407055258e-06, - "loss": 0.7433, - "step": 24470 - }, - { - "epoch": 1.73, - "learning_rate": 1.8911255896121528e-06, - "loss": 0.7407, - "step": 24480 - }, - { - "epoch": 1.73, - "learning_rate": 1.881425837344608e-06, - "loss": 0.7443, - "step": 24490 - }, - { - "epoch": 1.73, - "learning_rate": 1.87174979659682e-06, - "loss": 0.7432, - "step": 24500 - }, - { - "epoch": 1.73, - "learning_rate": 1.862097480031686e-06, - "loss": 0.7397, - "step": 24510 - }, - { - "epoch": 1.73, - "learning_rate": 1.8524689002810547e-06, - "loss": 0.747, - "step": 24520 - }, - { - "epoch": 1.73, - "learning_rate": 1.8428640699457135e-06, - "loss": 0.7472, - "step": 24530 - }, - { - "epoch": 1.73, - "learning_rate": 1.8332830015953606e-06, - "loss": 0.7493, - "step": 24540 - }, - { - "epoch": 1.73, - "learning_rate": 1.823725707768611e-06, - "loss": 0.7451, - "step": 24550 - }, - { - "epoch": 1.73, - "learning_rate": 1.8141922009729573e-06, - "loss": 0.7415, - "step": 24560 - }, - { - "epoch": 1.74, - "learning_rate": 1.8046824936847617e-06, - "loss": 0.7415, - "step": 24570 - }, - { - "epoch": 1.74, - "learning_rate": 1.7951965983492403e-06, - "loss": 0.7422, - "step": 24580 - }, - { - "epoch": 1.74, - "learning_rate": 1.7857345273804538e-06, - "loss": 0.7478, - "step": 24590 - }, - { - "epoch": 1.74, - "learning_rate": 1.776296293161277e-06, - "loss": 0.7417, - "step": 24600 - }, - { - "epoch": 1.74, - "learning_rate": 1.7668819080433962e-06, - "loss": 0.7413, - "step": 24610 - }, - { - "epoch": 1.74, - "learning_rate": 1.7574913843472763e-06, - "loss": 0.746, - "step": 24620 - }, - { - "epoch": 1.74, - "learning_rate": 1.7481247343621688e-06, - "loss": 0.7476, - "step": 24630 - }, - { - "epoch": 1.74, - "learning_rate": 1.738781970346073e-06, - "loss": 0.7466, - "step": 24640 - }, - { - "epoch": 1.74, - "learning_rate": 1.7294631045257283e-06, - "loss": 0.7496, - "step": 24650 - }, - { - "epoch": 1.74, - "learning_rate": 1.7201681490966016e-06, - "loss": 0.7462, - "step": 24660 - }, - { - "epoch": 1.74, - "learning_rate": 1.7108971162228716e-06, - "loss": 0.7426, - "step": 24670 - }, - { - "epoch": 1.74, - "learning_rate": 1.701650018037404e-06, - "loss": 0.7436, - "step": 24680 - }, - { - "epoch": 1.74, - "learning_rate": 1.6924268666417498e-06, - "loss": 0.7473, - "step": 24690 - }, - { - "epoch": 1.74, - "learning_rate": 1.683227674106107e-06, - "loss": 0.7454, - "step": 24700 - }, - { - "epoch": 1.75, - "learning_rate": 1.6740524524693413e-06, - "loss": 0.7382, - "step": 24710 - }, - { - "epoch": 1.75, - "learning_rate": 1.6649012137389164e-06, - "loss": 0.7424, - "step": 24720 - }, - { - "epoch": 1.75, - "learning_rate": 1.6557739698909436e-06, - "loss": 0.739, - "step": 24730 - }, - { - "epoch": 1.75, - "learning_rate": 1.6466707328701059e-06, - "loss": 0.7401, - "step": 24740 - }, - { - "epoch": 1.75, - "learning_rate": 1.6375915145896871e-06, - "loss": 0.7431, - "step": 24750 - }, - { - "epoch": 1.75, - "learning_rate": 1.6285363269315247e-06, - "loss": 0.7389, - "step": 24760 - }, - { - "epoch": 1.75, - "learning_rate": 1.6195051817460217e-06, - "loss": 0.7445, - "step": 24770 - }, - { - "epoch": 1.75, - "learning_rate": 1.6104980908521017e-06, - "loss": 0.744, - "step": 24780 - }, - { - "epoch": 1.75, - "learning_rate": 1.6015150660372225e-06, - "loss": 0.7454, - "step": 24790 - }, - { - "epoch": 1.75, - "learning_rate": 1.592556119057338e-06, - "loss": 0.7467, - "step": 24800 - }, - { - "epoch": 1.75, - "learning_rate": 1.5836212616368918e-06, - "loss": 0.751, - "step": 24810 - }, - { - "epoch": 1.75, - "learning_rate": 1.574710505468804e-06, - "loss": 0.7426, - "step": 24820 - }, - { - "epoch": 1.75, - "learning_rate": 1.5658238622144595e-06, - "loss": 0.7419, - "step": 24830 - }, - { - "epoch": 1.75, - "learning_rate": 1.5569613435036756e-06, - "loss": 0.7385, - "step": 24840 - }, - { - "epoch": 1.76, - "learning_rate": 1.5481229609347103e-06, - "loss": 0.7417, - "step": 24850 - }, - { - "epoch": 1.76, - "learning_rate": 1.5393087260742222e-06, - "loss": 0.7505, - "step": 24860 - }, - { - "epoch": 1.76, - "learning_rate": 1.5305186504572823e-06, - "loss": 0.7433, - "step": 24870 - }, - { - "epoch": 1.76, - "learning_rate": 1.5217527455873326e-06, - "loss": 0.7432, - "step": 24880 - }, - { - "epoch": 1.76, - "learning_rate": 1.5130110229361884e-06, - "loss": 0.7461, - "step": 24890 - }, - { - "epoch": 1.76, - "learning_rate": 1.504293493944018e-06, - "loss": 0.7399, - "step": 24900 - }, - { - "epoch": 1.76, - "learning_rate": 1.495600170019329e-06, - "loss": 0.744, - "step": 24910 - }, - { - "epoch": 1.76, - "learning_rate": 1.4869310625389433e-06, - "loss": 0.744, - "step": 24920 - }, - { - "epoch": 1.76, - "learning_rate": 1.47828618284801e-06, - "loss": 0.7471, - "step": 24930 - }, - { - "epoch": 1.76, - "learning_rate": 1.4696655422599525e-06, - "loss": 0.7407, - "step": 24940 - }, - { - "epoch": 1.76, - "learning_rate": 1.461069152056478e-06, - "loss": 0.7421, - "step": 24950 - }, - { - "epoch": 1.76, - "learning_rate": 1.4524970234875667e-06, - "loss": 0.7397, - "step": 24960 - }, - { - "epoch": 1.76, - "learning_rate": 1.4439491677714323e-06, - "loss": 0.7438, - "step": 24970 - }, - { - "epoch": 1.76, - "learning_rate": 1.435425596094544e-06, - "loss": 0.7448, - "step": 24980 - }, - { - "epoch": 1.77, - "learning_rate": 1.4269263196115657e-06, - "loss": 0.7468, - "step": 24990 - }, - { - "epoch": 1.77, - "learning_rate": 1.4184513494453888e-06, - "loss": 0.7461, - "step": 25000 - }, - { - "epoch": 1.77, - "learning_rate": 1.410000696687084e-06, - "loss": 0.7434, - "step": 25010 - }, - { - "epoch": 1.77, - "learning_rate": 1.4015743723958975e-06, - "loss": 0.7494, - "step": 25020 - }, - { - "epoch": 1.77, - "learning_rate": 1.3931723875992376e-06, - "loss": 0.7451, - "step": 25030 - }, - { - "epoch": 1.77, - "learning_rate": 1.3847947532926687e-06, - "loss": 0.7415, - "step": 25040 - }, - { - "epoch": 1.77, - "learning_rate": 1.3764414804398763e-06, - "loss": 0.7453, - "step": 25050 - }, - { - "epoch": 1.77, - "learning_rate": 1.3681125799726736e-06, - "loss": 0.7397, - "step": 25060 - }, - { - "epoch": 1.77, - "learning_rate": 1.3598080627909682e-06, - "loss": 0.7449, - "step": 25070 - }, - { - "epoch": 1.77, - "learning_rate": 1.351527939762769e-06, - "loss": 0.7433, - "step": 25080 - }, - { - "epoch": 1.77, - "learning_rate": 1.3432722217241501e-06, - "loss": 0.7437, - "step": 25090 - }, - { - "epoch": 1.77, - "learning_rate": 1.3350409194792625e-06, - "loss": 0.7451, - "step": 25100 - }, - { - "epoch": 1.77, - "learning_rate": 1.326834043800278e-06, - "loss": 0.7513, - "step": 25110 - }, - { - "epoch": 1.77, - "learning_rate": 1.31865160542743e-06, - "loss": 0.7445, - "step": 25120 - }, - { - "epoch": 1.78, - "learning_rate": 1.310493615068953e-06, - "loss": 0.7422, - "step": 25130 - }, - { - "epoch": 1.78, - "learning_rate": 1.3023600834010974e-06, - "loss": 0.7436, - "step": 25140 - }, - { - "epoch": 1.78, - "learning_rate": 1.2942510210680935e-06, - "loss": 0.738, - "step": 25150 - }, - { - "epoch": 1.78, - "learning_rate": 1.2861664386821638e-06, - "loss": 0.7458, - "step": 25160 - }, - { - "epoch": 1.78, - "learning_rate": 1.2781063468234823e-06, - "loss": 0.7391, - "step": 25170 - }, - { - "epoch": 1.78, - "learning_rate": 1.270070756040176e-06, - "loss": 0.7378, - "step": 25180 - }, - { - "epoch": 1.78, - "learning_rate": 1.262059676848304e-06, - "loss": 0.738, - "step": 25190 - }, - { - "epoch": 1.78, - "learning_rate": 1.2540731197318578e-06, - "loss": 0.7463, - "step": 25200 - }, - { - "epoch": 1.78, - "learning_rate": 1.2461110951427258e-06, - "loss": 0.7358, - "step": 25210 - }, - { - "epoch": 1.78, - "learning_rate": 1.2381736135007038e-06, - "loss": 0.7372, - "step": 25220 - }, - { - "epoch": 1.78, - "learning_rate": 1.2302606851934518e-06, - "loss": 0.7424, - "step": 25230 - }, - { - "epoch": 1.78, - "learning_rate": 1.2223723205765125e-06, - "loss": 0.7378, - "step": 25240 - }, - { - "epoch": 1.78, - "learning_rate": 1.214508529973275e-06, - "loss": 0.7418, - "step": 25250 - }, - { - "epoch": 1.78, - "learning_rate": 1.2066693236749671e-06, - "loss": 0.7413, - "step": 25260 - }, - { - "epoch": 1.78, - "learning_rate": 1.1988547119406468e-06, - "loss": 0.7428, - "step": 25270 - }, - { - "epoch": 1.79, - "learning_rate": 1.1910647049971913e-06, - "loss": 0.7375, - "step": 25280 - }, - { - "epoch": 1.79, - "learning_rate": 1.1832993130392612e-06, - "loss": 0.7435, - "step": 25290 - }, - { - "epoch": 1.79, - "learning_rate": 1.175558546229325e-06, - "loss": 0.7422, - "step": 25300 - }, - { - "epoch": 1.79, - "learning_rate": 1.1678424146976064e-06, - "loss": 0.7391, - "step": 25310 - }, - { - "epoch": 1.79, - "learning_rate": 1.1601509285421009e-06, - "loss": 0.7358, - "step": 25320 - }, - { - "epoch": 1.79, - "learning_rate": 1.1524840978285456e-06, - "loss": 0.7418, - "step": 25330 - }, - { - "epoch": 1.79, - "learning_rate": 1.1448419325904102e-06, - "loss": 0.7392, - "step": 25340 - }, - { - "epoch": 1.79, - "learning_rate": 1.1372244428288904e-06, - "loss": 0.7473, - "step": 25350 - }, - { - "epoch": 1.79, - "learning_rate": 1.1296316385128847e-06, - "loss": 0.7468, - "step": 25360 - }, - { - "epoch": 1.79, - "learning_rate": 1.1220635295789872e-06, - "loss": 0.7424, - "step": 25370 - }, - { - "epoch": 1.79, - "learning_rate": 1.1145201259314797e-06, - "loss": 0.7498, - "step": 25380 - }, - { - "epoch": 1.79, - "learning_rate": 1.1070014374422988e-06, - "loss": 0.7421, - "step": 25390 - }, - { - "epoch": 1.79, - "learning_rate": 1.099507473951056e-06, - "loss": 0.7492, - "step": 25400 - }, - { - "epoch": 1.79, - "learning_rate": 1.0920382452649814e-06, - "loss": 0.746, - "step": 25410 - }, - { - "epoch": 1.8, - "learning_rate": 1.0845937611589564e-06, - "loss": 0.7377, - "step": 25420 - }, - { - "epoch": 1.8, - "learning_rate": 1.0771740313754674e-06, - "loss": 0.7419, - "step": 25430 - }, - { - "epoch": 1.8, - "learning_rate": 1.0697790656246121e-06, - "loss": 0.7442, - "step": 25440 - }, - { - "epoch": 1.8, - "learning_rate": 1.0624088735840755e-06, - "loss": 0.7429, - "step": 25450 - }, - { - "epoch": 1.8, - "learning_rate": 1.055063464899122e-06, - "loss": 0.737, - "step": 25460 - }, - { - "epoch": 1.8, - "learning_rate": 1.0477428491825848e-06, - "loss": 0.7399, - "step": 25470 - }, - { - "epoch": 1.8, - "learning_rate": 1.0404470360148533e-06, - "loss": 0.7374, - "step": 25480 - }, - { - "epoch": 1.8, - "learning_rate": 1.0331760349438435e-06, - "loss": 0.7455, - "step": 25490 - }, - { - "epoch": 1.8, - "learning_rate": 1.0259298554850195e-06, - "loss": 0.7437, - "step": 25500 - }, - { - "epoch": 1.8, - "learning_rate": 1.0187085071213465e-06, - "loss": 0.7442, - "step": 25510 - }, - { - "epoch": 1.8, - "learning_rate": 1.0115119993033096e-06, - "loss": 0.744, - "step": 25520 - }, - { - "epoch": 1.8, - "learning_rate": 1.0043403414488662e-06, - "loss": 0.7413, - "step": 25530 - }, - { - "epoch": 1.8, - "learning_rate": 9.971935429434709e-07, - "loss": 0.7419, - "step": 25540 - }, - { - "epoch": 1.8, - "learning_rate": 9.90071613140029e-07, - "loss": 0.7504, - "step": 25550 - }, - { - "epoch": 1.81, - "learning_rate": 9.82974561358918e-07, - "loss": 0.7532, - "step": 25560 - }, - { - "epoch": 1.81, - "learning_rate": 9.7590239688794e-07, - "loss": 0.7443, - "step": 25570 - }, - { - "epoch": 1.81, - "learning_rate": 9.688551289823422e-07, - "loss": 0.7488, - "step": 25580 - }, - { - "epoch": 1.81, - "learning_rate": 9.618327668647765e-07, - "loss": 0.7493, - "step": 25590 - }, - { - "epoch": 1.81, - "learning_rate": 9.548353197253135e-07, - "loss": 0.7431, - "step": 25600 - }, - { - "epoch": 1.81, - "learning_rate": 9.478627967214104e-07, - "loss": 0.7412, - "step": 25610 - }, - { - "epoch": 1.81, - "learning_rate": 9.409152069779104e-07, - "loss": 0.7416, - "step": 25620 - }, - { - "epoch": 1.81, - "learning_rate": 9.339925595870225e-07, - "loss": 0.743, - "step": 25630 - }, - { - "epoch": 1.81, - "learning_rate": 9.270948636083221e-07, - "loss": 0.7482, - "step": 25640 - }, - { - "epoch": 1.81, - "learning_rate": 9.202221280687196e-07, - "loss": 0.7479, - "step": 25650 - }, - { - "epoch": 1.81, - "learning_rate": 9.133743619624669e-07, - "loss": 0.74, - "step": 25660 - }, - { - "epoch": 1.81, - "learning_rate": 9.065515742511421e-07, - "loss": 0.7431, - "step": 25670 - }, - { - "epoch": 1.81, - "learning_rate": 8.997537738636275e-07, - "loss": 0.7466, - "step": 25680 - }, - { - "epoch": 1.81, - "learning_rate": 8.929809696961044e-07, - "loss": 0.7428, - "step": 25690 - }, - { - "epoch": 1.82, - "learning_rate": 8.862331706120497e-07, - "loss": 0.7492, - "step": 25700 - }, - { - "epoch": 1.82, - "learning_rate": 8.795103854422127e-07, - "loss": 0.7444, - "step": 25710 - }, - { - "epoch": 1.82, - "learning_rate": 8.728126229846001e-07, - "loss": 0.7422, - "step": 25720 - }, - { - "epoch": 1.82, - "learning_rate": 8.66139892004485e-07, - "loss": 0.7455, - "step": 25730 - }, - { - "epoch": 1.82, - "learning_rate": 8.594922012343709e-07, - "loss": 0.7393, - "step": 25740 - }, - { - "epoch": 1.82, - "learning_rate": 8.528695593739988e-07, - "loss": 0.7382, - "step": 25750 - }, - { - "epoch": 1.82, - "learning_rate": 8.462719750903225e-07, - "loss": 0.7391, - "step": 25760 - }, - { - "epoch": 1.82, - "learning_rate": 8.396994570175132e-07, - "loss": 0.7367, - "step": 25770 - }, - { - "epoch": 1.82, - "learning_rate": 8.331520137569238e-07, - "loss": 0.7451, - "step": 25780 - }, - { - "epoch": 1.82, - "learning_rate": 8.266296538771046e-07, - "loss": 0.7361, - "step": 25790 - }, - { - "epoch": 1.82, - "learning_rate": 8.201323859137722e-07, - "loss": 0.7364, - "step": 25800 - }, - { - "epoch": 1.82, - "learning_rate": 8.136602183698094e-07, - "loss": 0.7475, - "step": 25810 - }, - { - "epoch": 1.82, - "learning_rate": 8.072131597152499e-07, - "loss": 0.7416, - "step": 25820 - }, - { - "epoch": 1.82, - "learning_rate": 8.007912183872712e-07, - "loss": 0.7433, - "step": 25830 - }, - { - "epoch": 1.83, - "learning_rate": 7.943944027901684e-07, - "loss": 0.7406, - "step": 25840 - }, - { - "epoch": 1.83, - "learning_rate": 7.880227212953717e-07, - "loss": 0.7435, - "step": 25850 - }, - { - "epoch": 1.83, - "learning_rate": 7.81676182241402e-07, - "loss": 0.7407, - "step": 25860 - }, - { - "epoch": 1.83, - "learning_rate": 7.753547939338912e-07, - "loss": 0.7368, - "step": 25870 - }, - { - "epoch": 1.83, - "learning_rate": 7.690585646455439e-07, - "loss": 0.7374, - "step": 25880 - }, - { - "epoch": 1.83, - "learning_rate": 7.627875026161514e-07, - "loss": 0.7376, - "step": 25890 - }, - { - "epoch": 1.83, - "learning_rate": 7.565416160525596e-07, - "loss": 0.7456, - "step": 25900 - }, - { - "epoch": 1.83, - "learning_rate": 7.503209131286727e-07, - "loss": 0.7358, - "step": 25910 - }, - { - "epoch": 1.83, - "learning_rate": 7.441254019854316e-07, - "loss": 0.7419, - "step": 25920 - }, - { - "epoch": 1.83, - "learning_rate": 7.379550907308219e-07, - "loss": 0.7456, - "step": 25930 - }, - { - "epoch": 1.83, - "learning_rate": 7.318099874398355e-07, - "loss": 0.7345, - "step": 25940 - }, - { - "epoch": 1.83, - "learning_rate": 7.256901001544836e-07, - "loss": 0.737, - "step": 25950 - }, - { - "epoch": 1.83, - "learning_rate": 7.195954368837732e-07, - "loss": 0.7425, - "step": 25960 - }, - { - "epoch": 1.83, - "learning_rate": 7.135260056037086e-07, - "loss": 0.733, - "step": 25970 - }, - { - "epoch": 1.84, - "learning_rate": 7.074818142572604e-07, - "loss": 0.7378, - "step": 25980 - }, - { - "epoch": 1.84, - "learning_rate": 7.014628707543836e-07, - "loss": 0.7504, - "step": 25990 - }, - { - "epoch": 1.84, - "learning_rate": 6.954691829719773e-07, - "loss": 0.7406, - "step": 26000 - }, - { - "epoch": 1.84, - "learning_rate": 6.895007587538982e-07, - "loss": 0.7431, - "step": 26010 - }, - { - "epoch": 1.84, - "learning_rate": 6.835576059109406e-07, - "loss": 0.7386, - "step": 26020 - }, - { - "epoch": 1.84, - "learning_rate": 6.776397322208205e-07, - "loss": 0.7413, - "step": 26030 - }, - { - "epoch": 1.84, - "learning_rate": 6.717471454281721e-07, - "loss": 0.7458, - "step": 26040 - }, - { - "epoch": 1.84, - "learning_rate": 6.658798532445465e-07, - "loss": 0.7445, - "step": 26050 - }, - { - "epoch": 1.84, - "learning_rate": 6.600378633483795e-07, - "loss": 0.736, - "step": 26060 - }, - { - "epoch": 1.84, - "learning_rate": 6.542211833850043e-07, - "loss": 0.7434, - "step": 26070 - }, - { - "epoch": 1.84, - "learning_rate": 6.484298209666229e-07, - "loss": 0.7394, - "step": 26080 - }, - { - "epoch": 1.84, - "learning_rate": 6.426637836723126e-07, - "loss": 0.738, - "step": 26090 - }, - { - "epoch": 1.84, - "learning_rate": 6.369230790480041e-07, - "loss": 0.735, - "step": 26100 - }, - { - "epoch": 1.84, - "learning_rate": 6.312077146064743e-07, - "loss": 0.7378, - "step": 26110 - }, - { - "epoch": 1.85, - "learning_rate": 6.255176978273358e-07, - "loss": 0.7453, - "step": 26120 - }, - { - "epoch": 1.85, - "learning_rate": 6.198530361570387e-07, - "loss": 0.7322, - "step": 26130 - }, - { - "epoch": 1.85, - "learning_rate": 6.142137370088397e-07, - "loss": 0.7367, - "step": 26140 - }, - { - "epoch": 1.85, - "learning_rate": 6.085998077628152e-07, - "loss": 0.7477, - "step": 26150 - }, - { - "epoch": 1.85, - "learning_rate": 6.030112557658264e-07, - "loss": 0.738, - "step": 26160 - }, - { - "epoch": 1.85, - "learning_rate": 5.974480883315425e-07, - "loss": 0.7499, - "step": 26170 - }, - { - "epoch": 1.85, - "learning_rate": 5.919103127403891e-07, - "loss": 0.745, - "step": 26180 - }, - { - "epoch": 1.85, - "learning_rate": 5.863979362395844e-07, - "loss": 0.746, - "step": 26190 - }, - { - "epoch": 1.85, - "learning_rate": 5.80910966043089e-07, - "loss": 0.7375, - "step": 26200 - }, - { - "epoch": 1.85, - "learning_rate": 5.754494093316276e-07, - "loss": 0.7428, - "step": 26210 - }, - { - "epoch": 1.85, - "learning_rate": 5.700132732526586e-07, - "loss": 0.7325, - "step": 26220 - }, - { - "epoch": 1.85, - "learning_rate": 5.646025649203801e-07, - "loss": 0.7417, - "step": 26230 - }, - { - "epoch": 1.85, - "learning_rate": 5.592172914157057e-07, - "loss": 0.7385, - "step": 26240 - }, - { - "epoch": 1.85, - "learning_rate": 5.538574597862689e-07, - "loss": 0.7347, - "step": 26250 - }, - { - "epoch": 1.85, - "learning_rate": 5.48523077046399e-07, - "loss": 0.7477, - "step": 26260 - }, - { - "epoch": 1.86, - "learning_rate": 5.432141501771316e-07, - "loss": 0.7356, - "step": 26270 - }, - { - "epoch": 1.86, - "learning_rate": 5.379306861261824e-07, - "loss": 0.7392, - "step": 26280 - }, - { - "epoch": 1.86, - "learning_rate": 5.326726918079472e-07, - "loss": 0.7339, - "step": 26290 - }, - { - "epoch": 1.86, - "learning_rate": 5.27440174103484e-07, - "loss": 0.7443, - "step": 26300 - }, - { - "epoch": 1.86, - "learning_rate": 5.222331398605174e-07, - "loss": 0.7421, - "step": 26310 - }, - { - "epoch": 1.86, - "learning_rate": 5.170515958934185e-07, - "loss": 0.7405, - "step": 26320 - }, - { - "epoch": 1.86, - "learning_rate": 5.118955489832012e-07, - "loss": 0.7418, - "step": 26330 - }, - { - "epoch": 1.86, - "learning_rate": 5.067650058775076e-07, - "loss": 0.7438, - "step": 26340 - }, - { - "epoch": 1.86, - "learning_rate": 5.016599732906091e-07, - "loss": 0.7483, - "step": 26350 - }, - { - "epoch": 1.86, - "learning_rate": 4.965804579033861e-07, - "loss": 0.7471, - "step": 26360 - }, - { - "epoch": 1.86, - "learning_rate": 4.915264663633301e-07, - "loss": 0.7394, - "step": 26370 - }, - { - "epoch": 1.86, - "learning_rate": 4.864980052845281e-07, - "loss": 0.7347, - "step": 26380 - }, - { - "epoch": 1.86, - "learning_rate": 4.814950812476559e-07, - "loss": 0.7383, - "step": 26390 - }, - { - "epoch": 1.86, - "learning_rate": 4.765177007999677e-07, - "loss": 0.7392, - "step": 26400 - }, - { - "epoch": 1.87, - "learning_rate": 4.7156587045528834e-07, - "loss": 0.7341, - "step": 26410 - }, - { - "epoch": 1.87, - "learning_rate": 4.666395966940096e-07, - "loss": 0.7451, - "step": 26420 - }, - { - "epoch": 1.87, - "learning_rate": 4.617388859630767e-07, - "loss": 0.7451, - "step": 26430 - }, - { - "epoch": 1.87, - "learning_rate": 4.5686374467597936e-07, - "loss": 0.7447, - "step": 26440 - }, - { - "epoch": 1.87, - "learning_rate": 4.5201417921274524e-07, - "loss": 0.7386, - "step": 26450 - }, - { - "epoch": 1.87, - "learning_rate": 4.47190195919931e-07, - "loss": 0.7451, - "step": 26460 - }, - { - "epoch": 1.87, - "learning_rate": 4.4239180111061763e-07, - "loss": 0.7415, - "step": 26470 - }, - { - "epoch": 1.87, - "learning_rate": 4.37619001064391e-07, - "loss": 0.7482, - "step": 26480 - }, - { - "epoch": 1.87, - "learning_rate": 4.3287180202735033e-07, - "loss": 0.7386, - "step": 26490 - }, - { - "epoch": 1.87, - "learning_rate": 4.2815021021208604e-07, - "loss": 0.7418, - "step": 26500 - }, - { - "epoch": 1.87, - "learning_rate": 4.234542317976753e-07, - "loss": 0.749, - "step": 26510 - }, - { - "epoch": 1.87, - "learning_rate": 4.187838729296845e-07, - "loss": 0.7392, - "step": 26520 - }, - { - "epoch": 1.87, - "learning_rate": 4.141391397201422e-07, - "loss": 0.7388, - "step": 26530 - }, - { - "epoch": 1.87, - "learning_rate": 4.0952003824754396e-07, - "loss": 0.7429, - "step": 26540 - }, - { - "epoch": 1.88, - "learning_rate": 4.0492657455684314e-07, - "loss": 0.7443, - "step": 26550 - }, - { - "epoch": 1.88, - "learning_rate": 4.003587546594401e-07, - "loss": 0.7467, - "step": 26560 - }, - { - "epoch": 1.88, - "learning_rate": 3.9581658453317294e-07, - "loss": 0.7502, - "step": 26570 - }, - { - "epoch": 1.88, - "learning_rate": 3.913000701223202e-07, - "loss": 0.7382, - "step": 26580 - }, - { - "epoch": 1.88, - "learning_rate": 3.8680921733757373e-07, - "loss": 0.7415, - "step": 26590 - }, - { - "epoch": 1.88, - "learning_rate": 3.823440320560545e-07, - "loss": 0.7477, - "step": 26600 - }, - { - "epoch": 1.88, - "learning_rate": 3.7790452012128167e-07, - "loss": 0.7433, - "step": 26610 - }, - { - "epoch": 1.88, - "learning_rate": 3.734906873431876e-07, - "loss": 0.7484, - "step": 26620 - }, - { - "epoch": 1.88, - "learning_rate": 3.6910253949808517e-07, - "loss": 0.7384, - "step": 26630 - }, - { - "epoch": 1.88, - "learning_rate": 3.6474008232868727e-07, - "loss": 0.7437, - "step": 26640 - }, - { - "epoch": 1.88, - "learning_rate": 3.604033215440739e-07, - "loss": 0.7392, - "step": 26650 - }, - { - "epoch": 1.88, - "learning_rate": 3.5609226281970497e-07, - "loss": 0.739, - "step": 26660 - }, - { - "epoch": 1.88, - "learning_rate": 3.51806911797401e-07, - "loss": 0.7391, - "step": 26670 - }, - { - "epoch": 1.88, - "learning_rate": 3.475472740853403e-07, - "loss": 0.7352, - "step": 26680 - }, - { - "epoch": 1.89, - "learning_rate": 3.433133552580503e-07, - "loss": 0.7362, - "step": 26690 - }, - { - "epoch": 1.89, - "learning_rate": 3.391051608563989e-07, - "loss": 0.7474, - "step": 26700 - }, - { - "epoch": 1.89, - "learning_rate": 3.3492269638759176e-07, - "loss": 0.7354, - "step": 26710 - }, - { - "epoch": 1.89, - "learning_rate": 3.307659673251595e-07, - "loss": 0.7399, - "step": 26720 - }, - { - "epoch": 1.89, - "learning_rate": 3.266349791089529e-07, - "loss": 0.7369, - "step": 26730 - }, - { - "epoch": 1.89, - "learning_rate": 3.225297371451408e-07, - "loss": 0.7404, - "step": 26740 - }, - { - "epoch": 1.89, - "learning_rate": 3.1845024680619007e-07, - "loss": 0.7451, - "step": 26750 - }, - { - "epoch": 1.89, - "learning_rate": 3.1439651343087683e-07, - "loss": 0.7424, - "step": 26760 - }, - { - "epoch": 1.89, - "learning_rate": 3.103685423242597e-07, - "loss": 0.7387, - "step": 26770 - }, - { - "epoch": 1.89, - "learning_rate": 3.0636633875769094e-07, - "loss": 0.7374, - "step": 26780 + "epoch": 0.04, + "learning_rate": 9.416195856873823e-06, + "loss": 0.7169, + "step": 500 }, { - "epoch": 1.89, - "learning_rate": 3.023899079687942e-07, - "loss": 0.7432, - "step": 26790 + "epoch": 0.07, + "learning_rate": 1.8832391713747646e-05, + "loss": 0.7282, + "step": 1000 }, { - "epoch": 1.89, - "learning_rate": 2.9843925516147123e-07, - "loss": 0.7372, - "step": 26800 + "epoch": 0.11, + "learning_rate": 2.8248587570621472e-05, + "loss": 0.7519, + "step": 1500 }, { - "epoch": 1.89, - "learning_rate": 2.945143855058796e-07, - "loss": 0.74, - "step": 26810 + "epoch": 0.14, + "learning_rate": 3.766478342749529e-05, + "loss": 0.7757, + "step": 2000 }, { - "epoch": 1.89, - "learning_rate": 2.906153041384441e-07, - "loss": 0.7438, - "step": 26820 + "epoch": 0.18, + "learning_rate": 3.9997039890834935e-05, + "loss": 0.8085, + "step": 2500 }, { - "epoch": 1.9, - "learning_rate": 2.8674201616183617e-07, - "loss": 0.7433, - "step": 26830 + "epoch": 0.21, + "learning_rate": 3.998393455306432e-05, + "loss": 0.8176, + "step": 3000 }, { - "epoch": 1.9, - "learning_rate": 2.8289452664497453e-07, - "loss": 0.7367, - "step": 26840 + "epoch": 0.25, + "learning_rate": 3.9960368899072094e-05, + "loss": 0.8158, + "step": 3500 }, { - "epoch": 1.9, - "learning_rate": 2.790728406230092e-07, - "loss": 0.7389, - "step": 26850 + "epoch": 0.28, + "learning_rate": 3.992635526397635e-05, + "loss": 0.8239, + "step": 4000 }, { - "epoch": 1.9, - "learning_rate": 2.752769630973329e-07, - "loss": 0.7485, - "step": 26860 + "epoch": 0.32, + "learning_rate": 3.9881911451747484e-05, + "loss": 0.8059, + "step": 4500 }, { - "epoch": 1.9, - "learning_rate": 2.715068990355496e-07, - "loss": 0.7371, - "step": 26870 + "epoch": 0.35, + "learning_rate": 3.9827060725888914e-05, + "loss": 0.806, + "step": 5000 }, { - "epoch": 1.9, - "learning_rate": 2.6776265337149277e-07, - "loss": 0.7361, - "step": 26880 + "epoch": 0.39, + "learning_rate": 3.9761831797260154e-05, + "loss": 0.8018, + "step": 5500 }, { - "epoch": 1.9, - "learning_rate": 2.640442310052027e-07, - "loss": 0.74, - "step": 26890 + "epoch": 0.42, + "learning_rate": 3.96862588090485e-05, + "loss": 0.7967, + "step": 6000 }, { - "epoch": 1.9, - "learning_rate": 2.6035163680292464e-07, - "loss": 0.7406, - "step": 26900 + "epoch": 0.46, + "learning_rate": 3.960038131889723e-05, + "loss": 0.8037, + "step": 6500 }, { - "epoch": 1.9, - "learning_rate": 2.5668487559710184e-07, - "loss": 0.7445, - "step": 26910 + "epoch": 0.49, + "learning_rate": 3.9504244278199726e-05, + "loss": 0.7933, + "step": 7000 }, { - "epoch": 1.9, - "learning_rate": 2.530439521863781e-07, - "loss": 0.7422, - "step": 26920 + "epoch": 0.53, + "learning_rate": 3.9397898008570265e-05, + "loss": 0.7962, + "step": 7500 }, { - "epoch": 1.9, - "learning_rate": 2.494288713355708e-07, - "loss": 0.7398, - "step": 26930 + "epoch": 0.57, + "learning_rate": 3.9281398175503866e-05, + "loss": 0.7918, + "step": 8000 }, { - "epoch": 1.9, - "learning_rate": 2.458396377756955e-07, - "loss": 0.7338, - "step": 26940 + "epoch": 0.6, + "learning_rate": 3.915480575923895e-05, + "loss": 0.7868, + "step": 8500 }, { - "epoch": 1.9, - "learning_rate": 2.4227625620391936e-07, - "loss": 0.7503, - "step": 26950 + "epoch": 0.64, + "learning_rate": 3.901818702283807e-05, + "loss": 0.7887, + "step": 9000 }, { - "epoch": 1.9, - "learning_rate": 2.3873873128359424e-07, - "loss": 0.7438, - "step": 26960 + "epoch": 0.67, + "learning_rate": 3.8871613477503424e-05, + "loss": 0.7835, + "step": 9500 }, { - "epoch": 1.91, - "learning_rate": 2.3522706764422808e-07, - "loss": 0.7441, - "step": 26970 + "epoch": 0.71, + "learning_rate": 3.87151618451453e-05, + "loss": 0.7867, + "step": 10000 }, { - "epoch": 1.91, - "learning_rate": 2.3174126988148692e-07, - "loss": 0.7454, - "step": 26980 + "epoch": 0.74, + "learning_rate": 3.854891401822304e-05, + "loss": 0.7844, + "step": 10500 }, { - "epoch": 1.91, - "learning_rate": 2.2828134255718171e-07, - "loss": 0.744, - "step": 26990 + "epoch": 0.78, + "learning_rate": 3.837295701687955e-05, + "loss": 0.7791, + "step": 11000 }, { - "epoch": 1.91, - "learning_rate": 2.2484729019927265e-07, - "loss": 0.7411, - "step": 27000 + "epoch": 0.81, + "learning_rate": 3.818738294339182e-05, + "loss": 0.785, + "step": 11500 }, { - "epoch": 1.91, - "learning_rate": 2.2143911730185152e-07, - "loss": 0.7417, - "step": 27010 + "epoch": 0.85, + "learning_rate": 3.799228893396123e-05, + "loss": 0.7791, + "step": 12000 }, { - "epoch": 1.91, - "learning_rate": 2.1805682832515053e-07, - "loss": 0.7407, - "step": 27020 + "epoch": 0.88, + "learning_rate": 3.778777710786896e-05, + "loss": 0.7804, + "step": 12500 }, { - "epoch": 1.91, - "learning_rate": 2.147004276955178e-07, - "loss": 0.7449, - "step": 27030 + "epoch": 0.92, + "learning_rate": 3.757395451402304e-05, + "loss": 0.7794, + "step": 13000 }, { - "epoch": 1.91, - "learning_rate": 2.1136991980543086e-07, - "loss": 0.7399, - "step": 27040 + "epoch": 0.95, + "learning_rate": 3.735093307492506e-05, + "loss": 0.7736, + "step": 13500 }, { - "epoch": 1.91, - "learning_rate": 2.0806530901347655e-07, - "loss": 0.7404, - "step": 27050 + "epoch": 0.99, + "learning_rate": 3.7118829528085897e-05, + "loss": 0.7837, + "step": 14000 }, { - "epoch": 1.91, - "learning_rate": 2.047865996443532e-07, - "loss": 0.7438, - "step": 27060 + "epoch": 1.02, + "learning_rate": 3.687776536492105e-05, + "loss": 0.7527, + "step": 14500 }, { - "epoch": 1.91, - "learning_rate": 2.0153379598885748e-07, - "loss": 0.7461, - "step": 27070 + "epoch": 1.06, + "learning_rate": 3.662786676715763e-05, + "loss": 0.7403, + "step": 15000 }, { - "epoch": 1.91, - "learning_rate": 1.9830690230389082e-07, - "loss": 0.7495, - "step": 27080 + "epoch": 1.09, + "learning_rate": 3.636926454078625e-05, + "loss": 0.7547, + "step": 15500 }, { - "epoch": 1.91, - "learning_rate": 1.9510592281243968e-07, - "loss": 0.7421, - "step": 27090 + "epoch": 1.13, + "learning_rate": 3.610209404759251e-05, + "loss": 0.7476, + "step": 16000 }, { - "epoch": 1.91, - "learning_rate": 1.9193086170358643e-07, - "loss": 0.7392, - "step": 27100 + "epoch": 1.17, + "learning_rate": 3.5826495134303565e-05, + "loss": 0.7474, + "step": 16500 }, { - "epoch": 1.91, - "learning_rate": 1.8878172313248067e-07, - "loss": 0.7402, - "step": 27110 + "epoch": 1.2, + "learning_rate": 3.5542612059387445e-05, + "loss": 0.747, + "step": 17000 }, { - "epoch": 1.92, - "learning_rate": 1.8565851122035904e-07, - "loss": 0.7453, - "step": 27120 + "epoch": 1.24, + "learning_rate": 3.5250593417542837e-05, + "loss": 0.7441, + "step": 17500 }, { - "epoch": 1.92, - "learning_rate": 1.825612300545232e-07, - "loss": 0.735, - "step": 27130 + "epoch": 1.27, + "learning_rate": 3.495059206191926e-05, + "loss": 0.7463, + "step": 18000 }, { - "epoch": 1.92, - "learning_rate": 1.7948988368834408e-07, - "loss": 0.7369, - "step": 27140 + "epoch": 1.31, + "learning_rate": 3.464276502410819e-05, + "loss": 0.7452, + "step": 18500 }, { - "epoch": 1.92, - "learning_rate": 1.7644447614124427e-07, - "loss": 0.7383, - "step": 27150 + "epoch": 1.34, + "learning_rate": 3.432727343194701e-05, + "loss": 0.7439, + "step": 19000 }, { - "epoch": 1.92, - "learning_rate": 1.7342501139870904e-07, - "loss": 0.748, - "step": 27160 + "epoch": 1.38, + "learning_rate": 3.400428242517889e-05, + "loss": 0.7438, + "step": 19500 }, { - "epoch": 1.92, - "learning_rate": 1.704314934122686e-07, - "loss": 0.7387, - "step": 27170 + "epoch": 1.41, + "learning_rate": 3.367396106901259e-05, + "loss": 0.7427, + "step": 20000 }, { - "epoch": 1.92, - "learning_rate": 1.674639260994937e-07, - "loss": 0.7493, - "step": 27180 + "epoch": 1.45, + "learning_rate": 3.3336482265627675e-05, + "loss": 0.7471, + "step": 20500 }, { - "epoch": 1.92, - "learning_rate": 1.6452231334399993e-07, - "loss": 0.7348, - "step": 27190 + "epoch": 1.48, + "learning_rate": 3.299202266367119e-05, + "loss": 0.745, + "step": 21000 }, { - "epoch": 1.92, - "learning_rate": 1.616066589954346e-07, - "loss": 0.7424, - "step": 27200 + "epoch": 1.52, + "learning_rate": 3.2640762565793374e-05, + "loss": 0.7418, + "step": 21500 }, { - "epoch": 1.92, - "learning_rate": 1.587169668694699e-07, - "loss": 0.7419, - "step": 27210 + "epoch": 1.55, + "learning_rate": 3.2282885834270696e-05, + "loss": 0.7465, + "step": 22000 }, { - "epoch": 1.92, - "learning_rate": 1.5585324074780972e-07, + "epoch": 1.59, + "learning_rate": 3.191857979476569e-05, "loss": 0.7416, - "step": 27220 - }, - { - "epoch": 1.92, - "learning_rate": 1.5301548437816726e-07, - "loss": 0.7357, - "step": 27230 - }, - { - "epoch": 1.92, - "learning_rate": 1.502037014742763e-07, - "loss": 0.7384, - "step": 27240 - }, - { - "epoch": 1.92, - "learning_rate": 1.474178957158756e-07, - "loss": 0.7415, - "step": 27250 - }, - { - "epoch": 1.93, - "learning_rate": 1.4465807074870885e-07, - "loss": 0.7404, - "step": 27260 - }, - { - "epoch": 1.93, - "learning_rate": 1.419242301845225e-07, - "loss": 0.7369, - "step": 27270 - }, - { - "epoch": 1.93, - "learning_rate": 1.3921637760105022e-07, - "loss": 0.7335, - "step": 27280 - }, - { - "epoch": 1.93, - "learning_rate": 1.3653451654202622e-07, - "loss": 0.7362, - "step": 27290 - }, - { - "epoch": 1.93, - "learning_rate": 1.3387865051715854e-07, - "loss": 0.7372, - "step": 27300 - }, - { - "epoch": 1.93, - "learning_rate": 1.312487830021447e-07, - "loss": 0.7433, - "step": 27310 - }, - { - "epoch": 1.93, - "learning_rate": 1.286449174386517e-07, - "loss": 0.7417, - "step": 27320 - }, - { - "epoch": 1.93, - "learning_rate": 1.2606705723432476e-07, - "loss": 0.7447, - "step": 27330 - }, - { - "epoch": 1.93, - "learning_rate": 1.2351520576276755e-07, - "loss": 0.7474, - "step": 27340 - }, - { - "epoch": 1.93, - "learning_rate": 1.209893663635575e-07, - "loss": 0.7361, - "step": 27350 + "step": 22500 }, { - "epoch": 1.93, - "learning_rate": 1.1848954234222166e-07, - "loss": 0.7401, - "step": 27360 + "epoch": 1.62, + "learning_rate": 3.154803513827388e-05, + "loss": 0.7421, + "step": 23000 }, { - "epoch": 1.93, - "learning_rate": 1.1601573697024526e-07, - "loss": 0.742, - "step": 27370 + "epoch": 1.66, + "learning_rate": 3.117144582130925e-05, + "loss": 0.7396, + "step": 23500 }, { - "epoch": 1.93, - "learning_rate": 1.1356795348506088e-07, - "loss": 0.7335, - "step": 27380 + "epoch": 1.7, + "learning_rate": 3.078900896438028e-05, + "loss": 0.7371, + "step": 24000 }, { - "epoch": 1.93, - "learning_rate": 1.1114619509004831e-07, - "loss": 0.7407, - "step": 27390 + "epoch": 1.73, + "learning_rate": 3.040092474881003e-05, + "loss": 0.7393, + "step": 24500 }, { - "epoch": 1.94, - "learning_rate": 1.0875046495453012e-07, - "loss": 0.7432, - "step": 27400 + "epoch": 1.77, + "learning_rate": 3.0007396311953882e-05, + "loss": 0.7366, + "step": 25000 }, { - "epoch": 1.94, - "learning_rate": 1.0638076621376059e-07, - "loss": 0.7333, - "step": 27410 + "epoch": 1.8, + "learning_rate": 2.9608629640870138e-05, + "loss": 0.7455, + "step": 25500 }, { - "epoch": 1.94, - "learning_rate": 1.0403710196893235e-07, - "loss": 0.7341, - "step": 27420 + "epoch": 1.84, + "learning_rate": 2.9204833464498878e-05, + "loss": 0.7387, + "step": 26000 }, { - "epoch": 1.94, - "learning_rate": 1.0171947528716753e-07, - "loss": 0.7447, - "step": 27430 + "epoch": 1.87, + "learning_rate": 2.8796219144405713e-05, + "loss": 0.7363, + "step": 26500 }, { - "epoch": 1.94, - "learning_rate": 9.942788920150881e-08, - "loss": 0.7455, - "step": 27440 + "epoch": 1.91, + "learning_rate": 2.838300056414743e-05, + "loss": 0.7387, + "step": 27000 }, { "epoch": 1.94, - "learning_rate": 9.716234671092173e-08, - "loss": 0.7393, - "step": 27450 + "learning_rate": 2.7965394017317587e-05, + "loss": 0.7317, + "step": 27500 }, { - "epoch": 1.94, - "learning_rate": 9.492285078029017e-08, - "loss": 0.7416, - "step": 27460 + "epoch": 1.98, + "learning_rate": 2.754361809433063e-05, + "loss": 0.7331, + "step": 28000 }, { - "epoch": 1.94, - "learning_rate": 9.270940434041198e-08, - "loss": 0.7408, - "step": 27470 + "epoch": 2.01, + "learning_rate": 2.711789356800372e-05, + "loss": 0.7049, + "step": 28500 }, { - "epoch": 1.94, - "learning_rate": 9.052201028799224e-08, - "loss": 0.7361, - "step": 27480 + "epoch": 2.05, + "learning_rate": 2.6688443277996225e-05, + "loss": 0.6657, + "step": 29000 }, { - "epoch": 1.94, - "learning_rate": 8.836067148564109e-08, - "loss": 0.7428, - "step": 27490 + "epoch": 2.08, + "learning_rate": 2.6255492014167356e-05, + "loss": 0.6651, + "step": 29500 }, { - "epoch": 1.94, - "learning_rate": 8.622539076187153e-08, - "loss": 0.7449, - "step": 27500 + "epoch": 2.12, + "learning_rate": 2.581926639891304e-05, + "loss": 0.6717, + "step": 30000 }, { - "epoch": 1.94, - "learning_rate": 8.411617091109714e-08, - "loss": 0.7376, - "step": 27510 + "epoch": 2.15, + "learning_rate": 2.537999476854349e-05, + "loss": 0.6688, + "step": 30500 }, { - "epoch": 1.94, - "learning_rate": 8.203301469362102e-08, - "loss": 0.7401, - "step": 27520 + "epoch": 2.19, + "learning_rate": 2.4937907053763732e-05, + "loss": 0.6726, + "step": 31000 }, { - "epoch": 1.94, - "learning_rate": 7.997592483564465e-08, - "loss": 0.7405, - "step": 27530 + "epoch": 2.23, + "learning_rate": 2.4493234659319507e-05, + "loss": 0.6681, + "step": 31500 }, { - "epoch": 1.95, - "learning_rate": 7.794490402924793e-08, - "loss": 0.7414, - "step": 27540 + "epoch": 2.26, + "learning_rate": 2.404621034287166e-05, + "loss": 0.6656, + "step": 32000 }, { - "epoch": 1.95, - "learning_rate": 7.593995493240025e-08, - "loss": 0.7392, - "step": 27550 + "epoch": 2.3, + "learning_rate": 2.359706809316231e-05, + "loss": 0.6633, + "step": 32500 }, { - "epoch": 1.95, - "learning_rate": 7.396108016894943e-08, - "loss": 0.7384, - "step": 27560 + "epoch": 2.33, + "learning_rate": 2.314604300753667e-05, + "loss": 0.6647, + "step": 33000 }, { - "epoch": 1.95, - "learning_rate": 7.200828232862388e-08, - "loss": 0.749, - "step": 27570 + "epoch": 2.37, + "learning_rate": 2.2693371168884593e-05, + "loss": 0.6632, + "step": 33500 }, { - "epoch": 1.95, - "learning_rate": 7.008156396701716e-08, - "loss": 0.7342, - "step": 27580 + "epoch": 2.4, + "learning_rate": 2.2239289522066157e-05, + "loss": 0.6642, + "step": 34000 }, { - "epoch": 1.95, - "learning_rate": 6.818092760560113e-08, - "loss": 0.7367, - "step": 27590 + "epoch": 2.44, + "learning_rate": 2.178403574988621e-05, + "loss": 0.6626, + "step": 34500 }, { - "epoch": 1.95, - "learning_rate": 6.630637573171061e-08, - "loss": 0.7325, - "step": 27600 + "epoch": 2.47, + "learning_rate": 2.1327848148682503e-05, + "loss": 0.659, + "step": 35000 }, { - "epoch": 1.95, - "learning_rate": 6.445791079854547e-08, - "loss": 0.746, - "step": 27610 + "epoch": 2.51, + "learning_rate": 2.0870965503592795e-05, + "loss": 0.6626, + "step": 35500 }, { - "epoch": 1.95, - "learning_rate": 6.263553522516396e-08, - "loss": 0.7448, - "step": 27620 + "epoch": 2.54, + "learning_rate": 2.0413626963566004e-05, + "loss": 0.6605, + "step": 36000 }, { - "epoch": 1.95, - "learning_rate": 6.083925139648727e-08, - "loss": 0.7466, - "step": 27630 + "epoch": 2.58, + "learning_rate": 1.9956071916183e-05, + "loss": 0.6583, + "step": 36500 }, { - "epoch": 1.95, - "learning_rate": 5.906906166328164e-08, - "loss": 0.7364, - "step": 27640 + "epoch": 2.61, + "learning_rate": 1.9498539862352476e-05, + "loss": 0.6554, + "step": 37000 }, { - "epoch": 1.95, - "learning_rate": 5.732496834217172e-08, - "loss": 0.75, - "step": 27650 + "epoch": 2.65, + "learning_rate": 1.904127029094744e-05, + "loss": 0.6588, + "step": 37500 }, { - "epoch": 1.95, - "learning_rate": 5.560697371562507e-08, - "loss": 0.741, - "step": 27660 + "epoch": 2.68, + "learning_rate": 1.8584502553448085e-05, + "loss": 0.6561, + "step": 38000 }, { - "epoch": 1.95, - "learning_rate": 5.391508003195878e-08, - "loss": 0.7501, - "step": 27670 + "epoch": 2.72, + "learning_rate": 1.812847573865655e-05, + "loss": 0.6529, + "step": 38500 }, { - "epoch": 1.96, - "learning_rate": 5.224928950533059e-08, - "loss": 0.7428, - "step": 27680 + "epoch": 2.75, + "learning_rate": 1.7673428547549134e-05, + "loss": 0.6499, + "step": 39000 }, { - "epoch": 1.96, - "learning_rate": 5.0609604315736693e-08, - "loss": 0.7423, - "step": 27690 + "epoch": 2.79, + "learning_rate": 1.721959916833157e-05, + "loss": 0.648, + "step": 39500 }, { - "epoch": 1.96, - "learning_rate": 4.8996026609007265e-08, - "loss": 0.7412, - "step": 27700 + "epoch": 2.83, + "learning_rate": 1.6767225151762676e-05, + "loss": 0.6479, + "step": 40000 }, { - "epoch": 1.96, - "learning_rate": 4.740855849681314e-08, - "loss": 0.7376, - "step": 27710 + "epoch": 2.86, + "learning_rate": 1.631654328681168e-05, + "loss": 0.6482, + "step": 40500 }, { - "epoch": 1.96, - "learning_rate": 4.5847202056645836e-08, - "loss": 0.7412, - "step": 27720 + "epoch": 2.9, + "learning_rate": 1.586778947671426e-05, + "loss": 0.6446, + "step": 41000 }, { - "epoch": 1.96, - "learning_rate": 4.4311959331833075e-08, - "loss": 0.7412, - "step": 27730 + "epoch": 2.93, + "learning_rate": 1.5421198615492244e-05, + "loss": 0.6432, + "step": 41500 }, { - "epoch": 1.96, - "learning_rate": 4.280283233152327e-08, - "loss": 0.7395, - "step": 27740 + "epoch": 2.97, + "learning_rate": 1.4977004465001586e-05, + "loss": 0.6425, + "step": 42000 }, { - "epoch": 1.96, - "learning_rate": 4.131982303069437e-08, - "loss": 0.7357, - "step": 27750 + "epoch": 3.0, + "learning_rate": 1.4535439532572877e-05, + "loss": 0.6361, + "step": 42500 }, { - "epoch": 1.96, - "learning_rate": 3.98629333701317e-08, - "loss": 0.7429, - "step": 27760 + "epoch": 3.04, + "learning_rate": 1.4096734949308623e-05, + "loss": 0.5411, + "step": 43000 }, { - "epoch": 1.96, - "learning_rate": 3.8432165256454543e-08, - "loss": 0.7428, - "step": 27770 + "epoch": 3.07, + "learning_rate": 1.3661120349100823e-05, + "loss": 0.5375, + "step": 43500 }, { - "epoch": 1.96, - "learning_rate": 3.70275205620807e-08, - "loss": 0.7393, - "step": 27780 + "epoch": 3.11, + "learning_rate": 1.3228823748432258e-05, + "loss": 0.5394, + "step": 44000 }, { - "epoch": 1.96, - "learning_rate": 3.564900112525527e-08, - "loss": 0.7445, - "step": 27790 + "epoch": 3.14, + "learning_rate": 1.2800071427024391e-05, + "loss": 0.5369, + "step": 44500 }, { - "epoch": 1.96, - "learning_rate": 3.429660875002405e-08, - "loss": 0.7409, - "step": 27800 + "epoch": 3.18, + "learning_rate": 1.2375087809394368e-05, + "loss": 0.5378, + "step": 45000 }, { - "epoch": 1.96, - "learning_rate": 3.297034520624243e-08, - "loss": 0.7491, - "step": 27810 + "epoch": 3.21, + "learning_rate": 1.1954095347383076e-05, + "loss": 0.5365, + "step": 45500 }, { - "epoch": 1.97, - "learning_rate": 3.1670212229575336e-08, - "loss": 0.7507, - "step": 27820 + "epoch": 3.25, + "learning_rate": 1.153731440371577e-05, + "loss": 0.5358, + "step": 46000 }, { - "epoch": 1.97, - "learning_rate": 3.03962115214862e-08, - "loss": 0.7378, - "step": 27830 + "epoch": 3.28, + "learning_rate": 1.1124963136656253e-05, + "loss": 0.5375, + "step": 46500 }, { - "epoch": 1.97, - "learning_rate": 2.914834474924133e-08, - "loss": 0.7389, - "step": 27840 + "epoch": 3.32, + "learning_rate": 1.0717257385814897e-05, + "loss": 0.537, + "step": 47000 }, { - "epoch": 1.97, - "learning_rate": 2.7926613545907753e-08, - "loss": 0.7383, - "step": 27850 + "epoch": 3.36, + "learning_rate": 1.0314410559170397e-05, + "loss": 0.5348, + "step": 47500 }, { - "epoch": 1.97, - "learning_rate": 2.6731019510348732e-08, - "loss": 0.7411, - "step": 27860 + "epoch": 3.39, + "learning_rate": 9.916633521364266e-06, + "loss": 0.5342, + "step": 48000 }, { - "epoch": 1.97, - "learning_rate": 2.5561564207217115e-08, - "loss": 0.7375, - "step": 27870 + "epoch": 3.43, + "learning_rate": 9.524134483326633e-06, + "loss": 0.5321, + "step": 48500 }, { - "epoch": 1.97, - "learning_rate": 2.4418249166966447e-08, - "loss": 0.7404, - "step": 27880 + "epoch": 3.46, + "learning_rate": 9.137118893291118e-06, + "loss": 0.5311, + "step": 49000 }, { - "epoch": 1.97, - "learning_rate": 2.3301075885835413e-08, - "loss": 0.7414, - "step": 27890 + "epoch": 3.5, + "learning_rate": 8.755789329255755e-06, + "loss": 0.5303, + "step": 49500 }, { - "epoch": 1.97, - "learning_rate": 2.221004582585007e-08, - "loss": 0.7382, - "step": 27900 + "epoch": 3.53, + "learning_rate": 8.3803453929463e-06, + "loss": 0.5296, + "step": 50000 }, { - "epoch": 1.97, - "learning_rate": 2.11451604148305e-08, - "loss": 0.741, - "step": 27910 + "epoch": 3.57, + "learning_rate": 8.01098360533749e-06, + "loss": 0.5288, + "step": 50500 }, { - "epoch": 1.97, - "learning_rate": 2.010642104637528e-08, - "loss": 0.7394, - "step": 27920 + "epoch": 3.6, + "learning_rate": 7.647897303786813e-06, + "loss": 0.5266, + "step": 51000 }, { - "epoch": 1.97, - "learning_rate": 1.90938290798659e-08, - "loss": 0.733, - "step": 27930 + "epoch": 3.64, + "learning_rate": 7.291276540834699e-06, + "loss": 0.5263, + "step": 51500 }, { - "epoch": 1.97, - "learning_rate": 1.8107385840469006e-08, - "loss": 0.7422, - "step": 27940 + "epoch": 3.67, + "learning_rate": 6.941307984724182e-06, + "loss": 0.5259, + "step": 52000 }, { - "epoch": 1.97, - "learning_rate": 1.7147092619127504e-08, - "loss": 0.733, - "step": 27950 + "epoch": 3.71, + "learning_rate": 6.598174821691929e-06, + "loss": 0.5231, + "step": 52500 }, { - "epoch": 1.98, - "learning_rate": 1.621295067256723e-08, - "loss": 0.7319, - "step": 27960 + "epoch": 3.74, + "learning_rate": 6.262056660081919e-06, + "loss": 0.5255, + "step": 53000 }, { - "epoch": 1.98, - "learning_rate": 1.53049612232814e-08, - "loss": 0.7442, - "step": 27970 + "epoch": 3.78, + "learning_rate": 5.933129436331942e-06, + "loss": 0.5237, + "step": 53500 }, { - "epoch": 1.98, - "learning_rate": 1.4423125459543941e-08, - "loss": 0.7435, - "step": 27980 + "epoch": 3.81, + "learning_rate": 5.611565322882084e-06, + "loss": 0.5213, + "step": 54000 }, { - "epoch": 1.98, - "learning_rate": 1.3567444535402818e-08, - "loss": 0.7445, - "step": 27990 + "epoch": 3.85, + "learning_rate": 5.297532638053395e-06, + "loss": 0.5203, + "step": 54500 }, { - "epoch": 1.98, - "learning_rate": 1.2737919570675606e-08, - "loss": 0.7415, - "step": 28000 + "epoch": 3.89, + "learning_rate": 4.991195757944023e-06, + "loss": 0.5191, + "step": 55000 }, { - "epoch": 1.98, - "learning_rate": 1.1934551650947257e-08, - "loss": 0.74, - "step": 28010 + "epoch": 3.92, + "learning_rate": 4.6927150303887505e-06, + "loss": 0.5177, + "step": 55500 }, { - "epoch": 1.98, - "learning_rate": 1.1157341827574552e-08, - "loss": 0.7343, - "step": 28020 + "epoch": 3.96, + "learning_rate": 4.402246691027168e-06, + "loss": 0.5161, + "step": 56000 }, { - "epoch": 1.98, - "learning_rate": 1.0406291117683875e-08, - "loss": 0.7388, - "step": 28030 + "epoch": 3.99, + "learning_rate": 4.119942781524248e-06, + "loss": 0.516, + "step": 56500 }, { - "epoch": 1.98, - "learning_rate": 9.68140050416011e-09, - "loss": 0.7415, - "step": 28040 + "epoch": 4.03, + "learning_rate": 3.845951069986216e-06, + "loss": 0.4345, + "step": 57000 }, { - "epoch": 1.98, - "learning_rate": 8.982670935659966e-09, - "loss": 0.7397, - "step": 28050 + "epoch": 4.06, + "learning_rate": 3.5804149736133887e-06, + "loss": 0.4057, + "step": 57500 }, { - "epoch": 1.98, - "learning_rate": 8.310103326603092e-09, - "loss": 0.7381, - "step": 28060 + "epoch": 4.1, + "learning_rate": 3.3234734836303883e-06, + "loss": 0.4048, + "step": 58000 }, { - "epoch": 1.98, - "learning_rate": 7.663698557163201e-09, - "loss": 0.7388, - "step": 28070 + "epoch": 4.13, + "learning_rate": 3.075261092533097e-06, + "loss": 0.4048, + "step": 58500 }, { - "epoch": 1.98, - "learning_rate": 7.043457473285831e-09, - "loss": 0.7467, - "step": 28080 + "epoch": 4.17, + "learning_rate": 2.8359077236904165e-06, + "loss": 0.4041, + "step": 59000 }, { - "epoch": 1.98, - "learning_rate": 6.449380886668355e-09, - "loss": 0.7417, - "step": 28090 + "epoch": 4.2, + "learning_rate": 2.6055386633376613e-06, + "loss": 0.4026, + "step": 59500 }, { - "epoch": 1.98, - "learning_rate": 5.881469574775533e-09, - "loss": 0.7335, - "step": 28100 + "epoch": 4.24, + "learning_rate": 2.3842744949971765e-06, + "loss": 0.4039, + "step": 60000 }, { - "epoch": 1.99, - "learning_rate": 5.339724280817304e-09, - "loss": 0.7385, - "step": 28110 + "epoch": 4.27, + "learning_rate": 2.172231036360588e-06, + "loss": 0.403, + "step": 60500 }, { - "epoch": 1.99, - "learning_rate": 4.82414571377543e-09, - "loss": 0.7382, - "step": 28120 + "epoch": 4.31, + "learning_rate": 1.9695192786655902e-06, + "loss": 0.4038, + "step": 61000 }, { - "epoch": 1.99, - "learning_rate": 4.334734548374631e-09, - "loss": 0.739, - "step": 28130 + "epoch": 4.34, + "learning_rate": 1.776245328599111e-06, + "loss": 0.4023, + "step": 61500 }, { - "epoch": 1.99, - "learning_rate": 3.871491425102569e-09, - "loss": 0.7399, - "step": 28140 + "epoch": 4.38, + "learning_rate": 1.5925103527572395e-06, + "loss": 0.4018, + "step": 62000 }, { - "epoch": 1.99, - "learning_rate": 3.434416950198749e-09, - "loss": 0.7416, - "step": 28150 + "epoch": 4.41, + "learning_rate": 1.4184105246909429e-06, + "loss": 0.4017, + "step": 62500 }, { - "epoch": 1.99, - "learning_rate": 3.023511695658954e-09, - "loss": 0.7326, - "step": 28160 + "epoch": 4.45, + "learning_rate": 1.2540369745653446e-06, + "loss": 0.4022, + "step": 63000 }, { - "epoch": 1.99, - "learning_rate": 2.6387761992241467e-09, - "loss": 0.7333, - "step": 28170 + "epoch": 4.49, + "learning_rate": 1.099475741458904e-06, + "loss": 0.4016, + "step": 63500 }, { - "epoch": 1.99, - "learning_rate": 2.280210964393792e-09, - "loss": 0.738, - "step": 28180 + "epoch": 4.52, + "learning_rate": 9.548077283274115e-07, + "loss": 0.4021, + "step": 64000 }, { - "epoch": 1.99, - "learning_rate": 1.947816460419194e-09, - "loss": 0.7372, - "step": 28190 + "epoch": 4.56, + "learning_rate": 8.201086596564867e-07, + "loss": 0.4003, + "step": 64500 }, { - "epoch": 1.99, - "learning_rate": 1.6415931222990567e-09, - "loss": 0.7342, - "step": 28200 + "epoch": 4.59, + "learning_rate": 6.954490418246052e-07, + "loss": 0.4001, + "step": 65000 }, { - "epoch": 1.99, - "learning_rate": 1.3615413507839237e-09, - "loss": 0.7401, - "step": 28210 + "epoch": 4.63, + "learning_rate": 5.808941261975087e-07, + "loss": 0.4004, + "step": 65500 }, { - "epoch": 1.99, - "learning_rate": 1.1076615123717381e-09, - "loss": 0.7416, - "step": 28220 + "epoch": 4.66, + "learning_rate": 4.765038749732864e-07, + "loss": 0.4003, + "step": 66000 }, { - "epoch": 1.99, - "learning_rate": 8.79953939314504e-10, - "loss": 0.7394, - "step": 28230 + "epoch": 4.7, + "learning_rate": 3.823329297959766e-07, + "loss": 0.4002, + "step": 66500 }, { - "epoch": 1.99, - "learning_rate": 6.78418929607183e-10, - "loss": 0.7403, - "step": 28240 + "epoch": 4.73, + "learning_rate": 2.98430583154139e-07, + "loss": 0.4002, + "step": 67000 }, { - "epoch": 2.0, - "learning_rate": 5.030567469965775e-10, - "loss": 0.7358, - "step": 28250 + "epoch": 4.77, + "learning_rate": 2.2484075257939165e-07, + "loss": 0.4006, + "step": 67500 }, { - "epoch": 2.0, - "learning_rate": 3.538676209746683e-10, - "loss": 0.7399, - "step": 28260 + "epoch": 4.8, + "learning_rate": 1.6160195765838605e-07, + "loss": 0.3993, + "step": 68000 }, { - "epoch": 2.0, - "learning_rate": 2.308517467874971e-10, - "loss": 0.7328, - "step": 28270 + "epoch": 4.84, + "learning_rate": 1.0874729987023547e-07, + "loss": 0.3989, + "step": 68500 }, { - "epoch": 2.0, - "learning_rate": 1.3400928542184333e-10, - "loss": 0.7416, - "step": 28280 + "epoch": 4.87, + "learning_rate": 6.630444526002367e-08, + "loss": 0.3991, + "step": 69000 }, { - "epoch": 2.0, - "learning_rate": 6.334036361410612e-11, - "loss": 0.7446, - "step": 28290 + "epoch": 4.91, + "learning_rate": 3.4295609957382126e-08, + "loss": 0.3996, + "step": 69500 }, { - "epoch": 2.0, - "learning_rate": 1.884507384808387e-11, - "loss": 0.7405, - "step": 28300 + "epoch": 4.94, + "learning_rate": 1.2737548547760991e-08, + "loss": 0.3992, + "step": 70000 }, { - "epoch": 2.0, - "learning_rate": 5.234743527537944e-13, - "loss": 0.7479, - "step": 28310 + "epoch": 4.98, + "learning_rate": 1.6415453024820617e-09, + "loss": 0.3995, + "step": 70500 }, { - "epoch": 2.0, - "step": 28312, - "total_flos": 4.0304188158043765e+23, - "train_loss": 0.8292061477166914, - "train_runtime": 175162.7697, - "train_samples_per_second": 41.38, - "train_steps_per_second": 0.162 + "epoch": 5.0, + "step": 70780, + "total_flos": 1.007604716180962e+24, + "train_loss": 0.6235090117226821, + "train_runtime": 442317.6902, + "train_samples_per_second": 40.968, + "train_steps_per_second": 0.16 } ], - "logging_steps": 10, - "max_steps": 28312, + "logging_steps": 500, + "max_steps": 70780, "num_input_tokens_seen": 0, - "num_train_epochs": 2, + "num_train_epochs": 5, "save_steps": 3000, - "total_flos": 4.0304188158043765e+23, + "total_flos": 1.007604716180962e+24, "train_batch_size": 2, "trial_name": null, "trial_params": null