|
{ |
|
"best_global_step": null, |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9999365683476055, |
|
"eval_steps": 500, |
|
"global_step": 3941, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.002537266095781795, |
|
"grad_norm": 93.43679809570312, |
|
"learning_rate": 2.278481012658228e-07, |
|
"loss": 2.9759, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.00507453219156359, |
|
"grad_norm": 27.491559982299805, |
|
"learning_rate": 4.810126582278482e-07, |
|
"loss": 2.6286, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.007611798287345386, |
|
"grad_norm": 7.7028632164001465, |
|
"learning_rate": 7.341772151898735e-07, |
|
"loss": 2.043, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.01014906438312718, |
|
"grad_norm": 4.423813343048096, |
|
"learning_rate": 9.873417721518988e-07, |
|
"loss": 1.8015, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.012686330478908976, |
|
"grad_norm": 3.1954057216644287, |
|
"learning_rate": 1.240506329113924e-06, |
|
"loss": 1.7109, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.015223596574690771, |
|
"grad_norm": 2.8202602863311768, |
|
"learning_rate": 1.4936708860759495e-06, |
|
"loss": 1.6439, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.017760862670472565, |
|
"grad_norm": 2.797365427017212, |
|
"learning_rate": 1.7468354430379747e-06, |
|
"loss": 1.6011, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.02029812876625436, |
|
"grad_norm": 2.647296667098999, |
|
"learning_rate": 2.0000000000000003e-06, |
|
"loss": 1.5568, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.022835394862036156, |
|
"grad_norm": 2.60683274269104, |
|
"learning_rate": 2.2531645569620258e-06, |
|
"loss": 1.5331, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.02537266095781795, |
|
"grad_norm": 2.625460386276245, |
|
"learning_rate": 2.5063291139240508e-06, |
|
"loss": 1.5384, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.027909927053599747, |
|
"grad_norm": 2.527884006500244, |
|
"learning_rate": 2.7594936708860766e-06, |
|
"loss": 1.4949, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.030447193149381543, |
|
"grad_norm": 2.7804925441741943, |
|
"learning_rate": 3.0126582278481016e-06, |
|
"loss": 1.4907, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.032984459245163335, |
|
"grad_norm": 2.730539321899414, |
|
"learning_rate": 3.265822784810127e-06, |
|
"loss": 1.4794, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.03552172534094513, |
|
"grad_norm": 2.6378061771392822, |
|
"learning_rate": 3.518987341772152e-06, |
|
"loss": 1.4549, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.038058991436726926, |
|
"grad_norm": 2.7467641830444336, |
|
"learning_rate": 3.7721518987341775e-06, |
|
"loss": 1.4678, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.04059625753250872, |
|
"grad_norm": 2.5447590351104736, |
|
"learning_rate": 4.025316455696203e-06, |
|
"loss": 1.4312, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.04313352362829052, |
|
"grad_norm": 2.4532039165496826, |
|
"learning_rate": 4.278481012658228e-06, |
|
"loss": 1.456, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.04567078972407231, |
|
"grad_norm": 2.5699751377105713, |
|
"learning_rate": 4.531645569620253e-06, |
|
"loss": 1.4444, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.04820805581985411, |
|
"grad_norm": 2.583108425140381, |
|
"learning_rate": 4.784810126582279e-06, |
|
"loss": 1.4284, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.0507453219156359, |
|
"grad_norm": 2.6186587810516357, |
|
"learning_rate": 5.037974683544305e-06, |
|
"loss": 1.4493, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.0532825880114177, |
|
"grad_norm": 2.6218819618225098, |
|
"learning_rate": 5.29113924050633e-06, |
|
"loss": 1.4177, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.055819854107199494, |
|
"grad_norm": 2.581024646759033, |
|
"learning_rate": 5.544303797468355e-06, |
|
"loss": 1.4254, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.05835712020298129, |
|
"grad_norm": 2.5171055793762207, |
|
"learning_rate": 5.79746835443038e-06, |
|
"loss": 1.4257, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.060894386298763085, |
|
"grad_norm": 2.5574731826782227, |
|
"learning_rate": 6.050632911392406e-06, |
|
"loss": 1.4126, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.06343165239454487, |
|
"grad_norm": 2.3908963203430176, |
|
"learning_rate": 6.303797468354431e-06, |
|
"loss": 1.4096, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.06596891849032667, |
|
"grad_norm": 2.561500310897827, |
|
"learning_rate": 6.5569620253164564e-06, |
|
"loss": 1.4148, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.06850618458610847, |
|
"grad_norm": 2.5578243732452393, |
|
"learning_rate": 6.810126582278481e-06, |
|
"loss": 1.4126, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.07104345068189026, |
|
"grad_norm": 2.5203826427459717, |
|
"learning_rate": 7.0632911392405065e-06, |
|
"loss": 1.4237, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.07358071677767206, |
|
"grad_norm": 2.444420337677002, |
|
"learning_rate": 7.316455696202533e-06, |
|
"loss": 1.4053, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.07611798287345385, |
|
"grad_norm": 2.3801279067993164, |
|
"learning_rate": 7.569620253164558e-06, |
|
"loss": 1.4021, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.07865524896923565, |
|
"grad_norm": 2.3352255821228027, |
|
"learning_rate": 7.822784810126582e-06, |
|
"loss": 1.4092, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.08119251506501744, |
|
"grad_norm": 2.3657045364379883, |
|
"learning_rate": 8.075949367088608e-06, |
|
"loss": 1.3745, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.08372978116079924, |
|
"grad_norm": 2.3734323978424072, |
|
"learning_rate": 8.329113924050633e-06, |
|
"loss": 1.3878, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.08626704725658103, |
|
"grad_norm": 2.341235637664795, |
|
"learning_rate": 8.582278481012659e-06, |
|
"loss": 1.397, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.08880431335236283, |
|
"grad_norm": 2.304081916809082, |
|
"learning_rate": 8.835443037974685e-06, |
|
"loss": 1.4008, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.09134157944814462, |
|
"grad_norm": 2.5316545963287354, |
|
"learning_rate": 9.08860759493671e-06, |
|
"loss": 1.3804, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.09387884554392642, |
|
"grad_norm": 2.275545597076416, |
|
"learning_rate": 9.341772151898735e-06, |
|
"loss": 1.386, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.09641611163970822, |
|
"grad_norm": 2.518347978591919, |
|
"learning_rate": 9.59493670886076e-06, |
|
"loss": 1.3956, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.09895337773549001, |
|
"grad_norm": 2.351276397705078, |
|
"learning_rate": 9.848101265822785e-06, |
|
"loss": 1.3803, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.1014906438312718, |
|
"grad_norm": 2.251296043395996, |
|
"learning_rate": 9.999968603457859e-06, |
|
"loss": 1.3898, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.1040279099270536, |
|
"grad_norm": 2.2908174991607666, |
|
"learning_rate": 9.999615396887012e-06, |
|
"loss": 1.3835, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.1065651760228354, |
|
"grad_norm": 2.127427577972412, |
|
"learning_rate": 9.998869765883566e-06, |
|
"loss": 1.349, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.1091024421186172, |
|
"grad_norm": 2.285038471221924, |
|
"learning_rate": 9.997731768972785e-06, |
|
"loss": 1.3973, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.11163970821439899, |
|
"grad_norm": 2.093776226043701, |
|
"learning_rate": 9.996201495477102e-06, |
|
"loss": 1.3757, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.11417697431018078, |
|
"grad_norm": 2.1693060398101807, |
|
"learning_rate": 9.994279065509094e-06, |
|
"loss": 1.3786, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.11671424040596258, |
|
"grad_norm": 2.2407472133636475, |
|
"learning_rate": 9.991964629962067e-06, |
|
"loss": 1.3793, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.11925150650174438, |
|
"grad_norm": 2.1516313552856445, |
|
"learning_rate": 9.989258370498208e-06, |
|
"loss": 1.3562, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.12178877259752617, |
|
"grad_norm": 2.272516965866089, |
|
"learning_rate": 9.986160499534318e-06, |
|
"loss": 1.3968, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.12432603869330797, |
|
"grad_norm": 2.11755108833313, |
|
"learning_rate": 9.982671260225156e-06, |
|
"loss": 1.3714, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.12686330478908975, |
|
"grad_norm": 2.0977492332458496, |
|
"learning_rate": 9.97879092644434e-06, |
|
"loss": 1.3549, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.12940057088487156, |
|
"grad_norm": 2.2265727519989014, |
|
"learning_rate": 9.974519802762853e-06, |
|
"loss": 1.3699, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.13193783698065334, |
|
"grad_norm": 2.15244197845459, |
|
"learning_rate": 9.969858224425138e-06, |
|
"loss": 1.3608, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.13447510307643515, |
|
"grad_norm": 2.1284122467041016, |
|
"learning_rate": 9.96480655732279e-06, |
|
"loss": 1.3766, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.13701236917221693, |
|
"grad_norm": 2.03669810295105, |
|
"learning_rate": 9.959365197965824e-06, |
|
"loss": 1.3596, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.13954963526799874, |
|
"grad_norm": 1.9937801361083984, |
|
"learning_rate": 9.953534573451568e-06, |
|
"loss": 1.3501, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.14208690136378052, |
|
"grad_norm": 2.097184419631958, |
|
"learning_rate": 9.947315141431126e-06, |
|
"loss": 1.3443, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.14462416745956233, |
|
"grad_norm": 2.1515302658081055, |
|
"learning_rate": 9.940707390073465e-06, |
|
"loss": 1.3548, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.1471614335553441, |
|
"grad_norm": 2.027521848678589, |
|
"learning_rate": 9.933711838027096e-06, |
|
"loss": 1.3415, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.14969869965112592, |
|
"grad_norm": 2.1078367233276367, |
|
"learning_rate": 9.926329034379361e-06, |
|
"loss": 1.3578, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.1522359657469077, |
|
"grad_norm": 2.1043877601623535, |
|
"learning_rate": 9.918559558613344e-06, |
|
"loss": 1.3673, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.1547732318426895, |
|
"grad_norm": 2.100511074066162, |
|
"learning_rate": 9.910404020562377e-06, |
|
"loss": 1.3556, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.1573104979384713, |
|
"grad_norm": 1.9388527870178223, |
|
"learning_rate": 9.901863060362176e-06, |
|
"loss": 1.3608, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.1598477640342531, |
|
"grad_norm": 2.0223309993743896, |
|
"learning_rate": 9.8929373484006e-06, |
|
"loss": 1.3301, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.16238503013003489, |
|
"grad_norm": 2.006113290786743, |
|
"learning_rate": 9.883627585265032e-06, |
|
"loss": 1.3345, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.1649222962258167, |
|
"grad_norm": 2.0276896953582764, |
|
"learning_rate": 9.873934501687381e-06, |
|
"loss": 1.3437, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.16745956232159848, |
|
"grad_norm": 1.9418350458145142, |
|
"learning_rate": 9.863858858486736e-06, |
|
"loss": 1.3307, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.16999682841738029, |
|
"grad_norm": 1.919950008392334, |
|
"learning_rate": 9.853401446509641e-06, |
|
"loss": 1.3478, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.17253409451316207, |
|
"grad_norm": 1.9946993589401245, |
|
"learning_rate": 9.842563086568024e-06, |
|
"loss": 1.3491, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.17507136060894388, |
|
"grad_norm": 1.9728672504425049, |
|
"learning_rate": 9.831344629374778e-06, |
|
"loss": 1.3603, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.17760862670472566, |
|
"grad_norm": 1.9747499227523804, |
|
"learning_rate": 9.81974695547697e-06, |
|
"loss": 1.3278, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.18014589280050744, |
|
"grad_norm": 1.931283712387085, |
|
"learning_rate": 9.807770975186743e-06, |
|
"loss": 1.3389, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.18268315889628925, |
|
"grad_norm": 1.8983511924743652, |
|
"learning_rate": 9.795417628509857e-06, |
|
"loss": 1.3369, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.18522042499207103, |
|
"grad_norm": 2.0960946083068848, |
|
"learning_rate": 9.78268788507191e-06, |
|
"loss": 1.3519, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.18775769108785284, |
|
"grad_norm": 1.9553834199905396, |
|
"learning_rate": 9.769582744042224e-06, |
|
"loss": 1.3383, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.19029495718363462, |
|
"grad_norm": 1.8932089805603027, |
|
"learning_rate": 9.756103234055432e-06, |
|
"loss": 1.34, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.19283222327941643, |
|
"grad_norm": 1.9457850456237793, |
|
"learning_rate": 9.742250413130728e-06, |
|
"loss": 1.323, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.1953694893751982, |
|
"grad_norm": 1.8577946424484253, |
|
"learning_rate": 9.728025368588829e-06, |
|
"loss": 1.3251, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.19790675547098002, |
|
"grad_norm": 2.0047388076782227, |
|
"learning_rate": 9.713429216966624e-06, |
|
"loss": 1.3202, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.2004440215667618, |
|
"grad_norm": 1.9848380088806152, |
|
"learning_rate": 9.698463103929542e-06, |
|
"loss": 1.3433, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.2029812876625436, |
|
"grad_norm": 1.8254072666168213, |
|
"learning_rate": 9.68312820418163e-06, |
|
"loss": 1.3186, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.2055185537583254, |
|
"grad_norm": 1.911875605583191, |
|
"learning_rate": 9.667425721373333e-06, |
|
"loss": 1.3379, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.2080558198541072, |
|
"grad_norm": 1.9765597581863403, |
|
"learning_rate": 9.651356888007041e-06, |
|
"loss": 1.3319, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.210593085949889, |
|
"grad_norm": 2.052441358566284, |
|
"learning_rate": 9.634922965340334e-06, |
|
"loss": 1.3152, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.2131303520456708, |
|
"grad_norm": 1.8989280462265015, |
|
"learning_rate": 9.618125243286989e-06, |
|
"loss": 1.3092, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.21566761814145258, |
|
"grad_norm": 1.8644095659255981, |
|
"learning_rate": 9.60096504031573e-06, |
|
"loss": 1.3248, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.2182048842372344, |
|
"grad_norm": 1.9270919561386108, |
|
"learning_rate": 9.58344370334675e-06, |
|
"loss": 1.3525, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.22074215033301617, |
|
"grad_norm": 1.8386811017990112, |
|
"learning_rate": 9.565562607645974e-06, |
|
"loss": 1.3214, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.22327941642879798, |
|
"grad_norm": 1.8396153450012207, |
|
"learning_rate": 9.547323156717133e-06, |
|
"loss": 1.3247, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.22581668252457976, |
|
"grad_norm": 1.906518578529358, |
|
"learning_rate": 9.52872678219158e-06, |
|
"loss": 1.3132, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.22835394862036157, |
|
"grad_norm": 1.9358681440353394, |
|
"learning_rate": 9.50977494371594e-06, |
|
"loss": 1.3199, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.23089121471614335, |
|
"grad_norm": 1.8726778030395508, |
|
"learning_rate": 9.490469128837525e-06, |
|
"loss": 1.3058, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.23342848081192516, |
|
"grad_norm": 1.8988134860992432, |
|
"learning_rate": 9.470810852887586e-06, |
|
"loss": 1.3035, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.23596574690770694, |
|
"grad_norm": 1.8984090089797974, |
|
"learning_rate": 9.450801658862371e-06, |
|
"loss": 1.321, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.23850301300348875, |
|
"grad_norm": 1.8265104293823242, |
|
"learning_rate": 9.430443117302006e-06, |
|
"loss": 1.3089, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.24104027909927053, |
|
"grad_norm": 2.0038204193115234, |
|
"learning_rate": 9.409736826167233e-06, |
|
"loss": 1.3185, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.24357754519505234, |
|
"grad_norm": 1.9437892436981201, |
|
"learning_rate": 9.388684410713977e-06, |
|
"loss": 1.3148, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.24611481129083412, |
|
"grad_norm": 1.902868628501892, |
|
"learning_rate": 9.367287523365782e-06, |
|
"loss": 1.3092, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.24865207738661593, |
|
"grad_norm": 1.8983051776885986, |
|
"learning_rate": 9.345547843584108e-06, |
|
"loss": 1.3091, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.25118934348239774, |
|
"grad_norm": 1.9476397037506104, |
|
"learning_rate": 9.323467077736513e-06, |
|
"loss": 1.3149, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.2537266095781795, |
|
"grad_norm": 1.9828585386276245, |
|
"learning_rate": 9.301046958962707e-06, |
|
"loss": 1.3149, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.2562638756739613, |
|
"grad_norm": 1.8465077877044678, |
|
"learning_rate": 9.278289247038537e-06, |
|
"loss": 1.3113, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.2588011417697431, |
|
"grad_norm": 1.7844876050949097, |
|
"learning_rate": 9.255195728237837e-06, |
|
"loss": 1.3075, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.2613384078655249, |
|
"grad_norm": 1.8422950506210327, |
|
"learning_rate": 9.231768215192243e-06, |
|
"loss": 1.3071, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.2638756739613067, |
|
"grad_norm": 1.9389331340789795, |
|
"learning_rate": 9.2080085467489e-06, |
|
"loss": 1.3315, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.2664129400570885, |
|
"grad_norm": 1.8695167303085327, |
|
"learning_rate": 9.183918587826142e-06, |
|
"loss": 1.3203, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.2689502061528703, |
|
"grad_norm": 1.843494176864624, |
|
"learning_rate": 9.159500229267103e-06, |
|
"loss": 1.3073, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.27148747224865205, |
|
"grad_norm": 1.819220781326294, |
|
"learning_rate": 9.134755387691315e-06, |
|
"loss": 1.317, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.27402473834443386, |
|
"grad_norm": 1.8392246961593628, |
|
"learning_rate": 9.109686005344258e-06, |
|
"loss": 1.3055, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.27656200444021567, |
|
"grad_norm": 1.9258952140808105, |
|
"learning_rate": 9.084294049944919e-06, |
|
"loss": 1.303, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.2790992705359975, |
|
"grad_norm": 1.7235634326934814, |
|
"learning_rate": 9.05858151453134e-06, |
|
"loss": 1.2958, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.28163653663177923, |
|
"grad_norm": 1.8805299997329712, |
|
"learning_rate": 9.032550417304189e-06, |
|
"loss": 1.3123, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.28417380272756104, |
|
"grad_norm": 1.7971928119659424, |
|
"learning_rate": 9.006202801468342e-06, |
|
"loss": 1.3181, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.28671106882334285, |
|
"grad_norm": 1.8092015981674194, |
|
"learning_rate": 8.979540735072512e-06, |
|
"loss": 1.2802, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.28924833491912466, |
|
"grad_norm": 1.7668564319610596, |
|
"learning_rate": 8.952566310846931e-06, |
|
"loss": 1.2911, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.2917856010149064, |
|
"grad_norm": 1.8799540996551514, |
|
"learning_rate": 8.925281646039078e-06, |
|
"loss": 1.2966, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.2943228671106882, |
|
"grad_norm": 1.9077820777893066, |
|
"learning_rate": 8.897688882247515e-06, |
|
"loss": 1.2889, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.29686013320647003, |
|
"grad_norm": 1.818803071975708, |
|
"learning_rate": 8.869790185253766e-06, |
|
"loss": 1.2922, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.29939739930225184, |
|
"grad_norm": 1.8670169115066528, |
|
"learning_rate": 8.841587744852339e-06, |
|
"loss": 1.3137, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.3019346653980336, |
|
"grad_norm": 1.8387978076934814, |
|
"learning_rate": 8.813083774678841e-06, |
|
"loss": 1.2988, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.3044719314938154, |
|
"grad_norm": 1.9049063920974731, |
|
"learning_rate": 8.784280512036235e-06, |
|
"loss": 1.3002, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.3070091975895972, |
|
"grad_norm": 1.8128398656845093, |
|
"learning_rate": 8.755180217719218e-06, |
|
"loss": 1.2896, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.309546463685379, |
|
"grad_norm": 1.8100541830062866, |
|
"learning_rate": 8.72578517583679e-06, |
|
"loss": 1.3028, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.3120837297811608, |
|
"grad_norm": 1.8533449172973633, |
|
"learning_rate": 8.696097693632944e-06, |
|
"loss": 1.2791, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.3146209958769426, |
|
"grad_norm": 1.8735203742980957, |
|
"learning_rate": 8.666120101305596e-06, |
|
"loss": 1.3084, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.3171582619727244, |
|
"grad_norm": 1.8829270601272583, |
|
"learning_rate": 8.635854751823666e-06, |
|
"loss": 1.3125, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.3196955280685062, |
|
"grad_norm": 1.8787264823913574, |
|
"learning_rate": 8.60530402074241e-06, |
|
"loss": 1.2904, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.32223279416428796, |
|
"grad_norm": 1.7555420398712158, |
|
"learning_rate": 8.574470306016936e-06, |
|
"loss": 1.3098, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.32477006026006977, |
|
"grad_norm": 1.8944544792175293, |
|
"learning_rate": 8.543356027814009e-06, |
|
"loss": 1.2818, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.3273073263558516, |
|
"grad_norm": 1.8189594745635986, |
|
"learning_rate": 8.511963628322076e-06, |
|
"loss": 1.2925, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.3298445924516334, |
|
"grad_norm": 1.8110857009887695, |
|
"learning_rate": 8.480295571559581e-06, |
|
"loss": 1.2868, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.33238185854741514, |
|
"grad_norm": 1.7613568305969238, |
|
"learning_rate": 8.448354343181568e-06, |
|
"loss": 1.2935, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.33491912464319695, |
|
"grad_norm": 1.743290662765503, |
|
"learning_rate": 8.416142450284565e-06, |
|
"loss": 1.3024, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.33745639073897876, |
|
"grad_norm": 1.7666162252426147, |
|
"learning_rate": 8.383662421209813e-06, |
|
"loss": 1.2934, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.33999365683476057, |
|
"grad_norm": 1.842170000076294, |
|
"learning_rate": 8.350916805344812e-06, |
|
"loss": 1.3163, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.3425309229305423, |
|
"grad_norm": 1.8108347654342651, |
|
"learning_rate": 8.317908172923207e-06, |
|
"loss": 1.2687, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.34506818902632413, |
|
"grad_norm": 1.832701325416565, |
|
"learning_rate": 8.28463911482306e-06, |
|
"loss": 1.287, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.34760545512210594, |
|
"grad_norm": 1.7689234018325806, |
|
"learning_rate": 8.251112242363488e-06, |
|
"loss": 1.3073, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.35014272121788775, |
|
"grad_norm": 1.7938982248306274, |
|
"learning_rate": 8.217330187099689e-06, |
|
"loss": 1.2734, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.3526799873136695, |
|
"grad_norm": 1.7464276552200317, |
|
"learning_rate": 8.183295600616399e-06, |
|
"loss": 1.2746, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.3552172534094513, |
|
"grad_norm": 1.8033825159072876, |
|
"learning_rate": 8.149011154319763e-06, |
|
"loss": 1.2833, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.3577545195052331, |
|
"grad_norm": 1.850847601890564, |
|
"learning_rate": 8.114479539227653e-06, |
|
"loss": 1.3033, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.3602917856010149, |
|
"grad_norm": 1.7663108110427856, |
|
"learning_rate": 8.079703465758447e-06, |
|
"loss": 1.2756, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.3628290516967967, |
|
"grad_norm": 1.8576610088348389, |
|
"learning_rate": 8.044685663518289e-06, |
|
"loss": 1.2871, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.3653663177925785, |
|
"grad_norm": 1.7135543823242188, |
|
"learning_rate": 8.009428881086836e-06, |
|
"loss": 1.2825, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.3679035838883603, |
|
"grad_norm": 1.8041423559188843, |
|
"learning_rate": 7.97393588580152e-06, |
|
"loss": 1.2726, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.37044084998414206, |
|
"grad_norm": 1.852770447731018, |
|
"learning_rate": 7.93820946354034e-06, |
|
"loss": 1.2754, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.37297811607992387, |
|
"grad_norm": 1.8034069538116455, |
|
"learning_rate": 7.902252418503198e-06, |
|
"loss": 1.2881, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.3755153821757057, |
|
"grad_norm": 1.834839105606079, |
|
"learning_rate": 7.86606757299178e-06, |
|
"loss": 1.2717, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.3780526482714875, |
|
"grad_norm": 1.8019623756408691, |
|
"learning_rate": 7.829657767188052e-06, |
|
"loss": 1.2863, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.38058991436726924, |
|
"grad_norm": 1.8359981775283813, |
|
"learning_rate": 7.793025858931317e-06, |
|
"loss": 1.2896, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.38312718046305105, |
|
"grad_norm": 1.7588212490081787, |
|
"learning_rate": 7.756174723493908e-06, |
|
"loss": 1.298, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.38566444655883286, |
|
"grad_norm": 1.810434103012085, |
|
"learning_rate": 7.719107253355494e-06, |
|
"loss": 1.294, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.3882017126546147, |
|
"grad_norm": 1.7165814638137817, |
|
"learning_rate": 7.68182635797606e-06, |
|
"loss": 1.2529, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.3907389787503964, |
|
"grad_norm": 1.7785570621490479, |
|
"learning_rate": 7.644334963567542e-06, |
|
"loss": 1.2726, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.39327624484617824, |
|
"grad_norm": 1.8550214767456055, |
|
"learning_rate": 7.606636012864126e-06, |
|
"loss": 1.2866, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.39581351094196005, |
|
"grad_norm": 1.8188886642456055, |
|
"learning_rate": 7.568732464891293e-06, |
|
"loss": 1.2867, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.39835077703774185, |
|
"grad_norm": 1.7937846183776855, |
|
"learning_rate": 7.530627294733549e-06, |
|
"loss": 1.2764, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.4008880431335236, |
|
"grad_norm": 1.8102056980133057, |
|
"learning_rate": 7.492323493300912e-06, |
|
"loss": 1.2663, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.4034253092293054, |
|
"grad_norm": 1.8100765943527222, |
|
"learning_rate": 7.453824067094152e-06, |
|
"loss": 1.2772, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.4059625753250872, |
|
"grad_norm": 1.7928545475006104, |
|
"learning_rate": 7.4151320379688105e-06, |
|
"loss": 1.2831, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.40849984142086904, |
|
"grad_norm": 1.7782552242279053, |
|
"learning_rate": 7.376250442898006e-06, |
|
"loss": 1.2701, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.4110371075166508, |
|
"grad_norm": 1.7261615991592407, |
|
"learning_rate": 7.33718233373407e-06, |
|
"loss": 1.2761, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.4135743736124326, |
|
"grad_norm": 1.8159124851226807, |
|
"learning_rate": 7.297930776968989e-06, |
|
"loss": 1.2817, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.4161116397082144, |
|
"grad_norm": 1.8756343126296997, |
|
"learning_rate": 7.258498853493729e-06, |
|
"loss": 1.2881, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.4186489058039962, |
|
"grad_norm": 1.7900598049163818, |
|
"learning_rate": 7.2188896583563984e-06, |
|
"loss": 1.2602, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.421186171899778, |
|
"grad_norm": 1.744611144065857, |
|
"learning_rate": 7.179106300519329e-06, |
|
"loss": 1.2911, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.4237234379955598, |
|
"grad_norm": 1.7962532043457031, |
|
"learning_rate": 7.13915190261504e-06, |
|
"loss": 1.2581, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.4262607040913416, |
|
"grad_norm": 1.8410406112670898, |
|
"learning_rate": 7.099029600701144e-06, |
|
"loss": 1.2632, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.4287979701871234, |
|
"grad_norm": 1.6939560174942017, |
|
"learning_rate": 7.0587425440141955e-06, |
|
"loss": 1.2632, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.43133523628290515, |
|
"grad_norm": 1.7786022424697876, |
|
"learning_rate": 7.0182938947225025e-06, |
|
"loss": 1.2703, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.43387250237868696, |
|
"grad_norm": 1.6461378335952759, |
|
"learning_rate": 6.977686827677926e-06, |
|
"loss": 1.2769, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.4364097684744688, |
|
"grad_norm": 1.9497860670089722, |
|
"learning_rate": 6.936924530166682e-06, |
|
"loss": 1.288, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.4389470345702506, |
|
"grad_norm": 1.6506319046020508, |
|
"learning_rate": 6.896010201659173e-06, |
|
"loss": 1.2687, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.44148430066603234, |
|
"grad_norm": 1.7398324012756348, |
|
"learning_rate": 6.854947053558849e-06, |
|
"loss": 1.27, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.44402156676181415, |
|
"grad_norm": 1.7190606594085693, |
|
"learning_rate": 6.8137383089501526e-06, |
|
"loss": 1.2643, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.44655883285759596, |
|
"grad_norm": 1.7999427318572998, |
|
"learning_rate": 6.772387202345528e-06, |
|
"loss": 1.2713, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.4490960989533777, |
|
"grad_norm": 1.7355235815048218, |
|
"learning_rate": 6.730896979431543e-06, |
|
"loss": 1.2786, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.4516333650491595, |
|
"grad_norm": 1.7803362607955933, |
|
"learning_rate": 6.689270896814139e-06, |
|
"loss": 1.2664, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.45417063114494133, |
|
"grad_norm": 1.8156557083129883, |
|
"learning_rate": 6.647512221763005e-06, |
|
"loss": 1.2663, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.45670789724072314, |
|
"grad_norm": 1.7821699380874634, |
|
"learning_rate": 6.6056242319551315e-06, |
|
"loss": 1.2662, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.4592451633365049, |
|
"grad_norm": 1.8154984712600708, |
|
"learning_rate": 6.563610215217551e-06, |
|
"loss": 1.2605, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.4617824294322867, |
|
"grad_norm": 1.7243108749389648, |
|
"learning_rate": 6.5214734692692594e-06, |
|
"loss": 1.272, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.4643196955280685, |
|
"grad_norm": 1.7438788414001465, |
|
"learning_rate": 6.479217301462386e-06, |
|
"loss": 1.2607, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.4668569616238503, |
|
"grad_norm": 1.7700178623199463, |
|
"learning_rate": 6.43684502852259e-06, |
|
"loss": 1.2536, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.4693942277196321, |
|
"grad_norm": 1.6637680530548096, |
|
"learning_rate": 6.394359976288729e-06, |
|
"loss": 1.2542, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.4719314938154139, |
|
"grad_norm": 1.7491919994354248, |
|
"learning_rate": 6.3517654794518156e-06, |
|
"loss": 1.26, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.4744687599111957, |
|
"grad_norm": 1.7363123893737793, |
|
"learning_rate": 6.309064881293265e-06, |
|
"loss": 1.2713, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.4770060260069775, |
|
"grad_norm": 1.7664891481399536, |
|
"learning_rate": 6.266261533422487e-06, |
|
"loss": 1.2626, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.47954329210275926, |
|
"grad_norm": 1.828539252281189, |
|
"learning_rate": 6.223358795513812e-06, |
|
"loss": 1.2598, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.48208055819854106, |
|
"grad_norm": 1.7587262392044067, |
|
"learning_rate": 6.18036003504278e-06, |
|
"loss": 1.2582, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.4846178242943229, |
|
"grad_norm": 1.7577661275863647, |
|
"learning_rate": 6.1372686270218385e-06, |
|
"loss": 1.2454, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 0.4871550903901047, |
|
"grad_norm": 1.6523966789245605, |
|
"learning_rate": 6.094087953735423e-06, |
|
"loss": 1.2712, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.48969235648588644, |
|
"grad_norm": 1.8142590522766113, |
|
"learning_rate": 6.050821404474483e-06, |
|
"loss": 1.2506, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 0.49222962258166825, |
|
"grad_norm": 1.7929701805114746, |
|
"learning_rate": 6.00747237527045e-06, |
|
"loss": 1.2698, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 0.49476688867745006, |
|
"grad_norm": 1.817660927772522, |
|
"learning_rate": 5.964044268628688e-06, |
|
"loss": 1.2539, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.49730415477323187, |
|
"grad_norm": 1.7909172773361206, |
|
"learning_rate": 5.920540493261415e-06, |
|
"loss": 1.2707, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.4998414208690136, |
|
"grad_norm": 1.8175256252288818, |
|
"learning_rate": 5.8769644638201635e-06, |
|
"loss": 1.2575, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 0.5023786869647955, |
|
"grad_norm": 1.7416564226150513, |
|
"learning_rate": 5.8333196006277536e-06, |
|
"loss": 1.2512, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 0.5049159530605772, |
|
"grad_norm": 1.751071810722351, |
|
"learning_rate": 5.789609329409826e-06, |
|
"loss": 1.2531, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 0.507453219156359, |
|
"grad_norm": 1.8549920320510864, |
|
"learning_rate": 5.7458370810259635e-06, |
|
"loss": 1.2397, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.5099904852521409, |
|
"grad_norm": 1.7362955808639526, |
|
"learning_rate": 5.702006291200389e-06, |
|
"loss": 1.2399, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 0.5125277513479226, |
|
"grad_norm": 1.802452564239502, |
|
"learning_rate": 5.6581204002523e-06, |
|
"loss": 1.2408, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 0.5150650174437044, |
|
"grad_norm": 1.7542202472686768, |
|
"learning_rate": 5.614182852825835e-06, |
|
"loss": 1.2542, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 0.5176022835394862, |
|
"grad_norm": 1.816231369972229, |
|
"learning_rate": 5.570197097619688e-06, |
|
"loss": 1.2637, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 0.520139549635268, |
|
"grad_norm": 1.6630245447158813, |
|
"learning_rate": 5.526166587116436e-06, |
|
"loss": 1.2488, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.5226768157310498, |
|
"grad_norm": 1.8530631065368652, |
|
"learning_rate": 5.4820947773115374e-06, |
|
"loss": 1.2675, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 0.5252140818268316, |
|
"grad_norm": 1.8050600290298462, |
|
"learning_rate": 5.437985127442065e-06, |
|
"loss": 1.2491, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 0.5277513479226134, |
|
"grad_norm": 1.755374789237976, |
|
"learning_rate": 5.393841099715205e-06, |
|
"loss": 1.2401, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 0.5302886140183952, |
|
"grad_norm": 1.7990094423294067, |
|
"learning_rate": 5.349666159036482e-06, |
|
"loss": 1.2447, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 0.532825880114177, |
|
"grad_norm": 1.8541128635406494, |
|
"learning_rate": 5.305463772737812e-06, |
|
"loss": 1.2422, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.5353631462099587, |
|
"grad_norm": 1.7271368503570557, |
|
"learning_rate": 5.261237410305344e-06, |
|
"loss": 1.2508, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 0.5379004123057406, |
|
"grad_norm": 1.8599367141723633, |
|
"learning_rate": 5.2169905431071356e-06, |
|
"loss": 1.2523, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 0.5404376784015223, |
|
"grad_norm": 1.7589484453201294, |
|
"learning_rate": 5.172726644120678e-06, |
|
"loss": 1.2369, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 0.5429749444973041, |
|
"grad_norm": 1.8130236864089966, |
|
"learning_rate": 5.128449187660309e-06, |
|
"loss": 1.2411, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 0.545512210593086, |
|
"grad_norm": 1.8312796354293823, |
|
"learning_rate": 5.084161649104502e-06, |
|
"loss": 1.2534, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.5480494766888677, |
|
"grad_norm": 1.8068058490753174, |
|
"learning_rate": 5.039867504623084e-06, |
|
"loss": 1.2269, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 0.5505867427846496, |
|
"grad_norm": 1.774015188217163, |
|
"learning_rate": 4.995570230904386e-06, |
|
"loss": 1.2254, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 0.5531240088804313, |
|
"grad_norm": 1.788631558418274, |
|
"learning_rate": 4.951273304882358e-06, |
|
"loss": 1.2449, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 0.5556612749762131, |
|
"grad_norm": 1.6815987825393677, |
|
"learning_rate": 4.906980203463659e-06, |
|
"loss": 1.2437, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 0.558198541071995, |
|
"grad_norm": 1.7828891277313232, |
|
"learning_rate": 4.862694403254747e-06, |
|
"loss": 1.2457, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.5607358071677767, |
|
"grad_norm": 1.7972067594528198, |
|
"learning_rate": 4.818419380289009e-06, |
|
"loss": 1.2651, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 0.5632730732635585, |
|
"grad_norm": 1.8437052965164185, |
|
"learning_rate": 4.774158609753908e-06, |
|
"loss": 1.2506, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 0.5658103393593403, |
|
"grad_norm": 1.7658838033676147, |
|
"learning_rate": 4.729915565718223e-06, |
|
"loss": 1.2347, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 0.5683476054551221, |
|
"grad_norm": 1.7554123401641846, |
|
"learning_rate": 4.685693720859369e-06, |
|
"loss": 1.2374, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 0.570884871550904, |
|
"grad_norm": 1.7203478813171387, |
|
"learning_rate": 4.641496546190813e-06, |
|
"loss": 1.2364, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.5734221376466857, |
|
"grad_norm": 1.759904384613037, |
|
"learning_rate": 4.597327510789635e-06, |
|
"loss": 1.2236, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 0.5759594037424675, |
|
"grad_norm": 1.8005478382110596, |
|
"learning_rate": 4.553190081524242e-06, |
|
"loss": 1.2424, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 0.5784966698382493, |
|
"grad_norm": 1.809352159500122, |
|
"learning_rate": 4.5090877227822424e-06, |
|
"loss": 1.2413, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 0.5810339359340311, |
|
"grad_norm": 1.8049602508544922, |
|
"learning_rate": 4.46502389619853e-06, |
|
"loss": 1.2542, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 0.5835712020298128, |
|
"grad_norm": 1.803334355354309, |
|
"learning_rate": 4.421002060383569e-06, |
|
"loss": 1.2353, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.5861084681255947, |
|
"grad_norm": 1.7442089319229126, |
|
"learning_rate": 4.3770256706519375e-06, |
|
"loss": 1.2263, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 0.5886457342213764, |
|
"grad_norm": 1.7808609008789062, |
|
"learning_rate": 4.3330981787511006e-06, |
|
"loss": 1.2266, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 0.5911830003171583, |
|
"grad_norm": 1.8719425201416016, |
|
"learning_rate": 4.289223032590491e-06, |
|
"loss": 1.2609, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 0.5937202664129401, |
|
"grad_norm": 1.7992342710494995, |
|
"learning_rate": 4.245403675970877e-06, |
|
"loss": 1.2318, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 0.5962575325087218, |
|
"grad_norm": 1.8241426944732666, |
|
"learning_rate": 4.201643548314051e-06, |
|
"loss": 1.2339, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 0.5987947986045037, |
|
"grad_norm": 1.826092004776001, |
|
"learning_rate": 4.157946084392871e-06, |
|
"loss": 1.2481, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 0.6013320647002854, |
|
"grad_norm": 1.6740612983703613, |
|
"learning_rate": 4.114314714061659e-06, |
|
"loss": 1.2213, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 0.6038693307960672, |
|
"grad_norm": 1.747037649154663, |
|
"learning_rate": 4.0707528619869976e-06, |
|
"loss": 1.2248, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 0.6064065968918491, |
|
"grad_norm": 1.7676377296447754, |
|
"learning_rate": 4.027263947378907e-06, |
|
"loss": 1.2239, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 0.6089438629876308, |
|
"grad_norm": 1.770889163017273, |
|
"learning_rate": 3.9838513837224814e-06, |
|
"loss": 1.2395, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.6114811290834127, |
|
"grad_norm": 1.6925628185272217, |
|
"learning_rate": 3.940518578509963e-06, |
|
"loss": 1.2347, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 0.6140183951791944, |
|
"grad_norm": 1.7867980003356934, |
|
"learning_rate": 3.8972689329732725e-06, |
|
"loss": 1.2392, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 0.6165556612749762, |
|
"grad_norm": 1.7436933517456055, |
|
"learning_rate": 3.854105841817056e-06, |
|
"loss": 1.224, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 0.619092927370758, |
|
"grad_norm": 1.7485663890838623, |
|
"learning_rate": 3.811032692952227e-06, |
|
"loss": 1.2104, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 0.6216301934665398, |
|
"grad_norm": 1.7039844989776611, |
|
"learning_rate": 3.7680528672300404e-06, |
|
"loss": 1.2377, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 0.6241674595623216, |
|
"grad_norm": 1.7440192699432373, |
|
"learning_rate": 3.7251697381767373e-06, |
|
"loss": 1.2385, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 0.6267047256581034, |
|
"grad_norm": 1.716308832168579, |
|
"learning_rate": 3.6823866717287437e-06, |
|
"loss": 1.2349, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 0.6292419917538852, |
|
"grad_norm": 1.8369241952896118, |
|
"learning_rate": 3.6397070259684793e-06, |
|
"loss": 1.233, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 0.6317792578496669, |
|
"grad_norm": 1.7305907011032104, |
|
"learning_rate": 3.5971341508607814e-06, |
|
"loss": 1.2129, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 0.6343165239454488, |
|
"grad_norm": 1.9149237871170044, |
|
"learning_rate": 3.5546713879899563e-06, |
|
"loss": 1.2193, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.6368537900412305, |
|
"grad_norm": 1.9159984588623047, |
|
"learning_rate": 3.512322070297503e-06, |
|
"loss": 1.2177, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 0.6393910561370124, |
|
"grad_norm": 1.7947359085083008, |
|
"learning_rate": 3.4700895218205026e-06, |
|
"loss": 1.2315, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 0.6419283222327942, |
|
"grad_norm": 1.7361483573913574, |
|
"learning_rate": 3.4279770574307096e-06, |
|
"loss": 1.2353, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 0.6444655883285759, |
|
"grad_norm": 1.8074885606765747, |
|
"learning_rate": 3.385987982574372e-06, |
|
"loss": 1.2171, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 0.6470028544243578, |
|
"grad_norm": 1.7764816284179688, |
|
"learning_rate": 3.3441255930127752e-06, |
|
"loss": 1.2393, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 0.6495401205201395, |
|
"grad_norm": 2.0956342220306396, |
|
"learning_rate": 3.3023931745635606e-06, |
|
"loss": 1.227, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 0.6520773866159213, |
|
"grad_norm": 1.8369258642196655, |
|
"learning_rate": 3.2607940028428154e-06, |
|
"loss": 1.2378, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 0.6546146527117032, |
|
"grad_norm": 1.7695237398147583, |
|
"learning_rate": 3.2193313430079737e-06, |
|
"loss": 1.2432, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 0.6571519188074849, |
|
"grad_norm": 1.834971308708191, |
|
"learning_rate": 3.178008449501517e-06, |
|
"loss": 1.2215, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 0.6596891849032668, |
|
"grad_norm": 1.7134376764297485, |
|
"learning_rate": 3.1368285657955464e-06, |
|
"loss": 1.2204, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.6622264509990485, |
|
"grad_norm": 1.8207467794418335, |
|
"learning_rate": 3.0957949241371845e-06, |
|
"loss": 1.2371, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 0.6647637170948303, |
|
"grad_norm": 1.8170554637908936, |
|
"learning_rate": 3.0549107452948866e-06, |
|
"loss": 1.235, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 0.6673009831906122, |
|
"grad_norm": 1.8893216848373413, |
|
"learning_rate": 3.014179238305629e-06, |
|
"loss": 1.2257, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 0.6698382492863939, |
|
"grad_norm": 1.7861133813858032, |
|
"learning_rate": 2.9736036002230332e-06, |
|
"loss": 1.2061, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 0.6723755153821757, |
|
"grad_norm": 1.8051108121871948, |
|
"learning_rate": 2.933187015866431e-06, |
|
"loss": 1.2432, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 0.6749127814779575, |
|
"grad_norm": 1.710418939590454, |
|
"learning_rate": 2.892932657570878e-06, |
|
"loss": 1.2179, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 0.6774500475737393, |
|
"grad_norm": 1.7585214376449585, |
|
"learning_rate": 2.8528436849381518e-06, |
|
"loss": 1.2522, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 0.6799873136695211, |
|
"grad_norm": 1.8252116441726685, |
|
"learning_rate": 2.8129232445887623e-06, |
|
"loss": 1.2288, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 0.6825245797653029, |
|
"grad_norm": 1.8297280073165894, |
|
"learning_rate": 2.773174469914964e-06, |
|
"loss": 1.2273, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 0.6850618458610847, |
|
"grad_norm": 1.8258917331695557, |
|
"learning_rate": 2.7336004808348094e-06, |
|
"loss": 1.2183, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.6875991119568665, |
|
"grad_norm": 1.7145591974258423, |
|
"learning_rate": 2.6942043835472725e-06, |
|
"loss": 1.2234, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 0.6901363780526483, |
|
"grad_norm": 1.8191933631896973, |
|
"learning_rate": 2.654989270288435e-06, |
|
"loss": 1.2301, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 0.69267364414843, |
|
"grad_norm": 1.793045997619629, |
|
"learning_rate": 2.615958219088776e-06, |
|
"loss": 1.2253, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 0.6952109102442119, |
|
"grad_norm": 1.7396857738494873, |
|
"learning_rate": 2.577114293531571e-06, |
|
"loss": 1.2183, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 0.6977481763399936, |
|
"grad_norm": 1.7470922470092773, |
|
"learning_rate": 2.538460542512435e-06, |
|
"loss": 1.2193, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 0.7002854424357755, |
|
"grad_norm": 1.885988473892212, |
|
"learning_rate": 2.5000000000000015e-06, |
|
"loss": 1.2203, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 0.7028227085315573, |
|
"grad_norm": 1.7453058958053589, |
|
"learning_rate": 2.461735684797794e-06, |
|
"loss": 1.2308, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 0.705359974627339, |
|
"grad_norm": 1.9530824422836304, |
|
"learning_rate": 2.4236706003072733e-06, |
|
"loss": 1.2472, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 0.7078972407231209, |
|
"grad_norm": 1.9082190990447998, |
|
"learning_rate": 2.385807734292097e-06, |
|
"loss": 1.211, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 0.7104345068189026, |
|
"grad_norm": 1.8059364557266235, |
|
"learning_rate": 2.3481500586436067e-06, |
|
"loss": 1.2307, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.7129717729146844, |
|
"grad_norm": 2.033775806427002, |
|
"learning_rate": 2.3107005291475653e-06, |
|
"loss": 1.2313, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 0.7155090390104663, |
|
"grad_norm": 1.7707144021987915, |
|
"learning_rate": 2.273462085252146e-06, |
|
"loss": 1.2019, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 0.718046305106248, |
|
"grad_norm": 1.8923752307891846, |
|
"learning_rate": 2.236437649837223e-06, |
|
"loss": 1.2496, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 0.7205835712020298, |
|
"grad_norm": 1.820609450340271, |
|
"learning_rate": 2.1996301289849474e-06, |
|
"loss": 1.2232, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 0.7231208372978116, |
|
"grad_norm": 1.8817142248153687, |
|
"learning_rate": 2.1630424117516436e-06, |
|
"loss": 1.2134, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 0.7256581033935934, |
|
"grad_norm": 1.7665314674377441, |
|
"learning_rate": 2.126677369941047e-06, |
|
"loss": 1.192, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 0.7281953694893752, |
|
"grad_norm": 1.8656190633773804, |
|
"learning_rate": 2.0905378578788947e-06, |
|
"loss": 1.218, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 0.730732635585157, |
|
"grad_norm": 1.7951234579086304, |
|
"learning_rate": 2.0546267121888863e-06, |
|
"loss": 1.2099, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 0.7332699016809388, |
|
"grad_norm": 1.8712400197982788, |
|
"learning_rate": 2.0189467515700283e-06, |
|
"loss": 1.2071, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 0.7358071677767206, |
|
"grad_norm": 1.8098704814910889, |
|
"learning_rate": 1.9835007765754035e-06, |
|
"loss": 1.2345, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.7383444338725024, |
|
"grad_norm": 1.8739259243011475, |
|
"learning_rate": 1.9482915693923442e-06, |
|
"loss": 1.2138, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 0.7408816999682841, |
|
"grad_norm": 1.8259673118591309, |
|
"learning_rate": 1.913321893624059e-06, |
|
"loss": 1.2103, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 0.743418966064066, |
|
"grad_norm": 1.879623532295227, |
|
"learning_rate": 1.878594494072713e-06, |
|
"loss": 1.2094, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 0.7459562321598477, |
|
"grad_norm": 1.7758175134658813, |
|
"learning_rate": 1.8441120965239912e-06, |
|
"loss": 1.219, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 0.7484934982556296, |
|
"grad_norm": 1.7712740898132324, |
|
"learning_rate": 1.8098774075331383e-06, |
|
"loss": 1.2312, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 0.7510307643514114, |
|
"grad_norm": 1.8342068195343018, |
|
"learning_rate": 1.7758931142125308e-06, |
|
"loss": 1.2284, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 0.7535680304471931, |
|
"grad_norm": 1.922353982925415, |
|
"learning_rate": 1.7421618840207576e-06, |
|
"loss": 1.2251, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 0.756105296542975, |
|
"grad_norm": 1.7844698429107666, |
|
"learning_rate": 1.7086863645532425e-06, |
|
"loss": 1.2057, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 0.7586425626387567, |
|
"grad_norm": 1.8080805540084839, |
|
"learning_rate": 1.6754691833344472e-06, |
|
"loss": 1.2355, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 0.7611798287345385, |
|
"grad_norm": 1.791804313659668, |
|
"learning_rate": 1.642512947611622e-06, |
|
"loss": 1.2406, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.7637170948303204, |
|
"grad_norm": 1.8191893100738525, |
|
"learning_rate": 1.6098202441501599e-06, |
|
"loss": 1.2101, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 0.7662543609261021, |
|
"grad_norm": 1.8108185529708862, |
|
"learning_rate": 1.5773936390305678e-06, |
|
"loss": 1.1916, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 0.768791627021884, |
|
"grad_norm": 1.7712724208831787, |
|
"learning_rate": 1.5452356774470468e-06, |
|
"loss": 1.2103, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 0.7713288931176657, |
|
"grad_norm": 1.8046748638153076, |
|
"learning_rate": 1.5133488835077204e-06, |
|
"loss": 1.2147, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 0.7738661592134475, |
|
"grad_norm": 1.8228182792663574, |
|
"learning_rate": 1.4817357600365061e-06, |
|
"loss": 1.2153, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 0.7764034253092293, |
|
"grad_norm": 1.9025073051452637, |
|
"learning_rate": 1.4503987883766857e-06, |
|
"loss": 1.2207, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 0.7789406914050111, |
|
"grad_norm": 1.8209235668182373, |
|
"learning_rate": 1.4193404281961172e-06, |
|
"loss": 1.2225, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 0.7814779575007929, |
|
"grad_norm": 1.8447314500808716, |
|
"learning_rate": 1.3885631172941932e-06, |
|
"loss": 1.2265, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 0.7840152235965747, |
|
"grad_norm": 1.8785367012023926, |
|
"learning_rate": 1.3580692714104887e-06, |
|
"loss": 1.2053, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 0.7865524896923565, |
|
"grad_norm": 1.9189660549163818, |
|
"learning_rate": 1.3278612840351468e-06, |
|
"loss": 1.2253, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.7890897557881383, |
|
"grad_norm": 1.7978157997131348, |
|
"learning_rate": 1.2979415262210089e-06, |
|
"loss": 1.2183, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 0.7916270218839201, |
|
"grad_norm": 1.769068956375122, |
|
"learning_rate": 1.2683123463975144e-06, |
|
"loss": 1.2057, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 0.7941642879797018, |
|
"grad_norm": 1.7503260374069214, |
|
"learning_rate": 1.2389760701863717e-06, |
|
"loss": 1.2295, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 0.7967015540754837, |
|
"grad_norm": 1.8129879236221313, |
|
"learning_rate": 1.2099350002190063e-06, |
|
"loss": 1.2066, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 0.7992388201712655, |
|
"grad_norm": 1.824507713317871, |
|
"learning_rate": 1.1811914159558374e-06, |
|
"loss": 1.2385, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 0.8017760862670472, |
|
"grad_norm": 1.8247697353363037, |
|
"learning_rate": 1.1527475735073574e-06, |
|
"loss": 1.2372, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 0.8043133523628291, |
|
"grad_norm": 1.7803229093551636, |
|
"learning_rate": 1.1246057054570414e-06, |
|
"loss": 1.2009, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 0.8068506184586108, |
|
"grad_norm": 1.7703157663345337, |
|
"learning_rate": 1.0967680206861198e-06, |
|
"loss": 1.1976, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 0.8093878845543926, |
|
"grad_norm": 1.7612484693527222, |
|
"learning_rate": 1.069236704200195e-06, |
|
"loss": 1.206, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 0.8119251506501745, |
|
"grad_norm": 1.739809274673462, |
|
"learning_rate": 1.0420139169577393e-06, |
|
"loss": 1.2068, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.8144624167459562, |
|
"grad_norm": 1.7609456777572632, |
|
"learning_rate": 1.01510179570048e-06, |
|
"loss": 1.2115, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 0.8169996828417381, |
|
"grad_norm": 1.8573811054229736, |
|
"learning_rate": 9.88502452785685e-07, |
|
"loss": 1.2151, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 0.8195369489375198, |
|
"grad_norm": 1.7674131393432617, |
|
"learning_rate": 9.62217976020357e-07, |
|
"loss": 1.221, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 0.8220742150333016, |
|
"grad_norm": 1.9328287839889526, |
|
"learning_rate": 9.362504284973683e-07, |
|
"loss": 1.2013, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 0.8246114811290834, |
|
"grad_norm": 1.881496787071228, |
|
"learning_rate": 9.1060184843352e-07, |
|
"loss": 1.2104, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 0.8271487472248652, |
|
"grad_norm": 1.797257661819458, |
|
"learning_rate": 8.852742490095628e-07, |
|
"loss": 1.2026, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 0.829686013320647, |
|
"grad_norm": 1.7446339130401611, |
|
"learning_rate": 8.602696182121812e-07, |
|
"loss": 1.2078, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 0.8322232794164288, |
|
"grad_norm": 1.8723726272583008, |
|
"learning_rate": 8.35589918677952e-07, |
|
"loss": 1.2117, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 0.8347605455122106, |
|
"grad_norm": 1.7332195043563843, |
|
"learning_rate": 8.112370875393e-07, |
|
"loss": 1.2154, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 0.8372978116079924, |
|
"grad_norm": 1.7967216968536377, |
|
"learning_rate": 7.872130362724422e-07, |
|
"loss": 1.1956, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.8398350777037742, |
|
"grad_norm": 1.889953851699829, |
|
"learning_rate": 7.635196505473652e-07, |
|
"loss": 1.2149, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 0.842372343799556, |
|
"grad_norm": 1.8281513452529907, |
|
"learning_rate": 7.401587900798091e-07, |
|
"loss": 1.2106, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 0.8449096098953378, |
|
"grad_norm": 1.917148232460022, |
|
"learning_rate": 7.171322884852988e-07, |
|
"loss": 1.2256, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 0.8474468759911196, |
|
"grad_norm": 1.8084789514541626, |
|
"learning_rate": 6.944419531352236e-07, |
|
"loss": 1.2162, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 0.8499841420869013, |
|
"grad_norm": 1.7765891551971436, |
|
"learning_rate": 6.720895650149744e-07, |
|
"loss": 1.2236, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 0.8525214081826832, |
|
"grad_norm": 1.8284307718276978, |
|
"learning_rate": 6.500768785841482e-07, |
|
"loss": 1.2111, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 0.8550586742784649, |
|
"grad_norm": 1.8736671209335327, |
|
"learning_rate": 6.284056216388451e-07, |
|
"loss": 1.2111, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 0.8575959403742468, |
|
"grad_norm": 1.8187748193740845, |
|
"learning_rate": 6.070774951760505e-07, |
|
"loss": 1.2058, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 0.8601332064700286, |
|
"grad_norm": 1.811501145362854, |
|
"learning_rate": 5.860941732601166e-07, |
|
"loss": 1.1993, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 0.8626704725658103, |
|
"grad_norm": 1.8596574068069458, |
|
"learning_rate": 5.654573028913735e-07, |
|
"loss": 1.2039, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.8652077386615922, |
|
"grad_norm": 1.8533954620361328, |
|
"learning_rate": 5.451685038768473e-07, |
|
"loss": 1.228, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 0.8677450047573739, |
|
"grad_norm": 1.7679084539413452, |
|
"learning_rate": 5.252293687031196e-07, |
|
"loss": 1.1993, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 0.8702822708531557, |
|
"grad_norm": 1.8449029922485352, |
|
"learning_rate": 5.05641462411336e-07, |
|
"loss": 1.2015, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 0.8728195369489375, |
|
"grad_norm": 1.8610782623291016, |
|
"learning_rate": 4.864063224743626e-07, |
|
"loss": 1.2049, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 0.8753568030447193, |
|
"grad_norm": 1.7010682821273804, |
|
"learning_rate": 4.6752545867610963e-07, |
|
"loss": 1.2047, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 0.8778940691405012, |
|
"grad_norm": 1.7259236574172974, |
|
"learning_rate": 4.4900035299302036e-07, |
|
"loss": 1.204, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 0.8804313352362829, |
|
"grad_norm": 1.7741477489471436, |
|
"learning_rate": 4.308324594777635e-07, |
|
"loss": 1.2025, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 0.8829686013320647, |
|
"grad_norm": 1.8965697288513184, |
|
"learning_rate": 4.130232041450866e-07, |
|
"loss": 1.2034, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 0.8855058674278465, |
|
"grad_norm": 1.7573304176330566, |
|
"learning_rate": 3.9557398485989884e-07, |
|
"loss": 1.1985, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 0.8880431335236283, |
|
"grad_norm": 1.7425026893615723, |
|
"learning_rate": 3.784861712275467e-07, |
|
"loss": 1.1938, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.89058039961941, |
|
"grad_norm": 1.7624136209487915, |
|
"learning_rate": 3.61761104486314e-07, |
|
"loss": 1.2098, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 0.8931176657151919, |
|
"grad_norm": 1.8496613502502441, |
|
"learning_rate": 3.454000974021432e-07, |
|
"loss": 1.2179, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 0.8956549318109737, |
|
"grad_norm": 1.796911597251892, |
|
"learning_rate": 3.294044341655983e-07, |
|
"loss": 1.1989, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 0.8981921979067554, |
|
"grad_norm": 1.854840874671936, |
|
"learning_rate": 3.1377537029107174e-07, |
|
"loss": 1.1939, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 0.9007294640025373, |
|
"grad_norm": 1.7666873931884766, |
|
"learning_rate": 2.985141325182267e-07, |
|
"loss": 1.2132, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 0.903266730098319, |
|
"grad_norm": 1.8497194051742554, |
|
"learning_rate": 2.836219187157202e-07, |
|
"loss": 1.2106, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 0.9058039961941009, |
|
"grad_norm": 1.865463376045227, |
|
"learning_rate": 2.69099897787175e-07, |
|
"loss": 1.1999, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 0.9083412622898827, |
|
"grad_norm": 1.7132046222686768, |
|
"learning_rate": 2.5494920957943314e-07, |
|
"loss": 1.1973, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 0.9108785283856644, |
|
"grad_norm": 1.8095914125442505, |
|
"learning_rate": 2.411709647930882e-07, |
|
"loss": 1.2137, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 0.9134157944814463, |
|
"grad_norm": 1.745731234550476, |
|
"learning_rate": 2.2776624489530664e-07, |
|
"loss": 1.2098, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.915953060577228, |
|
"grad_norm": 1.834447979927063, |
|
"learning_rate": 2.1473610203494032e-07, |
|
"loss": 1.2122, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 0.9184903266730098, |
|
"grad_norm": 1.8160932064056396, |
|
"learning_rate": 2.0208155895994343e-07, |
|
"loss": 1.1939, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 0.9210275927687916, |
|
"grad_norm": 1.8465439081192017, |
|
"learning_rate": 1.8980360893709582e-07, |
|
"loss": 1.2068, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 0.9235648588645734, |
|
"grad_norm": 1.8164421319961548, |
|
"learning_rate": 1.7790321567404011e-07, |
|
"loss": 1.2053, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 0.9261021249603553, |
|
"grad_norm": 1.8823806047439575, |
|
"learning_rate": 1.6638131324364094e-07, |
|
"loss": 1.2077, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 0.928639391056137, |
|
"grad_norm": 1.8699020147323608, |
|
"learning_rate": 1.55238806010668e-07, |
|
"loss": 1.1787, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 0.9311766571519188, |
|
"grad_norm": 1.856771469116211, |
|
"learning_rate": 1.444765685608096e-07, |
|
"loss": 1.2126, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 0.9337139232477006, |
|
"grad_norm": 1.7968028783798218, |
|
"learning_rate": 1.340954456320287e-07, |
|
"loss": 1.2085, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 0.9362511893434824, |
|
"grad_norm": 1.8331055641174316, |
|
"learning_rate": 1.2409625204825802e-07, |
|
"loss": 1.2081, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 0.9387884554392641, |
|
"grad_norm": 1.7524057626724243, |
|
"learning_rate": 1.1447977265544141e-07, |
|
"loss": 1.2121, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.941325721535046, |
|
"grad_norm": 1.8265066146850586, |
|
"learning_rate": 1.052467622599329e-07, |
|
"loss": 1.1897, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 0.9438629876308278, |
|
"grad_norm": 1.8773318529129028, |
|
"learning_rate": 9.639794556925041e-08, |
|
"loss": 1.2053, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 0.9464002537266096, |
|
"grad_norm": 1.8355180025100708, |
|
"learning_rate": 8.793401713519333e-08, |
|
"loss": 1.2044, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 0.9489375198223914, |
|
"grad_norm": 1.8861024379730225, |
|
"learning_rate": 7.985564129932566e-08, |
|
"loss": 1.2143, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 0.9514747859181731, |
|
"grad_norm": 1.8514657020568848, |
|
"learning_rate": 7.216345214083264e-08, |
|
"loss": 1.2143, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 0.954012052013955, |
|
"grad_norm": 1.7812689542770386, |
|
"learning_rate": 6.485805342674901e-08, |
|
"loss": 1.1858, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 0.9565493181097368, |
|
"grad_norm": 1.838179349899292, |
|
"learning_rate": 5.7940018564570654e-08, |
|
"loss": 1.2116, |
|
"step": 3770 |
|
}, |
|
{ |
|
"epoch": 0.9590865842055185, |
|
"grad_norm": 1.7216005325317383, |
|
"learning_rate": 5.1409890557246876e-08, |
|
"loss": 1.2106, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 0.9616238503013004, |
|
"grad_norm": 1.8689485788345337, |
|
"learning_rate": 4.526818196055938e-08, |
|
"loss": 1.2112, |
|
"step": 3790 |
|
}, |
|
{ |
|
"epoch": 0.9641611163970821, |
|
"grad_norm": 1.773857593536377, |
|
"learning_rate": 3.951537484289114e-08, |
|
"loss": 1.2254, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.966698382492864, |
|
"grad_norm": 1.8553355932235718, |
|
"learning_rate": 3.4151920747390044e-08, |
|
"loss": 1.1961, |
|
"step": 3810 |
|
}, |
|
{ |
|
"epoch": 0.9692356485886457, |
|
"grad_norm": 1.7645963430404663, |
|
"learning_rate": 2.9178240656523305e-08, |
|
"loss": 1.2079, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 0.9717729146844275, |
|
"grad_norm": 1.8326435089111328, |
|
"learning_rate": 2.4594724959037253e-08, |
|
"loss": 1.2201, |
|
"step": 3830 |
|
}, |
|
{ |
|
"epoch": 0.9743101807802094, |
|
"grad_norm": 1.8248659372329712, |
|
"learning_rate": 2.0401733419315727e-08, |
|
"loss": 1.2018, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 0.9768474468759911, |
|
"grad_norm": 1.7907987833023071, |
|
"learning_rate": 1.659959514913767e-08, |
|
"loss": 1.2187, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 0.9793847129717729, |
|
"grad_norm": 1.8781341314315796, |
|
"learning_rate": 1.3188608581851114e-08, |
|
"loss": 1.1951, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 0.9819219790675547, |
|
"grad_norm": 1.8266582489013672, |
|
"learning_rate": 1.016904144894304e-08, |
|
"loss": 1.2178, |
|
"step": 3870 |
|
}, |
|
{ |
|
"epoch": 0.9844592451633365, |
|
"grad_norm": 1.7504289150238037, |
|
"learning_rate": 7.541130759027848e-09, |
|
"loss": 1.2076, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 0.9869965112591182, |
|
"grad_norm": 1.7687255144119263, |
|
"learning_rate": 5.305082779244464e-09, |
|
"loss": 1.1923, |
|
"step": 3890 |
|
}, |
|
{ |
|
"epoch": 0.9895337773549001, |
|
"grad_norm": 1.7166252136230469, |
|
"learning_rate": 3.4610730190648423e-09, |
|
"loss": 1.223, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.9920710434506819, |
|
"grad_norm": 1.792605996131897, |
|
"learning_rate": 2.0092462165194337e-09, |
|
"loss": 1.2253, |
|
"step": 3910 |
|
}, |
|
{ |
|
"epoch": 0.9946083095464637, |
|
"grad_norm": 1.7338823080062866, |
|
"learning_rate": 9.497163268351595e-10, |
|
"loss": 1.186, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 0.9971455756422455, |
|
"grad_norm": 1.8996716737747192, |
|
"learning_rate": 2.825665134920108e-10, |
|
"loss": 1.2094, |
|
"step": 3930 |
|
}, |
|
{ |
|
"epoch": 0.9996828417380272, |
|
"grad_norm": 1.8665469884872437, |
|
"learning_rate": 7.849141696048002e-12, |
|
"loss": 1.2135, |
|
"step": 3940 |
|
}, |
|
{ |
|
"epoch": 0.9999365683476055, |
|
"step": 3941, |
|
"total_flos": 6.337025018626572e+18, |
|
"train_loss": 1.2871940559434636, |
|
"train_runtime": 18302.3162, |
|
"train_samples_per_second": 27.564, |
|
"train_steps_per_second": 0.215 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 3941, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 6.337025018626572e+18, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |