|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 20.0, |
|
"eval_steps": 500, |
|
"global_step": 12620, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.01584786053882726, |
|
"grad_norm": 6.662916660308838, |
|
"learning_rate": 1.584786053882726e-06, |
|
"loss": 1.0759, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.03169572107765452, |
|
"grad_norm": 5.666072845458984, |
|
"learning_rate": 3.169572107765452e-06, |
|
"loss": 1.0851, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.04754358161648178, |
|
"grad_norm": 5.542662620544434, |
|
"learning_rate": 4.754358161648178e-06, |
|
"loss": 1.0049, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.06339144215530904, |
|
"grad_norm": 3.666578769683838, |
|
"learning_rate": 6.339144215530904e-06, |
|
"loss": 0.7551, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.07923930269413629, |
|
"grad_norm": 1.8371094465255737, |
|
"learning_rate": 7.923930269413629e-06, |
|
"loss": 0.5051, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.09508716323296355, |
|
"grad_norm": 1.8207030296325684, |
|
"learning_rate": 9.508716323296357e-06, |
|
"loss": 0.3629, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.1109350237717908, |
|
"grad_norm": 1.8528467416763306, |
|
"learning_rate": 1.109350237717908e-05, |
|
"loss": 0.2969, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.12678288431061807, |
|
"grad_norm": 1.4607429504394531, |
|
"learning_rate": 1.2678288431061808e-05, |
|
"loss": 0.2692, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.14263074484944532, |
|
"grad_norm": 1.288856029510498, |
|
"learning_rate": 1.4263074484944532e-05, |
|
"loss": 0.2774, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.15847860538827258, |
|
"grad_norm": 1.119730830192566, |
|
"learning_rate": 1.5847860538827258e-05, |
|
"loss": 0.2193, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.17432646592709986, |
|
"grad_norm": 0.9573730826377869, |
|
"learning_rate": 1.7432646592709986e-05, |
|
"loss": 0.2068, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.1901743264659271, |
|
"grad_norm": 0.9363383054733276, |
|
"learning_rate": 1.9017432646592713e-05, |
|
"loss": 0.2061, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.20602218700475436, |
|
"grad_norm": 0.9630652070045471, |
|
"learning_rate": 2.0602218700475437e-05, |
|
"loss": 0.1937, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.2218700475435816, |
|
"grad_norm": 1.1261632442474365, |
|
"learning_rate": 2.218700475435816e-05, |
|
"loss": 0.1769, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.23771790808240886, |
|
"grad_norm": 0.859064519405365, |
|
"learning_rate": 2.377179080824089e-05, |
|
"loss": 0.1788, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.25356576862123614, |
|
"grad_norm": 0.8056842684745789, |
|
"learning_rate": 2.5356576862123617e-05, |
|
"loss": 0.1486, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.2694136291600634, |
|
"grad_norm": 0.889215886592865, |
|
"learning_rate": 2.694136291600634e-05, |
|
"loss": 0.1597, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.28526148969889065, |
|
"grad_norm": 0.90467369556427, |
|
"learning_rate": 2.8526148969889065e-05, |
|
"loss": 0.1571, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.3011093502377179, |
|
"grad_norm": 0.9579144716262817, |
|
"learning_rate": 3.0110935023771792e-05, |
|
"loss": 0.1378, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.31695721077654515, |
|
"grad_norm": 1.2036852836608887, |
|
"learning_rate": 3.1695721077654516e-05, |
|
"loss": 0.1315, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.3328050713153724, |
|
"grad_norm": 0.9379674196243286, |
|
"learning_rate": 3.3280507131537244e-05, |
|
"loss": 0.128, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.3486529318541997, |
|
"grad_norm": 0.9713570475578308, |
|
"learning_rate": 3.486529318541997e-05, |
|
"loss": 0.124, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.36450079239302696, |
|
"grad_norm": 0.9157624244689941, |
|
"learning_rate": 3.64500792393027e-05, |
|
"loss": 0.1264, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.3803486529318542, |
|
"grad_norm": 0.5934288501739502, |
|
"learning_rate": 3.8034865293185427e-05, |
|
"loss": 0.1121, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.39619651347068147, |
|
"grad_norm": 0.865558922290802, |
|
"learning_rate": 3.961965134706815e-05, |
|
"loss": 0.1222, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.4120443740095087, |
|
"grad_norm": 0.8881043195724487, |
|
"learning_rate": 4.1204437400950875e-05, |
|
"loss": 0.1067, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.42789223454833597, |
|
"grad_norm": 0.7673732042312622, |
|
"learning_rate": 4.27892234548336e-05, |
|
"loss": 0.1088, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.4437400950871632, |
|
"grad_norm": 0.5540139079093933, |
|
"learning_rate": 4.437400950871632e-05, |
|
"loss": 0.094, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.4595879556259905, |
|
"grad_norm": 0.8689896464347839, |
|
"learning_rate": 4.595879556259905e-05, |
|
"loss": 0.0978, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.4754358161648177, |
|
"grad_norm": 0.6194612979888916, |
|
"learning_rate": 4.754358161648178e-05, |
|
"loss": 0.0791, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.49128367670364503, |
|
"grad_norm": 0.698275089263916, |
|
"learning_rate": 4.9128367670364506e-05, |
|
"loss": 0.0883, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.5071315372424723, |
|
"grad_norm": 0.7928034663200378, |
|
"learning_rate": 5.071315372424723e-05, |
|
"loss": 0.0994, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.5229793977812995, |
|
"grad_norm": 0.5640034675598145, |
|
"learning_rate": 5.2297939778129954e-05, |
|
"loss": 0.0832, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.5388272583201268, |
|
"grad_norm": 0.8224833011627197, |
|
"learning_rate": 5.388272583201268e-05, |
|
"loss": 0.0926, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.554675118858954, |
|
"grad_norm": 0.9649167656898499, |
|
"learning_rate": 5.546751188589541e-05, |
|
"loss": 0.0896, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.5705229793977813, |
|
"grad_norm": 0.7821329832077026, |
|
"learning_rate": 5.705229793977813e-05, |
|
"loss": 0.1003, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.5863708399366085, |
|
"grad_norm": 0.5526576638221741, |
|
"learning_rate": 5.863708399366086e-05, |
|
"loss": 0.0826, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.6022187004754358, |
|
"grad_norm": 0.6012312769889832, |
|
"learning_rate": 6.0221870047543585e-05, |
|
"loss": 0.0808, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.618066561014263, |
|
"grad_norm": 0.6598588824272156, |
|
"learning_rate": 6.18066561014263e-05, |
|
"loss": 0.0781, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.6339144215530903, |
|
"grad_norm": 0.6164723634719849, |
|
"learning_rate": 6.339144215530903e-05, |
|
"loss": 0.0725, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.6497622820919176, |
|
"grad_norm": 0.6792150139808655, |
|
"learning_rate": 6.497622820919176e-05, |
|
"loss": 0.0694, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.6656101426307448, |
|
"grad_norm": 0.5863011479377747, |
|
"learning_rate": 6.656101426307449e-05, |
|
"loss": 0.0771, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.6814580031695721, |
|
"grad_norm": 0.6146591305732727, |
|
"learning_rate": 6.814580031695722e-05, |
|
"loss": 0.0768, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.6973058637083994, |
|
"grad_norm": 0.4906776547431946, |
|
"learning_rate": 6.973058637083994e-05, |
|
"loss": 0.073, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.7131537242472267, |
|
"grad_norm": 0.7824636101722717, |
|
"learning_rate": 7.131537242472267e-05, |
|
"loss": 0.074, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.7290015847860539, |
|
"grad_norm": 0.5947498679161072, |
|
"learning_rate": 7.29001584786054e-05, |
|
"loss": 0.0786, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.7448494453248812, |
|
"grad_norm": 0.49313023686408997, |
|
"learning_rate": 7.448494453248813e-05, |
|
"loss": 0.0694, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.7606973058637084, |
|
"grad_norm": 0.36435437202453613, |
|
"learning_rate": 7.606973058637085e-05, |
|
"loss": 0.067, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.7765451664025357, |
|
"grad_norm": 0.6767722368240356, |
|
"learning_rate": 7.765451664025357e-05, |
|
"loss": 0.0747, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.7923930269413629, |
|
"grad_norm": 0.5377907752990723, |
|
"learning_rate": 7.92393026941363e-05, |
|
"loss": 0.0709, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.8082408874801902, |
|
"grad_norm": 0.740249752998352, |
|
"learning_rate": 8.082408874801902e-05, |
|
"loss": 0.0762, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.8240887480190174, |
|
"grad_norm": 0.5422998666763306, |
|
"learning_rate": 8.240887480190175e-05, |
|
"loss": 0.0636, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.8399366085578447, |
|
"grad_norm": 0.7832110524177551, |
|
"learning_rate": 8.399366085578448e-05, |
|
"loss": 0.0673, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.8557844690966719, |
|
"grad_norm": 0.8280210494995117, |
|
"learning_rate": 8.55784469096672e-05, |
|
"loss": 0.0729, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.8716323296354992, |
|
"grad_norm": 0.4729553461074829, |
|
"learning_rate": 8.716323296354992e-05, |
|
"loss": 0.0802, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.8874801901743264, |
|
"grad_norm": 0.49598968029022217, |
|
"learning_rate": 8.874801901743265e-05, |
|
"loss": 0.0707, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.9033280507131537, |
|
"grad_norm": 0.5164231657981873, |
|
"learning_rate": 9.033280507131537e-05, |
|
"loss": 0.0723, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.919175911251981, |
|
"grad_norm": 0.8574791550636292, |
|
"learning_rate": 9.19175911251981e-05, |
|
"loss": 0.0691, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.9350237717908082, |
|
"grad_norm": 0.6576387286186218, |
|
"learning_rate": 9.350237717908083e-05, |
|
"loss": 0.0652, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.9508716323296355, |
|
"grad_norm": 0.5376480221748352, |
|
"learning_rate": 9.508716323296356e-05, |
|
"loss": 0.0665, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.9667194928684627, |
|
"grad_norm": 0.3735605776309967, |
|
"learning_rate": 9.667194928684627e-05, |
|
"loss": 0.0663, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.9825673534072901, |
|
"grad_norm": 0.4635787606239319, |
|
"learning_rate": 9.825673534072901e-05, |
|
"loss": 0.0641, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.9984152139461173, |
|
"grad_norm": 0.7257930040359497, |
|
"learning_rate": 9.984152139461174e-05, |
|
"loss": 0.0689, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 1.0142630744849446, |
|
"grad_norm": 0.4881118834018707, |
|
"learning_rate": 9.999986095395153e-05, |
|
"loss": 0.0612, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 1.0301109350237718, |
|
"grad_norm": 0.4648587107658386, |
|
"learning_rate": 9.99993803019373e-05, |
|
"loss": 0.0638, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 1.045958795562599, |
|
"grad_norm": 0.47810834646224976, |
|
"learning_rate": 9.999855633063904e-05, |
|
"loss": 0.0594, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 1.0618066561014263, |
|
"grad_norm": 0.5248630046844482, |
|
"learning_rate": 9.999738904571453e-05, |
|
"loss": 0.0613, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 1.0776545166402536, |
|
"grad_norm": 0.5785868167877197, |
|
"learning_rate": 9.999587845517889e-05, |
|
"loss": 0.0658, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 1.0935023771790808, |
|
"grad_norm": 0.5674669742584229, |
|
"learning_rate": 9.999402456940454e-05, |
|
"loss": 0.0584, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 1.109350237717908, |
|
"grad_norm": 0.5009925961494446, |
|
"learning_rate": 9.999182740112115e-05, |
|
"loss": 0.061, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.1251980982567353, |
|
"grad_norm": 0.4827341139316559, |
|
"learning_rate": 9.99892869654155e-05, |
|
"loss": 0.0668, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 1.1410459587955626, |
|
"grad_norm": 0.5092630982398987, |
|
"learning_rate": 9.998640327973141e-05, |
|
"loss": 0.0644, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 1.1568938193343898, |
|
"grad_norm": 0.5155372619628906, |
|
"learning_rate": 9.998317636386964e-05, |
|
"loss": 0.0629, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 1.172741679873217, |
|
"grad_norm": 0.49632275104522705, |
|
"learning_rate": 9.997960623998772e-05, |
|
"loss": 0.0617, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 1.1885895404120443, |
|
"grad_norm": 0.44003790616989136, |
|
"learning_rate": 9.997569293259977e-05, |
|
"loss": 0.0492, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 1.2044374009508716, |
|
"grad_norm": 0.5962253212928772, |
|
"learning_rate": 9.997143646857643e-05, |
|
"loss": 0.0606, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 1.2202852614896988, |
|
"grad_norm": 0.5411946177482605, |
|
"learning_rate": 9.996683687714457e-05, |
|
"loss": 0.0613, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 1.236133122028526, |
|
"grad_norm": 0.38321027159690857, |
|
"learning_rate": 9.996189418988715e-05, |
|
"loss": 0.0524, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 1.2519809825673534, |
|
"grad_norm": 0.41845589876174927, |
|
"learning_rate": 9.9956608440743e-05, |
|
"loss": 0.0703, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 1.2678288431061806, |
|
"grad_norm": 0.5127869844436646, |
|
"learning_rate": 9.995097966600655e-05, |
|
"loss": 0.0644, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.2836767036450079, |
|
"grad_norm": 0.46290555596351624, |
|
"learning_rate": 9.994500790432762e-05, |
|
"loss": 0.0645, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 1.299524564183835, |
|
"grad_norm": 0.4053901731967926, |
|
"learning_rate": 9.993869319671114e-05, |
|
"loss": 0.0595, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 1.3153724247226624, |
|
"grad_norm": 0.5436721444129944, |
|
"learning_rate": 9.993203558651687e-05, |
|
"loss": 0.0549, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 1.3312202852614896, |
|
"grad_norm": 0.47135990858078003, |
|
"learning_rate": 9.992503511945907e-05, |
|
"loss": 0.0716, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 1.3470681458003169, |
|
"grad_norm": 0.5177005529403687, |
|
"learning_rate": 9.991769184360629e-05, |
|
"loss": 0.0616, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 1.3629160063391441, |
|
"grad_norm": 0.49199798703193665, |
|
"learning_rate": 9.991000580938087e-05, |
|
"loss": 0.0559, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 1.3787638668779714, |
|
"grad_norm": 0.4700946509838104, |
|
"learning_rate": 9.990197706955877e-05, |
|
"loss": 0.0534, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 1.3946117274167986, |
|
"grad_norm": 0.49670886993408203, |
|
"learning_rate": 9.98936056792691e-05, |
|
"loss": 0.0494, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 1.4104595879556259, |
|
"grad_norm": 0.42340198159217834, |
|
"learning_rate": 9.988489169599373e-05, |
|
"loss": 0.0577, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 1.4263074484944531, |
|
"grad_norm": 0.395857036113739, |
|
"learning_rate": 9.987583517956703e-05, |
|
"loss": 0.0465, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.4421553090332804, |
|
"grad_norm": 0.4153169095516205, |
|
"learning_rate": 9.986643619217524e-05, |
|
"loss": 0.0497, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 1.4580031695721076, |
|
"grad_norm": 0.36922216415405273, |
|
"learning_rate": 9.985669479835629e-05, |
|
"loss": 0.055, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 1.473851030110935, |
|
"grad_norm": 0.43492555618286133, |
|
"learning_rate": 9.984661106499913e-05, |
|
"loss": 0.0459, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 1.4896988906497624, |
|
"grad_norm": 0.4480608403682709, |
|
"learning_rate": 9.983618506134344e-05, |
|
"loss": 0.0567, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 1.5055467511885894, |
|
"grad_norm": 0.4628249704837799, |
|
"learning_rate": 9.98254168589791e-05, |
|
"loss": 0.0595, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 1.5213946117274166, |
|
"grad_norm": 0.2762962877750397, |
|
"learning_rate": 9.98143065318456e-05, |
|
"loss": 0.055, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 1.537242472266244, |
|
"grad_norm": 0.4730890095233917, |
|
"learning_rate": 9.980285415623172e-05, |
|
"loss": 0.0571, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 1.5530903328050714, |
|
"grad_norm": 0.5532105565071106, |
|
"learning_rate": 9.979105981077483e-05, |
|
"loss": 0.05, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 1.5689381933438986, |
|
"grad_norm": 0.3818027675151825, |
|
"learning_rate": 9.977892357646049e-05, |
|
"loss": 0.0564, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 1.5847860538827259, |
|
"grad_norm": 0.4600647985935211, |
|
"learning_rate": 9.976644553662178e-05, |
|
"loss": 0.0517, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.6006339144215531, |
|
"grad_norm": 0.39071592688560486, |
|
"learning_rate": 9.975362577693879e-05, |
|
"loss": 0.0583, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 1.6164817749603804, |
|
"grad_norm": 0.5415641069412231, |
|
"learning_rate": 9.974046438543805e-05, |
|
"loss": 0.0549, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 1.6323296354992076, |
|
"grad_norm": 0.4827348291873932, |
|
"learning_rate": 9.972696145249185e-05, |
|
"loss": 0.0537, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 1.6481774960380349, |
|
"grad_norm": 0.4411364495754242, |
|
"learning_rate": 9.971311707081769e-05, |
|
"loss": 0.0474, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 1.6640253565768621, |
|
"grad_norm": 0.46820345520973206, |
|
"learning_rate": 9.96989313354776e-05, |
|
"loss": 0.0491, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 1.6798732171156894, |
|
"grad_norm": 0.38286513090133667, |
|
"learning_rate": 9.968440434387756e-05, |
|
"loss": 0.0536, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 1.6957210776545166, |
|
"grad_norm": 0.45724204182624817, |
|
"learning_rate": 9.966953619576667e-05, |
|
"loss": 0.0543, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 1.7115689381933439, |
|
"grad_norm": 0.4794733226299286, |
|
"learning_rate": 9.965432699323669e-05, |
|
"loss": 0.0456, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 1.7274167987321711, |
|
"grad_norm": 0.4839860498905182, |
|
"learning_rate": 9.963877684072113e-05, |
|
"loss": 0.0558, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 1.7432646592709984, |
|
"grad_norm": 0.4146821200847626, |
|
"learning_rate": 9.962288584499466e-05, |
|
"loss": 0.0492, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 1.7591125198098256, |
|
"grad_norm": 0.4119221568107605, |
|
"learning_rate": 9.960665411517235e-05, |
|
"loss": 0.045, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 1.7749603803486529, |
|
"grad_norm": 0.5363435745239258, |
|
"learning_rate": 9.959008176270892e-05, |
|
"loss": 0.0508, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 1.7908082408874801, |
|
"grad_norm": 1.4820852279663086, |
|
"learning_rate": 9.957316890139792e-05, |
|
"loss": 0.0546, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 1.8066561014263076, |
|
"grad_norm": 1.0511815547943115, |
|
"learning_rate": 9.955591564737099e-05, |
|
"loss": 0.0624, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 1.8225039619651349, |
|
"grad_norm": 1.2796475887298584, |
|
"learning_rate": 9.953832211909715e-05, |
|
"loss": 0.0498, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 1.8383518225039621, |
|
"grad_norm": 0.44021984934806824, |
|
"learning_rate": 9.952038843738181e-05, |
|
"loss": 0.0545, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 1.8541996830427894, |
|
"grad_norm": 0.3289697468280792, |
|
"learning_rate": 9.950211472536609e-05, |
|
"loss": 0.0525, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 1.8700475435816166, |
|
"grad_norm": 0.33451518416404724, |
|
"learning_rate": 9.948350110852587e-05, |
|
"loss": 0.0532, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 1.8858954041204439, |
|
"grad_norm": 0.28288567066192627, |
|
"learning_rate": 9.946454771467104e-05, |
|
"loss": 0.0554, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 1.9017432646592711, |
|
"grad_norm": 0.43782156705856323, |
|
"learning_rate": 9.944525467394452e-05, |
|
"loss": 0.0464, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.9175911251980984, |
|
"grad_norm": 0.43314260244369507, |
|
"learning_rate": 9.94256221188214e-05, |
|
"loss": 0.0457, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 1.9334389857369256, |
|
"grad_norm": 0.3398579955101013, |
|
"learning_rate": 9.940565018410805e-05, |
|
"loss": 0.0479, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 1.9492868462757529, |
|
"grad_norm": 0.4561791718006134, |
|
"learning_rate": 9.938533900694118e-05, |
|
"loss": 0.0524, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 1.9651347068145801, |
|
"grad_norm": 0.4242919981479645, |
|
"learning_rate": 9.93646887267869e-05, |
|
"loss": 0.0472, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 1.9809825673534074, |
|
"grad_norm": 0.4337559938430786, |
|
"learning_rate": 9.934369948543972e-05, |
|
"loss": 0.0483, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 1.9968304278922346, |
|
"grad_norm": 0.3361351490020752, |
|
"learning_rate": 9.93223714270217e-05, |
|
"loss": 0.0584, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 2.012678288431062, |
|
"grad_norm": 0.38473933935165405, |
|
"learning_rate": 9.93007046979813e-05, |
|
"loss": 0.0464, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 2.028526148969889, |
|
"grad_norm": 0.4139537811279297, |
|
"learning_rate": 9.92786994470925e-05, |
|
"loss": 0.0482, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 2.0443740095087164, |
|
"grad_norm": 0.4201923906803131, |
|
"learning_rate": 9.92563558254537e-05, |
|
"loss": 0.0385, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 2.0602218700475436, |
|
"grad_norm": 0.36632242798805237, |
|
"learning_rate": 9.923367398648671e-05, |
|
"loss": 0.0482, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 2.076069730586371, |
|
"grad_norm": 0.5546051859855652, |
|
"learning_rate": 9.921065408593574e-05, |
|
"loss": 0.0522, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 2.091917591125198, |
|
"grad_norm": 0.34690526127815247, |
|
"learning_rate": 9.918729628186628e-05, |
|
"loss": 0.0493, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 2.1077654516640254, |
|
"grad_norm": 0.32220637798309326, |
|
"learning_rate": 9.916360073466397e-05, |
|
"loss": 0.0445, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 2.1236133122028527, |
|
"grad_norm": 0.3459847569465637, |
|
"learning_rate": 9.913956760703363e-05, |
|
"loss": 0.0528, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 2.13946117274168, |
|
"grad_norm": 0.5802285671234131, |
|
"learning_rate": 9.911519706399798e-05, |
|
"loss": 0.0517, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 2.155309033280507, |
|
"grad_norm": 0.49526548385620117, |
|
"learning_rate": 9.909048927289668e-05, |
|
"loss": 0.0521, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 2.1711568938193344, |
|
"grad_norm": 0.48161160945892334, |
|
"learning_rate": 9.906544440338504e-05, |
|
"loss": 0.0486, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 2.1870047543581617, |
|
"grad_norm": 0.45289692282676697, |
|
"learning_rate": 9.904006262743293e-05, |
|
"loss": 0.0541, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 2.202852614896989, |
|
"grad_norm": 0.2760493755340576, |
|
"learning_rate": 9.901434411932358e-05, |
|
"loss": 0.0488, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 2.218700475435816, |
|
"grad_norm": 0.3549060523509979, |
|
"learning_rate": 9.898828905565236e-05, |
|
"loss": 0.0428, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 2.2345483359746434, |
|
"grad_norm": 0.39921554923057556, |
|
"learning_rate": 9.896189761532563e-05, |
|
"loss": 0.046, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 2.2503961965134707, |
|
"grad_norm": 0.31093716621398926, |
|
"learning_rate": 9.89351699795594e-05, |
|
"loss": 0.0507, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 2.266244057052298, |
|
"grad_norm": 0.48712223768234253, |
|
"learning_rate": 9.890810633187825e-05, |
|
"loss": 0.0537, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 2.282091917591125, |
|
"grad_norm": 0.2943997085094452, |
|
"learning_rate": 9.888070685811389e-05, |
|
"loss": 0.0434, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 2.2979397781299524, |
|
"grad_norm": 0.5522529482841492, |
|
"learning_rate": 9.885297174640401e-05, |
|
"loss": 0.0508, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 2.3137876386687797, |
|
"grad_norm": 0.43696942925453186, |
|
"learning_rate": 9.882490118719095e-05, |
|
"loss": 0.0469, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 2.329635499207607, |
|
"grad_norm": 0.34890133142471313, |
|
"learning_rate": 9.87964953732204e-05, |
|
"loss": 0.0406, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 2.345483359746434, |
|
"grad_norm": 0.4267130494117737, |
|
"learning_rate": 9.876775449954003e-05, |
|
"loss": 0.0482, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 2.3613312202852614, |
|
"grad_norm": 0.44068190455436707, |
|
"learning_rate": 9.873867876349822e-05, |
|
"loss": 0.0509, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 2.3771790808240887, |
|
"grad_norm": 0.6220930814743042, |
|
"learning_rate": 9.870926836474265e-05, |
|
"loss": 0.0451, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 2.393026941362916, |
|
"grad_norm": 0.33855652809143066, |
|
"learning_rate": 9.867952350521899e-05, |
|
"loss": 0.0489, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 2.408874801901743, |
|
"grad_norm": 0.35320836305618286, |
|
"learning_rate": 9.864944438916943e-05, |
|
"loss": 0.0553, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 2.4247226624405704, |
|
"grad_norm": 0.44654354453086853, |
|
"learning_rate": 9.861903122313132e-05, |
|
"loss": 0.0523, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 2.4405705229793977, |
|
"grad_norm": 0.41467657685279846, |
|
"learning_rate": 9.858828421593582e-05, |
|
"loss": 0.0457, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 2.456418383518225, |
|
"grad_norm": 0.4683903157711029, |
|
"learning_rate": 9.855720357870635e-05, |
|
"loss": 0.0451, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 2.472266244057052, |
|
"grad_norm": 0.3570249378681183, |
|
"learning_rate": 9.852578952485716e-05, |
|
"loss": 0.0496, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 2.4881141045958794, |
|
"grad_norm": 0.31039777398109436, |
|
"learning_rate": 9.849404227009196e-05, |
|
"loss": 0.047, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 2.5039619651347067, |
|
"grad_norm": 0.35661402344703674, |
|
"learning_rate": 9.846196203240234e-05, |
|
"loss": 0.0451, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 2.519809825673534, |
|
"grad_norm": 0.2695527970790863, |
|
"learning_rate": 9.842954903206634e-05, |
|
"loss": 0.0405, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 2.535657686212361, |
|
"grad_norm": 0.2955043613910675, |
|
"learning_rate": 9.839680349164684e-05, |
|
"loss": 0.0378, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 2.5515055467511885, |
|
"grad_norm": 0.32479894161224365, |
|
"learning_rate": 9.836372563599017e-05, |
|
"loss": 0.0398, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 2.5673534072900157, |
|
"grad_norm": 0.2761167883872986, |
|
"learning_rate": 9.833031569222443e-05, |
|
"loss": 0.0373, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 2.583201267828843, |
|
"grad_norm": 0.42928674817085266, |
|
"learning_rate": 9.829657388975803e-05, |
|
"loss": 0.0445, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 2.59904912836767, |
|
"grad_norm": 0.27622172236442566, |
|
"learning_rate": 9.826250046027809e-05, |
|
"loss": 0.038, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 2.6148969889064975, |
|
"grad_norm": 0.3467934727668762, |
|
"learning_rate": 9.822809563774881e-05, |
|
"loss": 0.0417, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 2.6307448494453247, |
|
"grad_norm": 0.3505828380584717, |
|
"learning_rate": 9.81933596584099e-05, |
|
"loss": 0.0424, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 2.6465927099841524, |
|
"grad_norm": 0.38430240750312805, |
|
"learning_rate": 9.815829276077492e-05, |
|
"loss": 0.0407, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 2.662440570522979, |
|
"grad_norm": 0.280718058347702, |
|
"learning_rate": 9.812289518562975e-05, |
|
"loss": 0.0415, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 2.678288431061807, |
|
"grad_norm": 0.3132197856903076, |
|
"learning_rate": 9.808716717603076e-05, |
|
"loss": 0.0467, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 2.6941362916006337, |
|
"grad_norm": 0.43638864159584045, |
|
"learning_rate": 9.80511089773033e-05, |
|
"loss": 0.0488, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 2.7099841521394614, |
|
"grad_norm": 0.39665859937667847, |
|
"learning_rate": 9.801472083703993e-05, |
|
"loss": 0.043, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 2.7258320126782882, |
|
"grad_norm": 0.43067046999931335, |
|
"learning_rate": 9.797800300509879e-05, |
|
"loss": 0.044, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 2.741679873217116, |
|
"grad_norm": 0.4771805703639984, |
|
"learning_rate": 9.794095573360173e-05, |
|
"loss": 0.0428, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 2.7575277337559427, |
|
"grad_norm": 0.28175461292266846, |
|
"learning_rate": 9.790357927693282e-05, |
|
"loss": 0.0407, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 2.7733755942947704, |
|
"grad_norm": 0.3774772584438324, |
|
"learning_rate": 9.786587389173639e-05, |
|
"loss": 0.0526, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 2.7892234548335972, |
|
"grad_norm": 0.38130345940589905, |
|
"learning_rate": 9.782783983691534e-05, |
|
"loss": 0.0397, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 2.805071315372425, |
|
"grad_norm": 0.3435608744621277, |
|
"learning_rate": 9.778947737362942e-05, |
|
"loss": 0.0421, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 2.8209191759112517, |
|
"grad_norm": 0.3259428143501282, |
|
"learning_rate": 9.775078676529338e-05, |
|
"loss": 0.0534, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 2.8367670364500794, |
|
"grad_norm": 0.37528592348098755, |
|
"learning_rate": 9.771176827757512e-05, |
|
"loss": 0.0397, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 2.8526148969889062, |
|
"grad_norm": 0.22234618663787842, |
|
"learning_rate": 9.767242217839397e-05, |
|
"loss": 0.0425, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 2.868462757527734, |
|
"grad_norm": 0.366315096616745, |
|
"learning_rate": 9.763274873791874e-05, |
|
"loss": 0.048, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 2.8843106180665607, |
|
"grad_norm": 0.2822195887565613, |
|
"learning_rate": 9.759274822856598e-05, |
|
"loss": 0.0394, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 2.9001584786053884, |
|
"grad_norm": 0.2494378387928009, |
|
"learning_rate": 9.7552420924998e-05, |
|
"loss": 0.0525, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 2.9160063391442153, |
|
"grad_norm": 0.3741839528083801, |
|
"learning_rate": 9.751176710412106e-05, |
|
"loss": 0.0406, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 2.931854199683043, |
|
"grad_norm": 0.436900794506073, |
|
"learning_rate": 9.747078704508343e-05, |
|
"loss": 0.04, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 2.94770206022187, |
|
"grad_norm": 0.4009973704814911, |
|
"learning_rate": 9.742948102927351e-05, |
|
"loss": 0.0379, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 2.9635499207606975, |
|
"grad_norm": 0.34224581718444824, |
|
"learning_rate": 9.738784934031781e-05, |
|
"loss": 0.0383, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 2.9793977812995247, |
|
"grad_norm": 0.32024386525154114, |
|
"learning_rate": 9.734589226407913e-05, |
|
"loss": 0.0421, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 2.995245641838352, |
|
"grad_norm": 0.2870291769504547, |
|
"learning_rate": 9.730361008865452e-05, |
|
"loss": 0.038, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 3.011093502377179, |
|
"grad_norm": 0.30015242099761963, |
|
"learning_rate": 9.726100310437327e-05, |
|
"loss": 0.0427, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 3.0269413629160065, |
|
"grad_norm": 0.37298670411109924, |
|
"learning_rate": 9.721807160379503e-05, |
|
"loss": 0.0368, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 3.0427892234548337, |
|
"grad_norm": 0.3384378254413605, |
|
"learning_rate": 9.717481588170765e-05, |
|
"loss": 0.0338, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 3.058637083993661, |
|
"grad_norm": 0.456691175699234, |
|
"learning_rate": 9.713123623512532e-05, |
|
"loss": 0.0406, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 3.074484944532488, |
|
"grad_norm": 0.28940680623054504, |
|
"learning_rate": 9.70873329632864e-05, |
|
"loss": 0.039, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 3.0903328050713155, |
|
"grad_norm": 0.3975684940814972, |
|
"learning_rate": 9.704310636765142e-05, |
|
"loss": 0.0445, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 3.1061806656101427, |
|
"grad_norm": 0.33805158734321594, |
|
"learning_rate": 9.699855675190099e-05, |
|
"loss": 0.0452, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 3.12202852614897, |
|
"grad_norm": 0.4213513433933258, |
|
"learning_rate": 9.695368442193378e-05, |
|
"loss": 0.0371, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 3.1378763866877972, |
|
"grad_norm": 0.3289247751235962, |
|
"learning_rate": 9.69084896858643e-05, |
|
"loss": 0.0345, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 3.1537242472266245, |
|
"grad_norm": 0.3328181505203247, |
|
"learning_rate": 9.68629728540209e-05, |
|
"loss": 0.0436, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 3.1695721077654517, |
|
"grad_norm": 0.37982073426246643, |
|
"learning_rate": 9.681713423894359e-05, |
|
"loss": 0.0415, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 3.185419968304279, |
|
"grad_norm": 0.4652085602283478, |
|
"learning_rate": 9.677097415538186e-05, |
|
"loss": 0.0391, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 3.2012678288431062, |
|
"grad_norm": 0.44633859395980835, |
|
"learning_rate": 9.672449292029257e-05, |
|
"loss": 0.0375, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 3.2171156893819335, |
|
"grad_norm": 0.4091266095638275, |
|
"learning_rate": 9.66776908528378e-05, |
|
"loss": 0.042, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 3.2329635499207607, |
|
"grad_norm": 0.37333956360816956, |
|
"learning_rate": 9.663056827438252e-05, |
|
"loss": 0.0416, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 3.248811410459588, |
|
"grad_norm": 0.3241555988788605, |
|
"learning_rate": 9.65831255084926e-05, |
|
"loss": 0.0375, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 3.2646592709984152, |
|
"grad_norm": 0.3356478214263916, |
|
"learning_rate": 9.653536288093237e-05, |
|
"loss": 0.0379, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 3.2805071315372425, |
|
"grad_norm": 0.3036734163761139, |
|
"learning_rate": 9.648728071966251e-05, |
|
"loss": 0.0401, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 3.2963549920760697, |
|
"grad_norm": 0.3258291780948639, |
|
"learning_rate": 9.64388793548378e-05, |
|
"loss": 0.0403, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 3.312202852614897, |
|
"grad_norm": 0.24130631983280182, |
|
"learning_rate": 9.639015911880478e-05, |
|
"loss": 0.0429, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 3.3280507131537242, |
|
"grad_norm": 0.32036861777305603, |
|
"learning_rate": 9.634112034609955e-05, |
|
"loss": 0.044, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 3.3438985736925515, |
|
"grad_norm": 0.4092555046081543, |
|
"learning_rate": 9.629176337344538e-05, |
|
"loss": 0.04, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 3.3597464342313788, |
|
"grad_norm": 0.2772470712661743, |
|
"learning_rate": 9.62420885397505e-05, |
|
"loss": 0.0441, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 3.375594294770206, |
|
"grad_norm": 0.26047825813293457, |
|
"learning_rate": 9.619209618610569e-05, |
|
"loss": 0.0465, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 3.3914421553090333, |
|
"grad_norm": 0.3633308410644531, |
|
"learning_rate": 9.614178665578199e-05, |
|
"loss": 0.0366, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 3.4072900158478605, |
|
"grad_norm": 0.3024093806743622, |
|
"learning_rate": 9.609116029422834e-05, |
|
"loss": 0.0394, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 3.4231378763866878, |
|
"grad_norm": 0.30647802352905273, |
|
"learning_rate": 9.604021744906915e-05, |
|
"loss": 0.0378, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 3.438985736925515, |
|
"grad_norm": 0.36456671357154846, |
|
"learning_rate": 9.598895847010198e-05, |
|
"loss": 0.0378, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 3.4548335974643423, |
|
"grad_norm": 0.28538623452186584, |
|
"learning_rate": 9.593738370929513e-05, |
|
"loss": 0.0367, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 3.4706814580031695, |
|
"grad_norm": 0.3848700523376465, |
|
"learning_rate": 9.588549352078517e-05, |
|
"loss": 0.0376, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 3.4865293185419968, |
|
"grad_norm": 0.4077780842781067, |
|
"learning_rate": 9.583328826087456e-05, |
|
"loss": 0.0401, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 3.502377179080824, |
|
"grad_norm": 0.29360130429267883, |
|
"learning_rate": 9.578076828802922e-05, |
|
"loss": 0.0377, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 3.5182250396196513, |
|
"grad_norm": 0.23208092153072357, |
|
"learning_rate": 9.572793396287598e-05, |
|
"loss": 0.0456, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 3.5340729001584785, |
|
"grad_norm": 0.2558813691139221, |
|
"learning_rate": 9.567478564820019e-05, |
|
"loss": 0.032, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 3.5499207606973058, |
|
"grad_norm": 0.24153469502925873, |
|
"learning_rate": 9.562132370894321e-05, |
|
"loss": 0.0374, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 3.565768621236133, |
|
"grad_norm": 0.3506905436515808, |
|
"learning_rate": 9.55675485121999e-05, |
|
"loss": 0.0455, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 3.5816164817749603, |
|
"grad_norm": 0.3719404637813568, |
|
"learning_rate": 9.551346042721604e-05, |
|
"loss": 0.042, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 3.5974643423137875, |
|
"grad_norm": 0.27092769742012024, |
|
"learning_rate": 9.545905982538592e-05, |
|
"loss": 0.0351, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 3.613312202852615, |
|
"grad_norm": 0.492046982049942, |
|
"learning_rate": 9.540434708024966e-05, |
|
"loss": 0.0414, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 3.629160063391442, |
|
"grad_norm": 0.3070124387741089, |
|
"learning_rate": 9.534932256749074e-05, |
|
"loss": 0.0396, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 3.6450079239302693, |
|
"grad_norm": 0.30400729179382324, |
|
"learning_rate": 9.529398666493336e-05, |
|
"loss": 0.0356, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 3.6608557844690965, |
|
"grad_norm": 0.26050692796707153, |
|
"learning_rate": 9.523833975253988e-05, |
|
"loss": 0.0419, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 3.676703645007924, |
|
"grad_norm": 0.27897489070892334, |
|
"learning_rate": 9.51823822124082e-05, |
|
"loss": 0.034, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 3.692551505546751, |
|
"grad_norm": 0.3234636187553406, |
|
"learning_rate": 9.512611442876914e-05, |
|
"loss": 0.0428, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 3.7083993660855783, |
|
"grad_norm": 0.3589284121990204, |
|
"learning_rate": 9.506953678798378e-05, |
|
"loss": 0.0387, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 3.7242472266244055, |
|
"grad_norm": 0.4306239187717438, |
|
"learning_rate": 9.501264967854084e-05, |
|
"loss": 0.0474, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 3.740095087163233, |
|
"grad_norm": 0.22645992040634155, |
|
"learning_rate": 9.495545349105401e-05, |
|
"loss": 0.0385, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 3.75594294770206, |
|
"grad_norm": 0.3852635622024536, |
|
"learning_rate": 9.489794861825923e-05, |
|
"loss": 0.0345, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 3.7717908082408877, |
|
"grad_norm": 0.27143415808677673, |
|
"learning_rate": 9.484013545501203e-05, |
|
"loss": 0.0361, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 3.7876386687797146, |
|
"grad_norm": 0.28437864780426025, |
|
"learning_rate": 9.47820143982848e-05, |
|
"loss": 0.0341, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 3.8034865293185423, |
|
"grad_norm": 0.2932587265968323, |
|
"learning_rate": 9.472358584716408e-05, |
|
"loss": 0.0462, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 3.819334389857369, |
|
"grad_norm": 0.4200306236743927, |
|
"learning_rate": 9.466485020284782e-05, |
|
"loss": 0.0421, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 3.8351822503961968, |
|
"grad_norm": 0.41429468989372253, |
|
"learning_rate": 9.46058078686426e-05, |
|
"loss": 0.0429, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 3.8510301109350236, |
|
"grad_norm": 0.29475075006484985, |
|
"learning_rate": 9.454645924996087e-05, |
|
"loss": 0.036, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 3.8668779714738513, |
|
"grad_norm": 0.36007851362228394, |
|
"learning_rate": 9.448680475431819e-05, |
|
"loss": 0.0343, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 3.882725832012678, |
|
"grad_norm": 0.31012412905693054, |
|
"learning_rate": 9.442684479133044e-05, |
|
"loss": 0.0328, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 3.8985736925515058, |
|
"grad_norm": 0.3186264932155609, |
|
"learning_rate": 9.436657977271093e-05, |
|
"loss": 0.0357, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 3.9144215530903326, |
|
"grad_norm": 0.289347380399704, |
|
"learning_rate": 9.430601011226763e-05, |
|
"loss": 0.0322, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 3.9302694136291603, |
|
"grad_norm": 0.8456028699874878, |
|
"learning_rate": 9.424513622590038e-05, |
|
"loss": 0.0368, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 3.946117274167987, |
|
"grad_norm": 0.2324322909116745, |
|
"learning_rate": 9.418395853159793e-05, |
|
"loss": 0.0334, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 3.9619651347068148, |
|
"grad_norm": 0.24869747459888458, |
|
"learning_rate": 9.412247744943512e-05, |
|
"loss": 0.0333, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 3.9778129952456416, |
|
"grad_norm": 0.31198471784591675, |
|
"learning_rate": 9.406069340157003e-05, |
|
"loss": 0.0335, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 3.9936608557844693, |
|
"grad_norm": 0.3861044645309448, |
|
"learning_rate": 9.399860681224098e-05, |
|
"loss": 0.0363, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 4.009508716323296, |
|
"grad_norm": 0.21961505711078644, |
|
"learning_rate": 9.393621810776376e-05, |
|
"loss": 0.0353, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 4.025356576862124, |
|
"grad_norm": 0.28296446800231934, |
|
"learning_rate": 9.387352771652856e-05, |
|
"loss": 0.0438, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 4.041204437400951, |
|
"grad_norm": 0.48964765667915344, |
|
"learning_rate": 9.381053606899713e-05, |
|
"loss": 0.0346, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 4.057052297939778, |
|
"grad_norm": 0.38259637355804443, |
|
"learning_rate": 9.374724359769979e-05, |
|
"loss": 0.0342, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 4.072900158478605, |
|
"grad_norm": 0.29834380745887756, |
|
"learning_rate": 9.368365073723241e-05, |
|
"loss": 0.031, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 4.088748019017433, |
|
"grad_norm": 0.41095930337905884, |
|
"learning_rate": 9.361975792425356e-05, |
|
"loss": 0.0344, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 4.10459587955626, |
|
"grad_norm": 0.21189731359481812, |
|
"learning_rate": 9.355556559748133e-05, |
|
"loss": 0.0367, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 4.120443740095087, |
|
"grad_norm": 0.2741738557815552, |
|
"learning_rate": 9.349107419769048e-05, |
|
"loss": 0.0366, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 4.136291600633914, |
|
"grad_norm": 0.28367432951927185, |
|
"learning_rate": 9.342628416770928e-05, |
|
"loss": 0.0301, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 4.152139461172742, |
|
"grad_norm": 0.29805341362953186, |
|
"learning_rate": 9.336119595241665e-05, |
|
"loss": 0.0332, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 4.167987321711569, |
|
"grad_norm": 0.340262770652771, |
|
"learning_rate": 9.329580999873887e-05, |
|
"loss": 0.0332, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 4.183835182250396, |
|
"grad_norm": 0.2894122302532196, |
|
"learning_rate": 9.323012675564668e-05, |
|
"loss": 0.0333, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 4.199683042789223, |
|
"grad_norm": 0.2781189978122711, |
|
"learning_rate": 9.316414667415216e-05, |
|
"loss": 0.0348, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 4.215530903328051, |
|
"grad_norm": 0.321756511926651, |
|
"learning_rate": 9.309787020730562e-05, |
|
"loss": 0.0303, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 4.231378763866878, |
|
"grad_norm": 0.275852233171463, |
|
"learning_rate": 9.303129781019249e-05, |
|
"loss": 0.0407, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 4.247226624405705, |
|
"grad_norm": 0.44196420907974243, |
|
"learning_rate": 9.296442993993015e-05, |
|
"loss": 0.0395, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 4.263074484944532, |
|
"grad_norm": 0.2846081852912903, |
|
"learning_rate": 9.289726705566491e-05, |
|
"loss": 0.0344, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 4.27892234548336, |
|
"grad_norm": 0.31943535804748535, |
|
"learning_rate": 9.282980961856875e-05, |
|
"loss": 0.0388, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 4.294770206022187, |
|
"grad_norm": 0.4001297354698181, |
|
"learning_rate": 9.276205809183618e-05, |
|
"loss": 0.0366, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 4.310618066561014, |
|
"grad_norm": 0.2874903976917267, |
|
"learning_rate": 9.26940129406811e-05, |
|
"loss": 0.0302, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 4.326465927099841, |
|
"grad_norm": 0.3430187404155731, |
|
"learning_rate": 9.262567463233352e-05, |
|
"loss": 0.0368, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 4.342313787638669, |
|
"grad_norm": 0.3248710632324219, |
|
"learning_rate": 9.255704363603645e-05, |
|
"loss": 0.0337, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 4.358161648177496, |
|
"grad_norm": 0.3309313654899597, |
|
"learning_rate": 9.248812042304263e-05, |
|
"loss": 0.0328, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 4.374009508716323, |
|
"grad_norm": 0.2918064594268799, |
|
"learning_rate": 9.24189054666113e-05, |
|
"loss": 0.0394, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 4.38985736925515, |
|
"grad_norm": 0.35082730650901794, |
|
"learning_rate": 9.23493992420049e-05, |
|
"loss": 0.0406, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 4.405705229793978, |
|
"grad_norm": 0.32973727583885193, |
|
"learning_rate": 9.227960222648593e-05, |
|
"loss": 0.034, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 4.4215530903328055, |
|
"grad_norm": 0.23779386281967163, |
|
"learning_rate": 9.220951489931352e-05, |
|
"loss": 0.0371, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 4.437400950871632, |
|
"grad_norm": 0.2471320629119873, |
|
"learning_rate": 9.213913774174028e-05, |
|
"loss": 0.0317, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 4.453248811410459, |
|
"grad_norm": 0.3636610805988312, |
|
"learning_rate": 9.20684712370089e-05, |
|
"loss": 0.0356, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 4.469096671949287, |
|
"grad_norm": 0.18174231052398682, |
|
"learning_rate": 9.199751587034887e-05, |
|
"loss": 0.0258, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 4.4849445324881145, |
|
"grad_norm": 0.20908503234386444, |
|
"learning_rate": 9.192627212897315e-05, |
|
"loss": 0.0368, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 4.500792393026941, |
|
"grad_norm": 0.27427220344543457, |
|
"learning_rate": 9.185474050207478e-05, |
|
"loss": 0.0382, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 4.516640253565768, |
|
"grad_norm": 0.35455378890037537, |
|
"learning_rate": 9.178292148082362e-05, |
|
"loss": 0.0338, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 4.532488114104596, |
|
"grad_norm": 0.3077165484428406, |
|
"learning_rate": 9.171081555836287e-05, |
|
"loss": 0.032, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 4.5483359746434235, |
|
"grad_norm": 0.29954010248184204, |
|
"learning_rate": 9.163842322980573e-05, |
|
"loss": 0.0363, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 4.56418383518225, |
|
"grad_norm": 0.23956748843193054, |
|
"learning_rate": 9.156574499223202e-05, |
|
"loss": 0.0319, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 4.580031695721077, |
|
"grad_norm": 0.24991659820079803, |
|
"learning_rate": 9.149278134468472e-05, |
|
"loss": 0.0351, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 4.595879556259905, |
|
"grad_norm": 0.35879701375961304, |
|
"learning_rate": 9.141953278816661e-05, |
|
"loss": 0.0364, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 4.6117274167987325, |
|
"grad_norm": 0.2529746890068054, |
|
"learning_rate": 9.134599982563674e-05, |
|
"loss": 0.0357, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 4.627575277337559, |
|
"grad_norm": 0.23599006235599518, |
|
"learning_rate": 9.127218296200705e-05, |
|
"loss": 0.0363, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 4.643423137876387, |
|
"grad_norm": 0.3693040907382965, |
|
"learning_rate": 9.119808270413891e-05, |
|
"loss": 0.036, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 4.659270998415214, |
|
"grad_norm": 0.37512966990470886, |
|
"learning_rate": 9.112369956083953e-05, |
|
"loss": 0.0379, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 4.675118858954042, |
|
"grad_norm": 0.35540756583213806, |
|
"learning_rate": 9.104903404285862e-05, |
|
"loss": 0.0305, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 4.690966719492868, |
|
"grad_norm": 0.4176557660102844, |
|
"learning_rate": 9.097408666288475e-05, |
|
"loss": 0.0355, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 4.706814580031696, |
|
"grad_norm": 0.28811272978782654, |
|
"learning_rate": 9.089885793554195e-05, |
|
"loss": 0.0376, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 4.722662440570523, |
|
"grad_norm": 0.3358956575393677, |
|
"learning_rate": 9.082334837738607e-05, |
|
"loss": 0.0368, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 4.738510301109351, |
|
"grad_norm": 0.3090055584907532, |
|
"learning_rate": 9.074755850690127e-05, |
|
"loss": 0.0326, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 4.754358161648177, |
|
"grad_norm": 0.24217335879802704, |
|
"learning_rate": 9.067148884449647e-05, |
|
"loss": 0.0271, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 4.770206022187005, |
|
"grad_norm": 0.361965149641037, |
|
"learning_rate": 9.059513991250181e-05, |
|
"loss": 0.0361, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 4.786053882725832, |
|
"grad_norm": 0.36846402287483215, |
|
"learning_rate": 9.051851223516501e-05, |
|
"loss": 0.0381, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 4.80190174326466, |
|
"grad_norm": 0.3030705451965332, |
|
"learning_rate": 9.044160633864776e-05, |
|
"loss": 0.0363, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 4.817749603803486, |
|
"grad_norm": 0.40651705861091614, |
|
"learning_rate": 9.036442275102213e-05, |
|
"loss": 0.0305, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 4.833597464342314, |
|
"grad_norm": 0.2696928381919861, |
|
"learning_rate": 9.0286962002267e-05, |
|
"loss": 0.0386, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 4.849445324881141, |
|
"grad_norm": 0.3362119197845459, |
|
"learning_rate": 9.020922462426433e-05, |
|
"loss": 0.0318, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 4.865293185419969, |
|
"grad_norm": 0.21661606431007385, |
|
"learning_rate": 9.013121115079557e-05, |
|
"loss": 0.0338, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 4.881141045958795, |
|
"grad_norm": 0.2977627217769623, |
|
"learning_rate": 9.005292211753792e-05, |
|
"loss": 0.0323, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 4.896988906497623, |
|
"grad_norm": 0.3265908658504486, |
|
"learning_rate": 8.997435806206078e-05, |
|
"loss": 0.032, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 4.91283676703645, |
|
"grad_norm": 0.45224496722221375, |
|
"learning_rate": 8.989551952382192e-05, |
|
"loss": 0.0347, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 4.928684627575278, |
|
"grad_norm": 0.3116205930709839, |
|
"learning_rate": 8.981640704416385e-05, |
|
"loss": 0.0278, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 4.944532488114104, |
|
"grad_norm": 0.38788729906082153, |
|
"learning_rate": 8.97370211663101e-05, |
|
"loss": 0.0356, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 4.960380348652932, |
|
"grad_norm": 0.3053205609321594, |
|
"learning_rate": 8.965736243536152e-05, |
|
"loss": 0.0298, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 4.976228209191759, |
|
"grad_norm": 0.3261253535747528, |
|
"learning_rate": 8.957743139829243e-05, |
|
"loss": 0.038, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 4.992076069730587, |
|
"grad_norm": 0.3000582158565521, |
|
"learning_rate": 8.949722860394693e-05, |
|
"loss": 0.0485, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 5.007923930269413, |
|
"grad_norm": 0.3081798553466797, |
|
"learning_rate": 8.941675460303522e-05, |
|
"loss": 0.0401, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 5.023771790808241, |
|
"grad_norm": 0.29715317487716675, |
|
"learning_rate": 8.933600994812965e-05, |
|
"loss": 0.0314, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 5.039619651347068, |
|
"grad_norm": 0.20959503948688507, |
|
"learning_rate": 8.925499519366102e-05, |
|
"loss": 0.0344, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 5.055467511885896, |
|
"grad_norm": 0.34640997648239136, |
|
"learning_rate": 8.917371089591482e-05, |
|
"loss": 0.0324, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 5.071315372424722, |
|
"grad_norm": 0.29564642906188965, |
|
"learning_rate": 8.909215761302728e-05, |
|
"loss": 0.0404, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 5.08716323296355, |
|
"grad_norm": 0.29282501339912415, |
|
"learning_rate": 8.90103359049816e-05, |
|
"loss": 0.0317, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 5.103011093502377, |
|
"grad_norm": 0.3910326063632965, |
|
"learning_rate": 8.892824633360419e-05, |
|
"loss": 0.0297, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 5.118858954041205, |
|
"grad_norm": 0.30237722396850586, |
|
"learning_rate": 8.884588946256069e-05, |
|
"loss": 0.0372, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 5.134706814580031, |
|
"grad_norm": 0.3003133535385132, |
|
"learning_rate": 8.876326585735213e-05, |
|
"loss": 0.0332, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 5.150554675118859, |
|
"grad_norm": 0.2812441885471344, |
|
"learning_rate": 8.868037608531108e-05, |
|
"loss": 0.0315, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 5.166402535657686, |
|
"grad_norm": 0.2651035785675049, |
|
"learning_rate": 8.859722071559777e-05, |
|
"loss": 0.0292, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 5.182250396196514, |
|
"grad_norm": 0.31288737058639526, |
|
"learning_rate": 8.85138003191961e-05, |
|
"loss": 0.0294, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 5.19809825673534, |
|
"grad_norm": 0.2833364009857178, |
|
"learning_rate": 8.843011546890978e-05, |
|
"loss": 0.0331, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 5.213946117274168, |
|
"grad_norm": 0.25718948245048523, |
|
"learning_rate": 8.834616673935839e-05, |
|
"loss": 0.0281, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 5.229793977812995, |
|
"grad_norm": 0.28992629051208496, |
|
"learning_rate": 8.82619547069734e-05, |
|
"loss": 0.034, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 5.245641838351823, |
|
"grad_norm": 0.2499540150165558, |
|
"learning_rate": 8.817747994999432e-05, |
|
"loss": 0.027, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 5.261489698890649, |
|
"grad_norm": 0.25445619225502014, |
|
"learning_rate": 8.80927430484646e-05, |
|
"loss": 0.0316, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 5.277337559429477, |
|
"grad_norm": 0.28179076313972473, |
|
"learning_rate": 8.800774458422765e-05, |
|
"loss": 0.035, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 5.293185419968304, |
|
"grad_norm": 0.30823758244514465, |
|
"learning_rate": 8.792248514092299e-05, |
|
"loss": 0.0259, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 5.309033280507132, |
|
"grad_norm": 0.3379741311073303, |
|
"learning_rate": 8.783696530398207e-05, |
|
"loss": 0.033, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 5.324881141045958, |
|
"grad_norm": 0.29917508363723755, |
|
"learning_rate": 8.775118566062435e-05, |
|
"loss": 0.0278, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 5.340729001584786, |
|
"grad_norm": 0.15989099442958832, |
|
"learning_rate": 8.766514679985325e-05, |
|
"loss": 0.0315, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 5.356576862123613, |
|
"grad_norm": 0.2137162983417511, |
|
"learning_rate": 8.757884931245211e-05, |
|
"loss": 0.0333, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 5.372424722662441, |
|
"grad_norm": 0.30674856901168823, |
|
"learning_rate": 8.749229379098008e-05, |
|
"loss": 0.0308, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 5.3882725832012675, |
|
"grad_norm": 0.23785285651683807, |
|
"learning_rate": 8.740548082976814e-05, |
|
"loss": 0.0278, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 5.404120443740095, |
|
"grad_norm": 0.25887709856033325, |
|
"learning_rate": 8.731841102491494e-05, |
|
"loss": 0.0283, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 5.419968304278922, |
|
"grad_norm": 0.3679006099700928, |
|
"learning_rate": 8.723108497428276e-05, |
|
"loss": 0.0273, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 5.43581616481775, |
|
"grad_norm": 0.40523847937583923, |
|
"learning_rate": 8.714350327749337e-05, |
|
"loss": 0.0319, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 5.4516640253565765, |
|
"grad_norm": 0.2975967228412628, |
|
"learning_rate": 8.705566653592393e-05, |
|
"loss": 0.0382, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 5.467511885895404, |
|
"grad_norm": 0.27645203471183777, |
|
"learning_rate": 8.696757535270285e-05, |
|
"loss": 0.0413, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 5.483359746434231, |
|
"grad_norm": 0.23291446268558502, |
|
"learning_rate": 8.68792303327057e-05, |
|
"loss": 0.0306, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 5.499207606973059, |
|
"grad_norm": 0.34922730922698975, |
|
"learning_rate": 8.679063208255095e-05, |
|
"loss": 0.0299, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 5.5150554675118855, |
|
"grad_norm": 0.2651195228099823, |
|
"learning_rate": 8.67017812105959e-05, |
|
"loss": 0.0279, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 5.530903328050713, |
|
"grad_norm": 0.23726455867290497, |
|
"learning_rate": 8.661267832693247e-05, |
|
"loss": 0.0311, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 5.546751188589541, |
|
"grad_norm": 0.22650249302387238, |
|
"learning_rate": 8.6523324043383e-05, |
|
"loss": 0.0319, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 5.562599049128368, |
|
"grad_norm": 0.275462806224823, |
|
"learning_rate": 8.643371897349609e-05, |
|
"loss": 0.0328, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 5.5784469096671945, |
|
"grad_norm": 0.30848929286003113, |
|
"learning_rate": 8.63438637325423e-05, |
|
"loss": 0.0353, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 5.594294770206022, |
|
"grad_norm": 0.22483864426612854, |
|
"learning_rate": 8.625375893751005e-05, |
|
"loss": 0.0291, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 5.61014263074485, |
|
"grad_norm": 0.2007935345172882, |
|
"learning_rate": 8.616340520710124e-05, |
|
"loss": 0.0287, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 5.625990491283677, |
|
"grad_norm": 0.24104808270931244, |
|
"learning_rate": 8.607280316172717e-05, |
|
"loss": 0.0296, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 5.6418383518225035, |
|
"grad_norm": 0.25262153148651123, |
|
"learning_rate": 8.598195342350413e-05, |
|
"loss": 0.0332, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 5.657686212361331, |
|
"grad_norm": 0.2854628264904022, |
|
"learning_rate": 8.589085661624915e-05, |
|
"loss": 0.0287, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 5.673534072900159, |
|
"grad_norm": 0.27987590432167053, |
|
"learning_rate": 8.579951336547583e-05, |
|
"loss": 0.0358, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 5.689381933438986, |
|
"grad_norm": 0.28694331645965576, |
|
"learning_rate": 8.570792429838994e-05, |
|
"loss": 0.0301, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 5.705229793977813, |
|
"grad_norm": 0.4414514899253845, |
|
"learning_rate": 8.561609004388511e-05, |
|
"loss": 0.0276, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 5.72107765451664, |
|
"grad_norm": 0.36731958389282227, |
|
"learning_rate": 8.552401123253857e-05, |
|
"loss": 0.0326, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 5.736925515055468, |
|
"grad_norm": 0.3216352164745331, |
|
"learning_rate": 8.543168849660682e-05, |
|
"loss": 0.0351, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 5.752773375594295, |
|
"grad_norm": 0.2965521812438965, |
|
"learning_rate": 8.533912247002116e-05, |
|
"loss": 0.0336, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 5.768621236133122, |
|
"grad_norm": 0.37146931886672974, |
|
"learning_rate": 8.524631378838357e-05, |
|
"loss": 0.041, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 5.784469096671949, |
|
"grad_norm": 0.27054694294929504, |
|
"learning_rate": 8.515326308896213e-05, |
|
"loss": 0.0333, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 5.800316957210777, |
|
"grad_norm": 0.30338549613952637, |
|
"learning_rate": 8.505997101068675e-05, |
|
"loss": 0.0305, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 5.816164817749604, |
|
"grad_norm": 0.2014935314655304, |
|
"learning_rate": 8.496643819414476e-05, |
|
"loss": 0.0292, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 5.832012678288431, |
|
"grad_norm": 0.3620418906211853, |
|
"learning_rate": 8.48726652815765e-05, |
|
"loss": 0.0345, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 5.847860538827258, |
|
"grad_norm": 0.22847791016101837, |
|
"learning_rate": 8.477865291687095e-05, |
|
"loss": 0.038, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 5.863708399366086, |
|
"grad_norm": 0.42736053466796875, |
|
"learning_rate": 8.468440174556127e-05, |
|
"loss": 0.0341, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 5.879556259904913, |
|
"grad_norm": 0.2668206989765167, |
|
"learning_rate": 8.458991241482036e-05, |
|
"loss": 0.0365, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 5.89540412044374, |
|
"grad_norm": 0.24107444286346436, |
|
"learning_rate": 8.449518557345645e-05, |
|
"loss": 0.033, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 5.911251980982567, |
|
"grad_norm": 0.2556779384613037, |
|
"learning_rate": 8.440022187190864e-05, |
|
"loss": 0.0336, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 5.927099841521395, |
|
"grad_norm": 0.2224377542734146, |
|
"learning_rate": 8.43050219622424e-05, |
|
"loss": 0.0257, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 5.942947702060222, |
|
"grad_norm": 0.247999370098114, |
|
"learning_rate": 8.420958649814513e-05, |
|
"loss": 0.0325, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 5.958795562599049, |
|
"grad_norm": 0.3033657670021057, |
|
"learning_rate": 8.411391613492165e-05, |
|
"loss": 0.0336, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 5.974643423137876, |
|
"grad_norm": 0.3270326852798462, |
|
"learning_rate": 8.401801152948973e-05, |
|
"loss": 0.0302, |
|
"step": 3770 |
|
}, |
|
{ |
|
"epoch": 5.990491283676704, |
|
"grad_norm": 0.23401206731796265, |
|
"learning_rate": 8.392187334037555e-05, |
|
"loss": 0.0308, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 6.006339144215531, |
|
"grad_norm": 0.2145588994026184, |
|
"learning_rate": 8.382550222770915e-05, |
|
"loss": 0.035, |
|
"step": 3790 |
|
}, |
|
{ |
|
"epoch": 6.022187004754358, |
|
"grad_norm": 0.27132412791252136, |
|
"learning_rate": 8.372889885321996e-05, |
|
"loss": 0.0313, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 6.038034865293185, |
|
"grad_norm": 0.21529650688171387, |
|
"learning_rate": 8.363206388023224e-05, |
|
"loss": 0.0297, |
|
"step": 3810 |
|
}, |
|
{ |
|
"epoch": 6.053882725832013, |
|
"grad_norm": 0.25313499569892883, |
|
"learning_rate": 8.353499797366051e-05, |
|
"loss": 0.0255, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 6.06973058637084, |
|
"grad_norm": 0.19570957124233246, |
|
"learning_rate": 8.343770180000497e-05, |
|
"loss": 0.0275, |
|
"step": 3830 |
|
}, |
|
{ |
|
"epoch": 6.085578446909667, |
|
"grad_norm": 0.24506336450576782, |
|
"learning_rate": 8.334017602734697e-05, |
|
"loss": 0.0268, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 6.101426307448494, |
|
"grad_norm": 0.21346315741539001, |
|
"learning_rate": 8.324242132534435e-05, |
|
"loss": 0.024, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 6.117274167987322, |
|
"grad_norm": 0.3212679624557495, |
|
"learning_rate": 8.314443836522692e-05, |
|
"loss": 0.036, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 6.133122028526149, |
|
"grad_norm": 0.24916702508926392, |
|
"learning_rate": 8.304622781979183e-05, |
|
"loss": 0.0271, |
|
"step": 3870 |
|
}, |
|
{ |
|
"epoch": 6.148969889064976, |
|
"grad_norm": 0.30624908208847046, |
|
"learning_rate": 8.294779036339893e-05, |
|
"loss": 0.0318, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 6.164817749603803, |
|
"grad_norm": 0.2676468789577484, |
|
"learning_rate": 8.284912667196612e-05, |
|
"loss": 0.0294, |
|
"step": 3890 |
|
}, |
|
{ |
|
"epoch": 6.180665610142631, |
|
"grad_norm": 0.24745798110961914, |
|
"learning_rate": 8.275023742296474e-05, |
|
"loss": 0.0303, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 6.196513470681458, |
|
"grad_norm": 0.2466627061367035, |
|
"learning_rate": 8.265112329541495e-05, |
|
"loss": 0.0255, |
|
"step": 3910 |
|
}, |
|
{ |
|
"epoch": 6.212361331220285, |
|
"grad_norm": 0.3070094883441925, |
|
"learning_rate": 8.255178496988101e-05, |
|
"loss": 0.0284, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 6.228209191759112, |
|
"grad_norm": 0.3049757778644562, |
|
"learning_rate": 8.245222312846663e-05, |
|
"loss": 0.0286, |
|
"step": 3930 |
|
}, |
|
{ |
|
"epoch": 6.24405705229794, |
|
"grad_norm": 0.3167661428451538, |
|
"learning_rate": 8.235243845481029e-05, |
|
"loss": 0.0256, |
|
"step": 3940 |
|
}, |
|
{ |
|
"epoch": 6.259904912836767, |
|
"grad_norm": 0.2966691851615906, |
|
"learning_rate": 8.225243163408051e-05, |
|
"loss": 0.0332, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 6.2757527733755945, |
|
"grad_norm": 0.29441869258880615, |
|
"learning_rate": 8.215220335297124e-05, |
|
"loss": 0.0279, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 6.291600633914421, |
|
"grad_norm": 0.2598278522491455, |
|
"learning_rate": 8.205175429969701e-05, |
|
"loss": 0.0327, |
|
"step": 3970 |
|
}, |
|
{ |
|
"epoch": 6.307448494453249, |
|
"grad_norm": 0.3308967351913452, |
|
"learning_rate": 8.195108516398834e-05, |
|
"loss": 0.0301, |
|
"step": 3980 |
|
}, |
|
{ |
|
"epoch": 6.323296354992076, |
|
"grad_norm": 0.2924744486808777, |
|
"learning_rate": 8.185019663708689e-05, |
|
"loss": 0.035, |
|
"step": 3990 |
|
}, |
|
{ |
|
"epoch": 6.3391442155309035, |
|
"grad_norm": 0.29859915375709534, |
|
"learning_rate": 8.174908941174078e-05, |
|
"loss": 0.0293, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 6.35499207606973, |
|
"grad_norm": 0.2642618715763092, |
|
"learning_rate": 8.164776418219982e-05, |
|
"loss": 0.0377, |
|
"step": 4010 |
|
}, |
|
{ |
|
"epoch": 6.370839936608558, |
|
"grad_norm": 0.25345122814178467, |
|
"learning_rate": 8.154622164421075e-05, |
|
"loss": 0.0321, |
|
"step": 4020 |
|
}, |
|
{ |
|
"epoch": 6.386687797147385, |
|
"grad_norm": 0.27396586537361145, |
|
"learning_rate": 8.144446249501244e-05, |
|
"loss": 0.0362, |
|
"step": 4030 |
|
}, |
|
{ |
|
"epoch": 6.4025356576862125, |
|
"grad_norm": 0.23460988700389862, |
|
"learning_rate": 8.13424874333311e-05, |
|
"loss": 0.0288, |
|
"step": 4040 |
|
}, |
|
{ |
|
"epoch": 6.418383518225039, |
|
"grad_norm": 0.268079549074173, |
|
"learning_rate": 8.124029715937552e-05, |
|
"loss": 0.0337, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 6.434231378763867, |
|
"grad_norm": 0.23016807436943054, |
|
"learning_rate": 8.113789237483224e-05, |
|
"loss": 0.0297, |
|
"step": 4060 |
|
}, |
|
{ |
|
"epoch": 6.450079239302694, |
|
"grad_norm": 0.21488989889621735, |
|
"learning_rate": 8.103527378286071e-05, |
|
"loss": 0.0226, |
|
"step": 4070 |
|
}, |
|
{ |
|
"epoch": 6.4659270998415215, |
|
"grad_norm": 0.3006250262260437, |
|
"learning_rate": 8.093244208808847e-05, |
|
"loss": 0.0323, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 6.481774960380348, |
|
"grad_norm": 0.31131377816200256, |
|
"learning_rate": 8.082939799660641e-05, |
|
"loss": 0.0263, |
|
"step": 4090 |
|
}, |
|
{ |
|
"epoch": 6.497622820919176, |
|
"grad_norm": 0.3602330982685089, |
|
"learning_rate": 8.072614221596372e-05, |
|
"loss": 0.0327, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 6.513470681458003, |
|
"grad_norm": 0.24554632604122162, |
|
"learning_rate": 8.062267545516323e-05, |
|
"loss": 0.0307, |
|
"step": 4110 |
|
}, |
|
{ |
|
"epoch": 6.5293185419968305, |
|
"grad_norm": 0.3024232089519501, |
|
"learning_rate": 8.05189984246564e-05, |
|
"loss": 0.031, |
|
"step": 4120 |
|
}, |
|
{ |
|
"epoch": 6.545166402535658, |
|
"grad_norm": 0.20746688544750214, |
|
"learning_rate": 8.041511183633855e-05, |
|
"loss": 0.0296, |
|
"step": 4130 |
|
}, |
|
{ |
|
"epoch": 6.561014263074485, |
|
"grad_norm": 0.2613235414028168, |
|
"learning_rate": 8.03110164035439e-05, |
|
"loss": 0.0349, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 6.576862123613312, |
|
"grad_norm": 0.41507190465927124, |
|
"learning_rate": 8.020671284104072e-05, |
|
"loss": 0.0377, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 6.5927099841521395, |
|
"grad_norm": 0.2900952696800232, |
|
"learning_rate": 8.010220186502635e-05, |
|
"loss": 0.0296, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 6.608557844690967, |
|
"grad_norm": 0.26226314902305603, |
|
"learning_rate": 7.999748419312234e-05, |
|
"loss": 0.0289, |
|
"step": 4170 |
|
}, |
|
{ |
|
"epoch": 6.624405705229794, |
|
"grad_norm": 0.3070898950099945, |
|
"learning_rate": 7.989256054436956e-05, |
|
"loss": 0.0298, |
|
"step": 4180 |
|
}, |
|
{ |
|
"epoch": 6.640253565768621, |
|
"grad_norm": 0.2827918231487274, |
|
"learning_rate": 7.978743163922316e-05, |
|
"loss": 0.0299, |
|
"step": 4190 |
|
}, |
|
{ |
|
"epoch": 6.6561014263074485, |
|
"grad_norm": 0.2928052842617035, |
|
"learning_rate": 7.968209819954768e-05, |
|
"loss": 0.0337, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 6.671949286846276, |
|
"grad_norm": 0.23168888688087463, |
|
"learning_rate": 7.957656094861214e-05, |
|
"loss": 0.0334, |
|
"step": 4210 |
|
}, |
|
{ |
|
"epoch": 6.687797147385103, |
|
"grad_norm": 0.24511629343032837, |
|
"learning_rate": 7.947082061108497e-05, |
|
"loss": 0.0302, |
|
"step": 4220 |
|
}, |
|
{ |
|
"epoch": 6.70364500792393, |
|
"grad_norm": 0.24456819891929626, |
|
"learning_rate": 7.93648779130291e-05, |
|
"loss": 0.0311, |
|
"step": 4230 |
|
}, |
|
{ |
|
"epoch": 6.7194928684627575, |
|
"grad_norm": 0.26930612325668335, |
|
"learning_rate": 7.925873358189699e-05, |
|
"loss": 0.0291, |
|
"step": 4240 |
|
}, |
|
{ |
|
"epoch": 6.735340729001585, |
|
"grad_norm": 0.18482516705989838, |
|
"learning_rate": 7.91523883465256e-05, |
|
"loss": 0.0313, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 6.751188589540412, |
|
"grad_norm": 0.36619842052459717, |
|
"learning_rate": 7.904584293713134e-05, |
|
"loss": 0.0298, |
|
"step": 4260 |
|
}, |
|
{ |
|
"epoch": 6.767036450079239, |
|
"grad_norm": 0.28840282559394836, |
|
"learning_rate": 7.893909808530518e-05, |
|
"loss": 0.0318, |
|
"step": 4270 |
|
}, |
|
{ |
|
"epoch": 6.7828843106180665, |
|
"grad_norm": 0.2239818572998047, |
|
"learning_rate": 7.883215452400752e-05, |
|
"loss": 0.0295, |
|
"step": 4280 |
|
}, |
|
{ |
|
"epoch": 6.798732171156894, |
|
"grad_norm": 0.21004091203212738, |
|
"learning_rate": 7.872501298756319e-05, |
|
"loss": 0.0284, |
|
"step": 4290 |
|
}, |
|
{ |
|
"epoch": 6.814580031695721, |
|
"grad_norm": 0.21372993290424347, |
|
"learning_rate": 7.861767421165644e-05, |
|
"loss": 0.031, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 6.830427892234549, |
|
"grad_norm": 0.20823988318443298, |
|
"learning_rate": 7.851013893332584e-05, |
|
"loss": 0.0275, |
|
"step": 4310 |
|
}, |
|
{ |
|
"epoch": 6.8462757527733755, |
|
"grad_norm": 0.24077993631362915, |
|
"learning_rate": 7.84024078909592e-05, |
|
"loss": 0.0267, |
|
"step": 4320 |
|
}, |
|
{ |
|
"epoch": 6.862123613312203, |
|
"grad_norm": 0.29702138900756836, |
|
"learning_rate": 7.82944818242886e-05, |
|
"loss": 0.0293, |
|
"step": 4330 |
|
}, |
|
{ |
|
"epoch": 6.87797147385103, |
|
"grad_norm": 0.23424126207828522, |
|
"learning_rate": 7.818636147438523e-05, |
|
"loss": 0.0254, |
|
"step": 4340 |
|
}, |
|
{ |
|
"epoch": 6.893819334389858, |
|
"grad_norm": 0.28826698660850525, |
|
"learning_rate": 7.807804758365431e-05, |
|
"loss": 0.028, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 6.9096671949286845, |
|
"grad_norm": 0.25839823484420776, |
|
"learning_rate": 7.796954089583e-05, |
|
"loss": 0.0339, |
|
"step": 4360 |
|
}, |
|
{ |
|
"epoch": 6.925515055467512, |
|
"grad_norm": 0.25523653626441956, |
|
"learning_rate": 7.786084215597029e-05, |
|
"loss": 0.0283, |
|
"step": 4370 |
|
}, |
|
{ |
|
"epoch": 6.941362916006339, |
|
"grad_norm": 0.23376896977424622, |
|
"learning_rate": 7.775195211045193e-05, |
|
"loss": 0.0287, |
|
"step": 4380 |
|
}, |
|
{ |
|
"epoch": 6.957210776545167, |
|
"grad_norm": 0.2951514720916748, |
|
"learning_rate": 7.764287150696523e-05, |
|
"loss": 0.0279, |
|
"step": 4390 |
|
}, |
|
{ |
|
"epoch": 6.9730586370839935, |
|
"grad_norm": 0.3112223446369171, |
|
"learning_rate": 7.753360109450893e-05, |
|
"loss": 0.0348, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 6.988906497622821, |
|
"grad_norm": 0.3574570119380951, |
|
"learning_rate": 7.742414162338519e-05, |
|
"loss": 0.0315, |
|
"step": 4410 |
|
}, |
|
{ |
|
"epoch": 7.004754358161648, |
|
"grad_norm": 0.25105416774749756, |
|
"learning_rate": 7.73144938451942e-05, |
|
"loss": 0.0259, |
|
"step": 4420 |
|
}, |
|
{ |
|
"epoch": 7.020602218700476, |
|
"grad_norm": 0.313162624835968, |
|
"learning_rate": 7.720465851282927e-05, |
|
"loss": 0.0293, |
|
"step": 4430 |
|
}, |
|
{ |
|
"epoch": 7.0364500792393025, |
|
"grad_norm": 0.2756791412830353, |
|
"learning_rate": 7.70946363804715e-05, |
|
"loss": 0.032, |
|
"step": 4440 |
|
}, |
|
{ |
|
"epoch": 7.05229793977813, |
|
"grad_norm": 0.2672293484210968, |
|
"learning_rate": 7.698442820358463e-05, |
|
"loss": 0.0295, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 7.068145800316957, |
|
"grad_norm": 0.27197128534317017, |
|
"learning_rate": 7.687403473890988e-05, |
|
"loss": 0.0329, |
|
"step": 4460 |
|
}, |
|
{ |
|
"epoch": 7.083993660855785, |
|
"grad_norm": 0.3267204761505127, |
|
"learning_rate": 7.676345674446077e-05, |
|
"loss": 0.0336, |
|
"step": 4470 |
|
}, |
|
{ |
|
"epoch": 7.0998415213946116, |
|
"grad_norm": 0.3577364683151245, |
|
"learning_rate": 7.665269497951787e-05, |
|
"loss": 0.0253, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 7.115689381933439, |
|
"grad_norm": 0.25939124822616577, |
|
"learning_rate": 7.65417502046236e-05, |
|
"loss": 0.0257, |
|
"step": 4490 |
|
}, |
|
{ |
|
"epoch": 7.131537242472266, |
|
"grad_norm": 0.211978480219841, |
|
"learning_rate": 7.6430623181577e-05, |
|
"loss": 0.0276, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 7.147385103011094, |
|
"grad_norm": 0.22676114737987518, |
|
"learning_rate": 7.631931467342853e-05, |
|
"loss": 0.0264, |
|
"step": 4510 |
|
}, |
|
{ |
|
"epoch": 7.163232963549921, |
|
"grad_norm": 0.3186163604259491, |
|
"learning_rate": 7.620782544447483e-05, |
|
"loss": 0.0312, |
|
"step": 4520 |
|
}, |
|
{ |
|
"epoch": 7.179080824088748, |
|
"grad_norm": 0.2680210769176483, |
|
"learning_rate": 7.609615626025342e-05, |
|
"loss": 0.0297, |
|
"step": 4530 |
|
}, |
|
{ |
|
"epoch": 7.194928684627575, |
|
"grad_norm": 0.25488680601119995, |
|
"learning_rate": 7.598430788753748e-05, |
|
"loss": 0.0309, |
|
"step": 4540 |
|
}, |
|
{ |
|
"epoch": 7.210776545166403, |
|
"grad_norm": 0.25716468691825867, |
|
"learning_rate": 7.587228109433061e-05, |
|
"loss": 0.0295, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 7.22662440570523, |
|
"grad_norm": 0.17865824699401855, |
|
"learning_rate": 7.576007664986149e-05, |
|
"loss": 0.0275, |
|
"step": 4560 |
|
}, |
|
{ |
|
"epoch": 7.242472266244057, |
|
"grad_norm": 0.25337857007980347, |
|
"learning_rate": 7.56476953245787e-05, |
|
"loss": 0.0309, |
|
"step": 4570 |
|
}, |
|
{ |
|
"epoch": 7.258320126782884, |
|
"grad_norm": 0.23190538585186005, |
|
"learning_rate": 7.553513789014531e-05, |
|
"loss": 0.0326, |
|
"step": 4580 |
|
}, |
|
{ |
|
"epoch": 7.274167987321712, |
|
"grad_norm": 0.23697835206985474, |
|
"learning_rate": 7.542240511943362e-05, |
|
"loss": 0.0289, |
|
"step": 4590 |
|
}, |
|
{ |
|
"epoch": 7.290015847860539, |
|
"grad_norm": 0.19046033918857574, |
|
"learning_rate": 7.530949778651995e-05, |
|
"loss": 0.0272, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 7.305863708399366, |
|
"grad_norm": 0.2411852329969406, |
|
"learning_rate": 7.519641666667918e-05, |
|
"loss": 0.0281, |
|
"step": 4610 |
|
}, |
|
{ |
|
"epoch": 7.321711568938193, |
|
"grad_norm": 0.2323843538761139, |
|
"learning_rate": 7.508316253637951e-05, |
|
"loss": 0.0286, |
|
"step": 4620 |
|
}, |
|
{ |
|
"epoch": 7.337559429477021, |
|
"grad_norm": 0.2985825538635254, |
|
"learning_rate": 7.496973617327714e-05, |
|
"loss": 0.027, |
|
"step": 4630 |
|
}, |
|
{ |
|
"epoch": 7.353407290015848, |
|
"grad_norm": 0.2772405743598938, |
|
"learning_rate": 7.485613835621088e-05, |
|
"loss": 0.0287, |
|
"step": 4640 |
|
}, |
|
{ |
|
"epoch": 7.369255150554675, |
|
"grad_norm": 0.28249087929725647, |
|
"learning_rate": 7.474236986519679e-05, |
|
"loss": 0.029, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 7.385103011093502, |
|
"grad_norm": 0.2735413908958435, |
|
"learning_rate": 7.462843148142292e-05, |
|
"loss": 0.0285, |
|
"step": 4660 |
|
}, |
|
{ |
|
"epoch": 7.40095087163233, |
|
"grad_norm": 0.3959973454475403, |
|
"learning_rate": 7.451432398724384e-05, |
|
"loss": 0.0314, |
|
"step": 4670 |
|
}, |
|
{ |
|
"epoch": 7.416798732171157, |
|
"grad_norm": 0.23869942128658295, |
|
"learning_rate": 7.440004816617533e-05, |
|
"loss": 0.0302, |
|
"step": 4680 |
|
}, |
|
{ |
|
"epoch": 7.432646592709984, |
|
"grad_norm": 0.2646492123603821, |
|
"learning_rate": 7.428560480288896e-05, |
|
"loss": 0.0277, |
|
"step": 4690 |
|
}, |
|
{ |
|
"epoch": 7.448494453248811, |
|
"grad_norm": 0.23564158380031586, |
|
"learning_rate": 7.417099468320676e-05, |
|
"loss": 0.0284, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 7.464342313787639, |
|
"grad_norm": 0.19051893055438995, |
|
"learning_rate": 7.405621859409577e-05, |
|
"loss": 0.031, |
|
"step": 4710 |
|
}, |
|
{ |
|
"epoch": 7.480190174326466, |
|
"grad_norm": 0.5017970204353333, |
|
"learning_rate": 7.394127732366264e-05, |
|
"loss": 0.028, |
|
"step": 4720 |
|
}, |
|
{ |
|
"epoch": 7.496038034865293, |
|
"grad_norm": 0.24149303138256073, |
|
"learning_rate": 7.382617166114826e-05, |
|
"loss": 0.0263, |
|
"step": 4730 |
|
}, |
|
{ |
|
"epoch": 7.51188589540412, |
|
"grad_norm": 0.2918100357055664, |
|
"learning_rate": 7.371090239692228e-05, |
|
"loss": 0.029, |
|
"step": 4740 |
|
}, |
|
{ |
|
"epoch": 7.527733755942948, |
|
"grad_norm": 0.41638660430908203, |
|
"learning_rate": 7.359547032247773e-05, |
|
"loss": 0.0279, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 7.543581616481775, |
|
"grad_norm": 0.24228066205978394, |
|
"learning_rate": 7.347987623042561e-05, |
|
"loss": 0.0249, |
|
"step": 4760 |
|
}, |
|
{ |
|
"epoch": 7.559429477020602, |
|
"grad_norm": 0.3426589369773865, |
|
"learning_rate": 7.336412091448936e-05, |
|
"loss": 0.0291, |
|
"step": 4770 |
|
}, |
|
{ |
|
"epoch": 7.575277337559429, |
|
"grad_norm": 0.381527841091156, |
|
"learning_rate": 7.324820516949946e-05, |
|
"loss": 0.0329, |
|
"step": 4780 |
|
}, |
|
{ |
|
"epoch": 7.591125198098257, |
|
"grad_norm": 0.26290562748908997, |
|
"learning_rate": 7.3132129791388e-05, |
|
"loss": 0.0305, |
|
"step": 4790 |
|
}, |
|
{ |
|
"epoch": 7.606973058637084, |
|
"grad_norm": 0.28301799297332764, |
|
"learning_rate": 7.301589557718315e-05, |
|
"loss": 0.0224, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 7.622820919175911, |
|
"grad_norm": 0.33471032977104187, |
|
"learning_rate": 7.28995033250038e-05, |
|
"loss": 0.0356, |
|
"step": 4810 |
|
}, |
|
{ |
|
"epoch": 7.638668779714738, |
|
"grad_norm": 0.219041109085083, |
|
"learning_rate": 7.278295383405389e-05, |
|
"loss": 0.0278, |
|
"step": 4820 |
|
}, |
|
{ |
|
"epoch": 7.654516640253566, |
|
"grad_norm": 0.27412205934524536, |
|
"learning_rate": 7.266624790461713e-05, |
|
"loss": 0.0271, |
|
"step": 4830 |
|
}, |
|
{ |
|
"epoch": 7.6703645007923935, |
|
"grad_norm": 0.27656254172325134, |
|
"learning_rate": 7.254938633805137e-05, |
|
"loss": 0.0296, |
|
"step": 4840 |
|
}, |
|
{ |
|
"epoch": 7.68621236133122, |
|
"grad_norm": 0.23747026920318604, |
|
"learning_rate": 7.243236993678311e-05, |
|
"loss": 0.0217, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 7.702060221870047, |
|
"grad_norm": 0.29850152134895325, |
|
"learning_rate": 7.231519950430212e-05, |
|
"loss": 0.0297, |
|
"step": 4860 |
|
}, |
|
{ |
|
"epoch": 7.717908082408875, |
|
"grad_norm": 0.2872811555862427, |
|
"learning_rate": 7.219787584515567e-05, |
|
"loss": 0.0274, |
|
"step": 4870 |
|
}, |
|
{ |
|
"epoch": 7.7337559429477025, |
|
"grad_norm": 0.26487553119659424, |
|
"learning_rate": 7.208039976494329e-05, |
|
"loss": 0.0267, |
|
"step": 4880 |
|
}, |
|
{ |
|
"epoch": 7.749603803486529, |
|
"grad_norm": 0.32571732997894287, |
|
"learning_rate": 7.196277207031103e-05, |
|
"loss": 0.031, |
|
"step": 4890 |
|
}, |
|
{ |
|
"epoch": 7.765451664025356, |
|
"grad_norm": 0.2101273387670517, |
|
"learning_rate": 7.184499356894606e-05, |
|
"loss": 0.0261, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 7.781299524564184, |
|
"grad_norm": 0.3179239332675934, |
|
"learning_rate": 7.172706506957095e-05, |
|
"loss": 0.0303, |
|
"step": 4910 |
|
}, |
|
{ |
|
"epoch": 7.7971473851030115, |
|
"grad_norm": 0.1984127014875412, |
|
"learning_rate": 7.160898738193833e-05, |
|
"loss": 0.0226, |
|
"step": 4920 |
|
}, |
|
{ |
|
"epoch": 7.812995245641838, |
|
"grad_norm": 0.19061654806137085, |
|
"learning_rate": 7.149076131682521e-05, |
|
"loss": 0.0219, |
|
"step": 4930 |
|
}, |
|
{ |
|
"epoch": 7.828843106180665, |
|
"grad_norm": 0.27196112275123596, |
|
"learning_rate": 7.137238768602739e-05, |
|
"loss": 0.0327, |
|
"step": 4940 |
|
}, |
|
{ |
|
"epoch": 7.844690966719493, |
|
"grad_norm": 0.2761131525039673, |
|
"learning_rate": 7.125386730235395e-05, |
|
"loss": 0.0258, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 7.8605388272583205, |
|
"grad_norm": 0.22716206312179565, |
|
"learning_rate": 7.113520097962165e-05, |
|
"loss": 0.0306, |
|
"step": 4960 |
|
}, |
|
{ |
|
"epoch": 7.876386687797147, |
|
"grad_norm": 0.278010755777359, |
|
"learning_rate": 7.101638953264933e-05, |
|
"loss": 0.0261, |
|
"step": 4970 |
|
}, |
|
{ |
|
"epoch": 7.892234548335974, |
|
"grad_norm": 0.19748617708683014, |
|
"learning_rate": 7.08974337772523e-05, |
|
"loss": 0.0216, |
|
"step": 4980 |
|
}, |
|
{ |
|
"epoch": 7.908082408874802, |
|
"grad_norm": 0.35271981358528137, |
|
"learning_rate": 7.077833453023678e-05, |
|
"loss": 0.0236, |
|
"step": 4990 |
|
}, |
|
{ |
|
"epoch": 7.9239302694136295, |
|
"grad_norm": 0.33073899149894714, |
|
"learning_rate": 7.065909260939429e-05, |
|
"loss": 0.0274, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 7.939778129952456, |
|
"grad_norm": 0.36262351274490356, |
|
"learning_rate": 7.053970883349599e-05, |
|
"loss": 0.0229, |
|
"step": 5010 |
|
}, |
|
{ |
|
"epoch": 7.955625990491284, |
|
"grad_norm": 0.4560012221336365, |
|
"learning_rate": 7.04201840222871e-05, |
|
"loss": 0.027, |
|
"step": 5020 |
|
}, |
|
{ |
|
"epoch": 7.971473851030111, |
|
"grad_norm": 0.3530636727809906, |
|
"learning_rate": 7.03005189964812e-05, |
|
"loss": 0.0307, |
|
"step": 5030 |
|
}, |
|
{ |
|
"epoch": 7.9873217115689386, |
|
"grad_norm": 0.2944605052471161, |
|
"learning_rate": 7.018071457775474e-05, |
|
"loss": 0.0254, |
|
"step": 5040 |
|
}, |
|
{ |
|
"epoch": 8.003169572107765, |
|
"grad_norm": 0.25718453526496887, |
|
"learning_rate": 7.006077158874124e-05, |
|
"loss": 0.0289, |
|
"step": 5050 |
|
}, |
|
{ |
|
"epoch": 8.019017432646592, |
|
"grad_norm": 0.23285925388336182, |
|
"learning_rate": 6.994069085302573e-05, |
|
"loss": 0.0278, |
|
"step": 5060 |
|
}, |
|
{ |
|
"epoch": 8.03486529318542, |
|
"grad_norm": 0.2729281485080719, |
|
"learning_rate": 6.98204731951391e-05, |
|
"loss": 0.0259, |
|
"step": 5070 |
|
}, |
|
{ |
|
"epoch": 8.050713153724248, |
|
"grad_norm": 0.2978493869304657, |
|
"learning_rate": 6.970011944055234e-05, |
|
"loss": 0.0231, |
|
"step": 5080 |
|
}, |
|
{ |
|
"epoch": 8.066561014263074, |
|
"grad_norm": 0.20820550620555878, |
|
"learning_rate": 6.9579630415671e-05, |
|
"loss": 0.0281, |
|
"step": 5090 |
|
}, |
|
{ |
|
"epoch": 8.082408874801901, |
|
"grad_norm": 0.23685221374034882, |
|
"learning_rate": 6.945900694782949e-05, |
|
"loss": 0.0251, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 8.09825673534073, |
|
"grad_norm": 0.25722959637641907, |
|
"learning_rate": 6.933824986528527e-05, |
|
"loss": 0.0302, |
|
"step": 5110 |
|
}, |
|
{ |
|
"epoch": 8.114104595879557, |
|
"grad_norm": 0.28215500712394714, |
|
"learning_rate": 6.921735999721338e-05, |
|
"loss": 0.0218, |
|
"step": 5120 |
|
}, |
|
{ |
|
"epoch": 8.129952456418383, |
|
"grad_norm": 0.24379587173461914, |
|
"learning_rate": 6.909633817370051e-05, |
|
"loss": 0.0274, |
|
"step": 5130 |
|
}, |
|
{ |
|
"epoch": 8.14580031695721, |
|
"grad_norm": 0.295631468296051, |
|
"learning_rate": 6.897518522573951e-05, |
|
"loss": 0.0226, |
|
"step": 5140 |
|
}, |
|
{ |
|
"epoch": 8.161648177496039, |
|
"grad_norm": 0.24112898111343384, |
|
"learning_rate": 6.885390198522356e-05, |
|
"loss": 0.027, |
|
"step": 5150 |
|
}, |
|
{ |
|
"epoch": 8.177496038034866, |
|
"grad_norm": 0.2933104336261749, |
|
"learning_rate": 6.873248928494046e-05, |
|
"loss": 0.0257, |
|
"step": 5160 |
|
}, |
|
{ |
|
"epoch": 8.193343898573692, |
|
"grad_norm": 0.29547762870788574, |
|
"learning_rate": 6.8610947958567e-05, |
|
"loss": 0.0242, |
|
"step": 5170 |
|
}, |
|
{ |
|
"epoch": 8.20919175911252, |
|
"grad_norm": 0.27927926182746887, |
|
"learning_rate": 6.848927884066311e-05, |
|
"loss": 0.0257, |
|
"step": 5180 |
|
}, |
|
{ |
|
"epoch": 8.225039619651348, |
|
"grad_norm": 0.2721002697944641, |
|
"learning_rate": 6.836748276666627e-05, |
|
"loss": 0.0244, |
|
"step": 5190 |
|
}, |
|
{ |
|
"epoch": 8.240887480190175, |
|
"grad_norm": 0.25311270356178284, |
|
"learning_rate": 6.824556057288563e-05, |
|
"loss": 0.0279, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 8.256735340729001, |
|
"grad_norm": 0.23902995884418488, |
|
"learning_rate": 6.81235130964964e-05, |
|
"loss": 0.0312, |
|
"step": 5210 |
|
}, |
|
{ |
|
"epoch": 8.272583201267828, |
|
"grad_norm": 0.30612844228744507, |
|
"learning_rate": 6.8001341175534e-05, |
|
"loss": 0.0357, |
|
"step": 5220 |
|
}, |
|
{ |
|
"epoch": 8.288431061806657, |
|
"grad_norm": 0.19130030274391174, |
|
"learning_rate": 6.787904564888837e-05, |
|
"loss": 0.0242, |
|
"step": 5230 |
|
}, |
|
{ |
|
"epoch": 8.304278922345484, |
|
"grad_norm": 0.2579098045825958, |
|
"learning_rate": 6.775662735629816e-05, |
|
"loss": 0.0329, |
|
"step": 5240 |
|
}, |
|
{ |
|
"epoch": 8.32012678288431, |
|
"grad_norm": 0.3037128150463104, |
|
"learning_rate": 6.763408713834498e-05, |
|
"loss": 0.0262, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 8.335974643423137, |
|
"grad_norm": 0.2066265344619751, |
|
"learning_rate": 6.751142583644767e-05, |
|
"loss": 0.0311, |
|
"step": 5260 |
|
}, |
|
{ |
|
"epoch": 8.351822503961966, |
|
"grad_norm": 0.19183726608753204, |
|
"learning_rate": 6.738864429285648e-05, |
|
"loss": 0.0291, |
|
"step": 5270 |
|
}, |
|
{ |
|
"epoch": 8.367670364500793, |
|
"grad_norm": 0.2202986180782318, |
|
"learning_rate": 6.72657433506473e-05, |
|
"loss": 0.0224, |
|
"step": 5280 |
|
}, |
|
{ |
|
"epoch": 8.38351822503962, |
|
"grad_norm": 0.2542373538017273, |
|
"learning_rate": 6.714272385371585e-05, |
|
"loss": 0.0254, |
|
"step": 5290 |
|
}, |
|
{ |
|
"epoch": 8.399366085578446, |
|
"grad_norm": 0.33272790908813477, |
|
"learning_rate": 6.701958664677191e-05, |
|
"loss": 0.0245, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 8.415213946117275, |
|
"grad_norm": 0.25956010818481445, |
|
"learning_rate": 6.68963325753335e-05, |
|
"loss": 0.0255, |
|
"step": 5310 |
|
}, |
|
{ |
|
"epoch": 8.431061806656102, |
|
"grad_norm": 0.314311683177948, |
|
"learning_rate": 6.677296248572112e-05, |
|
"loss": 0.0248, |
|
"step": 5320 |
|
}, |
|
{ |
|
"epoch": 8.446909667194928, |
|
"grad_norm": 0.28039562702178955, |
|
"learning_rate": 6.664947722505188e-05, |
|
"loss": 0.0282, |
|
"step": 5330 |
|
}, |
|
{ |
|
"epoch": 8.462757527733755, |
|
"grad_norm": 0.23970749974250793, |
|
"learning_rate": 6.652587764123373e-05, |
|
"loss": 0.0273, |
|
"step": 5340 |
|
}, |
|
{ |
|
"epoch": 8.478605388272584, |
|
"grad_norm": 0.1702006310224533, |
|
"learning_rate": 6.640216458295958e-05, |
|
"loss": 0.0291, |
|
"step": 5350 |
|
}, |
|
{ |
|
"epoch": 8.49445324881141, |
|
"grad_norm": 0.13902607560157776, |
|
"learning_rate": 6.627833889970155e-05, |
|
"loss": 0.0241, |
|
"step": 5360 |
|
}, |
|
{ |
|
"epoch": 8.510301109350237, |
|
"grad_norm": 0.2187580019235611, |
|
"learning_rate": 6.615440144170502e-05, |
|
"loss": 0.027, |
|
"step": 5370 |
|
}, |
|
{ |
|
"epoch": 8.526148969889064, |
|
"grad_norm": 0.2224210649728775, |
|
"learning_rate": 6.603035305998301e-05, |
|
"loss": 0.0235, |
|
"step": 5380 |
|
}, |
|
{ |
|
"epoch": 8.541996830427893, |
|
"grad_norm": 0.32996585965156555, |
|
"learning_rate": 6.590619460631005e-05, |
|
"loss": 0.0267, |
|
"step": 5390 |
|
}, |
|
{ |
|
"epoch": 8.55784469096672, |
|
"grad_norm": 0.31346139311790466, |
|
"learning_rate": 6.578192693321656e-05, |
|
"loss": 0.0194, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 8.573692551505546, |
|
"grad_norm": 0.198611781001091, |
|
"learning_rate": 6.565755089398285e-05, |
|
"loss": 0.0256, |
|
"step": 5410 |
|
}, |
|
{ |
|
"epoch": 8.589540412044373, |
|
"grad_norm": 0.2415742725133896, |
|
"learning_rate": 6.553306734263342e-05, |
|
"loss": 0.0233, |
|
"step": 5420 |
|
}, |
|
{ |
|
"epoch": 8.605388272583202, |
|
"grad_norm": 0.3221810460090637, |
|
"learning_rate": 6.540847713393088e-05, |
|
"loss": 0.025, |
|
"step": 5430 |
|
}, |
|
{ |
|
"epoch": 8.621236133122029, |
|
"grad_norm": 0.17353218793869019, |
|
"learning_rate": 6.528378112337031e-05, |
|
"loss": 0.0229, |
|
"step": 5440 |
|
}, |
|
{ |
|
"epoch": 8.637083993660855, |
|
"grad_norm": 0.31122300028800964, |
|
"learning_rate": 6.515898016717318e-05, |
|
"loss": 0.0229, |
|
"step": 5450 |
|
}, |
|
{ |
|
"epoch": 8.652931854199682, |
|
"grad_norm": 0.27111196517944336, |
|
"learning_rate": 6.50340751222816e-05, |
|
"loss": 0.0329, |
|
"step": 5460 |
|
}, |
|
{ |
|
"epoch": 8.66877971473851, |
|
"grad_norm": 0.29258912801742554, |
|
"learning_rate": 6.49090668463525e-05, |
|
"loss": 0.0251, |
|
"step": 5470 |
|
}, |
|
{ |
|
"epoch": 8.684627575277338, |
|
"grad_norm": 0.23192371428012848, |
|
"learning_rate": 6.478395619775145e-05, |
|
"loss": 0.0294, |
|
"step": 5480 |
|
}, |
|
{ |
|
"epoch": 8.700475435816164, |
|
"grad_norm": 0.31985238194465637, |
|
"learning_rate": 6.465874403554711e-05, |
|
"loss": 0.0242, |
|
"step": 5490 |
|
}, |
|
{ |
|
"epoch": 8.716323296354991, |
|
"grad_norm": 0.23439311981201172, |
|
"learning_rate": 6.453343121950513e-05, |
|
"loss": 0.0267, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 8.73217115689382, |
|
"grad_norm": 0.18457037210464478, |
|
"learning_rate": 6.44080186100823e-05, |
|
"loss": 0.0232, |
|
"step": 5510 |
|
}, |
|
{ |
|
"epoch": 8.748019017432647, |
|
"grad_norm": 0.2508156895637512, |
|
"learning_rate": 6.428250706842064e-05, |
|
"loss": 0.0365, |
|
"step": 5520 |
|
}, |
|
{ |
|
"epoch": 8.763866877971473, |
|
"grad_norm": 0.2573819160461426, |
|
"learning_rate": 6.415689745634147e-05, |
|
"loss": 0.029, |
|
"step": 5530 |
|
}, |
|
{ |
|
"epoch": 8.7797147385103, |
|
"grad_norm": 0.2110164314508438, |
|
"learning_rate": 6.403119063633956e-05, |
|
"loss": 0.0254, |
|
"step": 5540 |
|
}, |
|
{ |
|
"epoch": 8.795562599049129, |
|
"grad_norm": 0.3200654089450836, |
|
"learning_rate": 6.390538747157706e-05, |
|
"loss": 0.028, |
|
"step": 5550 |
|
}, |
|
{ |
|
"epoch": 8.811410459587956, |
|
"grad_norm": 0.2371603101491928, |
|
"learning_rate": 6.377948882587777e-05, |
|
"loss": 0.0217, |
|
"step": 5560 |
|
}, |
|
{ |
|
"epoch": 8.827258320126782, |
|
"grad_norm": 0.2176957130432129, |
|
"learning_rate": 6.365349556372105e-05, |
|
"loss": 0.0319, |
|
"step": 5570 |
|
}, |
|
{ |
|
"epoch": 8.843106180665611, |
|
"grad_norm": 0.2418396770954132, |
|
"learning_rate": 6.352740855023594e-05, |
|
"loss": 0.0258, |
|
"step": 5580 |
|
}, |
|
{ |
|
"epoch": 8.858954041204438, |
|
"grad_norm": 0.24693243205547333, |
|
"learning_rate": 6.340122865119524e-05, |
|
"loss": 0.0293, |
|
"step": 5590 |
|
}, |
|
{ |
|
"epoch": 8.874801901743265, |
|
"grad_norm": 0.249970942735672, |
|
"learning_rate": 6.327495673300957e-05, |
|
"loss": 0.0276, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 8.890649762282091, |
|
"grad_norm": 0.21087859570980072, |
|
"learning_rate": 6.314859366272132e-05, |
|
"loss": 0.0234, |
|
"step": 5610 |
|
}, |
|
{ |
|
"epoch": 8.906497622820918, |
|
"grad_norm": 0.2701822817325592, |
|
"learning_rate": 6.302214030799883e-05, |
|
"loss": 0.022, |
|
"step": 5620 |
|
}, |
|
{ |
|
"epoch": 8.922345483359747, |
|
"grad_norm": 0.261089950799942, |
|
"learning_rate": 6.28955975371304e-05, |
|
"loss": 0.0264, |
|
"step": 5630 |
|
}, |
|
{ |
|
"epoch": 8.938193343898574, |
|
"grad_norm": 0.3843868672847748, |
|
"learning_rate": 6.276896621901825e-05, |
|
"loss": 0.0272, |
|
"step": 5640 |
|
}, |
|
{ |
|
"epoch": 8.9540412044374, |
|
"grad_norm": 0.3247261643409729, |
|
"learning_rate": 6.26422472231726e-05, |
|
"loss": 0.0275, |
|
"step": 5650 |
|
}, |
|
{ |
|
"epoch": 8.969889064976229, |
|
"grad_norm": 0.27681615948677063, |
|
"learning_rate": 6.251544141970578e-05, |
|
"loss": 0.0281, |
|
"step": 5660 |
|
}, |
|
{ |
|
"epoch": 8.985736925515056, |
|
"grad_norm": 0.255501925945282, |
|
"learning_rate": 6.238854967932612e-05, |
|
"loss": 0.0249, |
|
"step": 5670 |
|
}, |
|
{ |
|
"epoch": 9.001584786053883, |
|
"grad_norm": 0.2693521976470947, |
|
"learning_rate": 6.2261572873332e-05, |
|
"loss": 0.0202, |
|
"step": 5680 |
|
}, |
|
{ |
|
"epoch": 9.01743264659271, |
|
"grad_norm": 0.21597042679786682, |
|
"learning_rate": 6.213451187360601e-05, |
|
"loss": 0.0238, |
|
"step": 5690 |
|
}, |
|
{ |
|
"epoch": 9.033280507131538, |
|
"grad_norm": 0.3910636007785797, |
|
"learning_rate": 6.200736755260877e-05, |
|
"loss": 0.023, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 9.049128367670365, |
|
"grad_norm": 0.22803229093551636, |
|
"learning_rate": 6.188014078337305e-05, |
|
"loss": 0.0227, |
|
"step": 5710 |
|
}, |
|
{ |
|
"epoch": 9.064976228209192, |
|
"grad_norm": 0.22921766340732574, |
|
"learning_rate": 6.175283243949772e-05, |
|
"loss": 0.0225, |
|
"step": 5720 |
|
}, |
|
{ |
|
"epoch": 9.080824088748018, |
|
"grad_norm": 0.2634933590888977, |
|
"learning_rate": 6.162544339514183e-05, |
|
"loss": 0.0304, |
|
"step": 5730 |
|
}, |
|
{ |
|
"epoch": 9.096671949286847, |
|
"grad_norm": 0.5331051349639893, |
|
"learning_rate": 6.149797452501851e-05, |
|
"loss": 0.0282, |
|
"step": 5740 |
|
}, |
|
{ |
|
"epoch": 9.112519809825674, |
|
"grad_norm": 0.2564757466316223, |
|
"learning_rate": 6.137042670438907e-05, |
|
"loss": 0.0262, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 9.1283676703645, |
|
"grad_norm": 0.24122044444084167, |
|
"learning_rate": 6.124280080905685e-05, |
|
"loss": 0.0243, |
|
"step": 5760 |
|
}, |
|
{ |
|
"epoch": 9.144215530903328, |
|
"grad_norm": 0.20856255292892456, |
|
"learning_rate": 6.111509771536138e-05, |
|
"loss": 0.0255, |
|
"step": 5770 |
|
}, |
|
{ |
|
"epoch": 9.160063391442156, |
|
"grad_norm": 0.39979806542396545, |
|
"learning_rate": 6.098731830017217e-05, |
|
"loss": 0.0281, |
|
"step": 5780 |
|
}, |
|
{ |
|
"epoch": 9.175911251980983, |
|
"grad_norm": 0.16420406103134155, |
|
"learning_rate": 6.0859463440882866e-05, |
|
"loss": 0.0217, |
|
"step": 5790 |
|
}, |
|
{ |
|
"epoch": 9.19175911251981, |
|
"grad_norm": 0.25281447172164917, |
|
"learning_rate": 6.073153401540512e-05, |
|
"loss": 0.0279, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 9.207606973058637, |
|
"grad_norm": 0.25699812173843384, |
|
"learning_rate": 6.060353090216261e-05, |
|
"loss": 0.0258, |
|
"step": 5810 |
|
}, |
|
{ |
|
"epoch": 9.223454833597465, |
|
"grad_norm": 0.19040873646736145, |
|
"learning_rate": 6.0475454980084945e-05, |
|
"loss": 0.0233, |
|
"step": 5820 |
|
}, |
|
{ |
|
"epoch": 9.239302694136292, |
|
"grad_norm": 0.21894507110118866, |
|
"learning_rate": 6.0347307128601716e-05, |
|
"loss": 0.0203, |
|
"step": 5830 |
|
}, |
|
{ |
|
"epoch": 9.255150554675119, |
|
"grad_norm": 0.35552018880844116, |
|
"learning_rate": 6.021908822763641e-05, |
|
"loss": 0.0238, |
|
"step": 5840 |
|
}, |
|
{ |
|
"epoch": 9.270998415213946, |
|
"grad_norm": 0.328046053647995, |
|
"learning_rate": 6.0090799157600354e-05, |
|
"loss": 0.0249, |
|
"step": 5850 |
|
}, |
|
{ |
|
"epoch": 9.286846275752774, |
|
"grad_norm": 0.23552384972572327, |
|
"learning_rate": 5.996244079938671e-05, |
|
"loss": 0.0236, |
|
"step": 5860 |
|
}, |
|
{ |
|
"epoch": 9.302694136291601, |
|
"grad_norm": 0.2591778337955475, |
|
"learning_rate": 5.983401403436437e-05, |
|
"loss": 0.0248, |
|
"step": 5870 |
|
}, |
|
{ |
|
"epoch": 9.318541996830428, |
|
"grad_norm": 0.16465957462787628, |
|
"learning_rate": 5.970551974437198e-05, |
|
"loss": 0.0208, |
|
"step": 5880 |
|
}, |
|
{ |
|
"epoch": 9.334389857369255, |
|
"grad_norm": 0.25457292795181274, |
|
"learning_rate": 5.957695881171184e-05, |
|
"loss": 0.033, |
|
"step": 5890 |
|
}, |
|
{ |
|
"epoch": 9.350237717908083, |
|
"grad_norm": 0.19111283123493195, |
|
"learning_rate": 5.944833211914382e-05, |
|
"loss": 0.0318, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 9.36608557844691, |
|
"grad_norm": 0.30721551179885864, |
|
"learning_rate": 5.931964054987935e-05, |
|
"loss": 0.0224, |
|
"step": 5910 |
|
}, |
|
{ |
|
"epoch": 9.381933438985737, |
|
"grad_norm": 0.25978097319602966, |
|
"learning_rate": 5.9190884987575336e-05, |
|
"loss": 0.0251, |
|
"step": 5920 |
|
}, |
|
{ |
|
"epoch": 9.397781299524564, |
|
"grad_norm": 0.2720729112625122, |
|
"learning_rate": 5.906206631632807e-05, |
|
"loss": 0.025, |
|
"step": 5930 |
|
}, |
|
{ |
|
"epoch": 9.413629160063392, |
|
"grad_norm": 0.26405835151672363, |
|
"learning_rate": 5.8933185420667217e-05, |
|
"loss": 0.0266, |
|
"step": 5940 |
|
}, |
|
{ |
|
"epoch": 9.429477020602219, |
|
"grad_norm": 0.27683427929878235, |
|
"learning_rate": 5.880424318554967e-05, |
|
"loss": 0.0256, |
|
"step": 5950 |
|
}, |
|
{ |
|
"epoch": 9.445324881141046, |
|
"grad_norm": 0.2533441185951233, |
|
"learning_rate": 5.867524049635352e-05, |
|
"loss": 0.0255, |
|
"step": 5960 |
|
}, |
|
{ |
|
"epoch": 9.461172741679873, |
|
"grad_norm": 0.3351084589958191, |
|
"learning_rate": 5.854617823887196e-05, |
|
"loss": 0.0257, |
|
"step": 5970 |
|
}, |
|
{ |
|
"epoch": 9.477020602218701, |
|
"grad_norm": 0.2585383951663971, |
|
"learning_rate": 5.841705729930721e-05, |
|
"loss": 0.0257, |
|
"step": 5980 |
|
}, |
|
{ |
|
"epoch": 9.492868462757528, |
|
"grad_norm": 0.2588648796081543, |
|
"learning_rate": 5.828787856426444e-05, |
|
"loss": 0.0226, |
|
"step": 5990 |
|
}, |
|
{ |
|
"epoch": 9.508716323296355, |
|
"grad_norm": 0.2622322738170624, |
|
"learning_rate": 5.8158642920745655e-05, |
|
"loss": 0.0221, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 9.524564183835182, |
|
"grad_norm": 0.23283162713050842, |
|
"learning_rate": 5.802935125614361e-05, |
|
"loss": 0.0177, |
|
"step": 6010 |
|
}, |
|
{ |
|
"epoch": 9.54041204437401, |
|
"grad_norm": 0.265953928232193, |
|
"learning_rate": 5.790000445823576e-05, |
|
"loss": 0.0237, |
|
"step": 6020 |
|
}, |
|
{ |
|
"epoch": 9.556259904912837, |
|
"grad_norm": 0.23547948896884918, |
|
"learning_rate": 5.777060341517811e-05, |
|
"loss": 0.0254, |
|
"step": 6030 |
|
}, |
|
{ |
|
"epoch": 9.572107765451664, |
|
"grad_norm": 0.3150040805339813, |
|
"learning_rate": 5.764114901549914e-05, |
|
"loss": 0.0298, |
|
"step": 6040 |
|
}, |
|
{ |
|
"epoch": 9.58795562599049, |
|
"grad_norm": 0.23534265160560608, |
|
"learning_rate": 5.7511642148093704e-05, |
|
"loss": 0.0208, |
|
"step": 6050 |
|
}, |
|
{ |
|
"epoch": 9.60380348652932, |
|
"grad_norm": 0.2798217833042145, |
|
"learning_rate": 5.7382083702216925e-05, |
|
"loss": 0.0264, |
|
"step": 6060 |
|
}, |
|
{ |
|
"epoch": 9.619651347068146, |
|
"grad_norm": 0.2324879914522171, |
|
"learning_rate": 5.725247456747809e-05, |
|
"loss": 0.0315, |
|
"step": 6070 |
|
}, |
|
{ |
|
"epoch": 9.635499207606973, |
|
"grad_norm": 0.25599566102027893, |
|
"learning_rate": 5.7122815633834506e-05, |
|
"loss": 0.0227, |
|
"step": 6080 |
|
}, |
|
{ |
|
"epoch": 9.6513470681458, |
|
"grad_norm": 0.1766338348388672, |
|
"learning_rate": 5.699310779158551e-05, |
|
"loss": 0.0222, |
|
"step": 6090 |
|
}, |
|
{ |
|
"epoch": 9.667194928684628, |
|
"grad_norm": 0.2305234670639038, |
|
"learning_rate": 5.686335193136616e-05, |
|
"loss": 0.0229, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 9.683042789223455, |
|
"grad_norm": 0.24864676594734192, |
|
"learning_rate": 5.673354894414129e-05, |
|
"loss": 0.0259, |
|
"step": 6110 |
|
}, |
|
{ |
|
"epoch": 9.698890649762282, |
|
"grad_norm": 0.25202295184135437, |
|
"learning_rate": 5.660369972119933e-05, |
|
"loss": 0.0237, |
|
"step": 6120 |
|
}, |
|
{ |
|
"epoch": 9.714738510301109, |
|
"grad_norm": 0.32556819915771484, |
|
"learning_rate": 5.6473805154146174e-05, |
|
"loss": 0.02, |
|
"step": 6130 |
|
}, |
|
{ |
|
"epoch": 9.730586370839937, |
|
"grad_norm": 0.2521624267101288, |
|
"learning_rate": 5.634386613489908e-05, |
|
"loss": 0.0242, |
|
"step": 6140 |
|
}, |
|
{ |
|
"epoch": 9.746434231378764, |
|
"grad_norm": 0.25148093700408936, |
|
"learning_rate": 5.6213883555680516e-05, |
|
"loss": 0.0269, |
|
"step": 6150 |
|
}, |
|
{ |
|
"epoch": 9.76228209191759, |
|
"grad_norm": 0.22112874686717987, |
|
"learning_rate": 5.608385830901206e-05, |
|
"loss": 0.0285, |
|
"step": 6160 |
|
}, |
|
{ |
|
"epoch": 9.778129952456418, |
|
"grad_norm": 0.33593472838401794, |
|
"learning_rate": 5.5953791287708254e-05, |
|
"loss": 0.03, |
|
"step": 6170 |
|
}, |
|
{ |
|
"epoch": 9.793977812995246, |
|
"grad_norm": 0.306130975484848, |
|
"learning_rate": 5.5823683384870554e-05, |
|
"loss": 0.0244, |
|
"step": 6180 |
|
}, |
|
{ |
|
"epoch": 9.809825673534073, |
|
"grad_norm": 0.3085562288761139, |
|
"learning_rate": 5.569353549388103e-05, |
|
"loss": 0.027, |
|
"step": 6190 |
|
}, |
|
{ |
|
"epoch": 9.8256735340729, |
|
"grad_norm": 0.2247430682182312, |
|
"learning_rate": 5.556334850839637e-05, |
|
"loss": 0.0234, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 9.841521394611727, |
|
"grad_norm": 0.26314494013786316, |
|
"learning_rate": 5.543312332234174e-05, |
|
"loss": 0.024, |
|
"step": 6210 |
|
}, |
|
{ |
|
"epoch": 9.857369255150555, |
|
"grad_norm": 0.22496825456619263, |
|
"learning_rate": 5.530286082990454e-05, |
|
"loss": 0.0194, |
|
"step": 6220 |
|
}, |
|
{ |
|
"epoch": 9.873217115689382, |
|
"grad_norm": 0.29987284541130066, |
|
"learning_rate": 5.5172561925528386e-05, |
|
"loss": 0.0252, |
|
"step": 6230 |
|
}, |
|
{ |
|
"epoch": 9.889064976228209, |
|
"grad_norm": 0.3042098581790924, |
|
"learning_rate": 5.5042227503906894e-05, |
|
"loss": 0.0246, |
|
"step": 6240 |
|
}, |
|
{ |
|
"epoch": 9.904912836767036, |
|
"grad_norm": 0.22687886655330658, |
|
"learning_rate": 5.491185845997757e-05, |
|
"loss": 0.026, |
|
"step": 6250 |
|
}, |
|
{ |
|
"epoch": 9.920760697305864, |
|
"grad_norm": 0.2479943484067917, |
|
"learning_rate": 5.478145568891562e-05, |
|
"loss": 0.0289, |
|
"step": 6260 |
|
}, |
|
{ |
|
"epoch": 9.936608557844691, |
|
"grad_norm": 0.20297874510288239, |
|
"learning_rate": 5.465102008612789e-05, |
|
"loss": 0.0233, |
|
"step": 6270 |
|
}, |
|
{ |
|
"epoch": 9.952456418383518, |
|
"grad_norm": 0.17246457934379578, |
|
"learning_rate": 5.452055254724664e-05, |
|
"loss": 0.0253, |
|
"step": 6280 |
|
}, |
|
{ |
|
"epoch": 9.968304278922346, |
|
"grad_norm": 0.24328118562698364, |
|
"learning_rate": 5.4390053968123386e-05, |
|
"loss": 0.025, |
|
"step": 6290 |
|
}, |
|
{ |
|
"epoch": 9.984152139461173, |
|
"grad_norm": 0.18752968311309814, |
|
"learning_rate": 5.425952524482283e-05, |
|
"loss": 0.024, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"grad_norm": 0.18232440948486328, |
|
"learning_rate": 5.4128967273616625e-05, |
|
"loss": 0.0241, |
|
"step": 6310 |
|
}, |
|
{ |
|
"epoch": 10.015847860538827, |
|
"grad_norm": 0.22801880538463593, |
|
"learning_rate": 5.3998380950977266e-05, |
|
"loss": 0.0209, |
|
"step": 6320 |
|
}, |
|
{ |
|
"epoch": 10.031695721077655, |
|
"grad_norm": 0.21135802567005157, |
|
"learning_rate": 5.386776717357193e-05, |
|
"loss": 0.0234, |
|
"step": 6330 |
|
}, |
|
{ |
|
"epoch": 10.047543581616482, |
|
"grad_norm": 0.2743472754955292, |
|
"learning_rate": 5.373712683825629e-05, |
|
"loss": 0.0237, |
|
"step": 6340 |
|
}, |
|
{ |
|
"epoch": 10.063391442155309, |
|
"grad_norm": 0.2664951682090759, |
|
"learning_rate": 5.3606460842068426e-05, |
|
"loss": 0.0249, |
|
"step": 6350 |
|
}, |
|
{ |
|
"epoch": 10.079239302694136, |
|
"grad_norm": 0.20999731123447418, |
|
"learning_rate": 5.347577008222253e-05, |
|
"loss": 0.0244, |
|
"step": 6360 |
|
}, |
|
{ |
|
"epoch": 10.095087163232964, |
|
"grad_norm": 0.18719319999217987, |
|
"learning_rate": 5.334505545610293e-05, |
|
"loss": 0.0239, |
|
"step": 6370 |
|
}, |
|
{ |
|
"epoch": 10.110935023771791, |
|
"grad_norm": 0.17207162082195282, |
|
"learning_rate": 5.321431786125778e-05, |
|
"loss": 0.0218, |
|
"step": 6380 |
|
}, |
|
{ |
|
"epoch": 10.126782884310618, |
|
"grad_norm": 0.21071314811706543, |
|
"learning_rate": 5.3083558195392936e-05, |
|
"loss": 0.021, |
|
"step": 6390 |
|
}, |
|
{ |
|
"epoch": 10.142630744849445, |
|
"grad_norm": 0.21377994120121002, |
|
"learning_rate": 5.295277735636583e-05, |
|
"loss": 0.0226, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 10.158478605388273, |
|
"grad_norm": 0.16608726978302002, |
|
"learning_rate": 5.282197624217928e-05, |
|
"loss": 0.0227, |
|
"step": 6410 |
|
}, |
|
{ |
|
"epoch": 10.1743264659271, |
|
"grad_norm": 0.19757942855358124, |
|
"learning_rate": 5.2691155750975316e-05, |
|
"loss": 0.0196, |
|
"step": 6420 |
|
}, |
|
{ |
|
"epoch": 10.190174326465927, |
|
"grad_norm": 0.1993936449289322, |
|
"learning_rate": 5.2560316781029005e-05, |
|
"loss": 0.0199, |
|
"step": 6430 |
|
}, |
|
{ |
|
"epoch": 10.206022187004754, |
|
"grad_norm": 0.20808455348014832, |
|
"learning_rate": 5.2429460230742346e-05, |
|
"loss": 0.0214, |
|
"step": 6440 |
|
}, |
|
{ |
|
"epoch": 10.221870047543582, |
|
"grad_norm": 0.1672813892364502, |
|
"learning_rate": 5.2298586998637956e-05, |
|
"loss": 0.0243, |
|
"step": 6450 |
|
}, |
|
{ |
|
"epoch": 10.23771790808241, |
|
"grad_norm": 0.26778897643089294, |
|
"learning_rate": 5.216769798335311e-05, |
|
"loss": 0.025, |
|
"step": 6460 |
|
}, |
|
{ |
|
"epoch": 10.253565768621236, |
|
"grad_norm": 0.22870604693889618, |
|
"learning_rate": 5.203679408363341e-05, |
|
"loss": 0.021, |
|
"step": 6470 |
|
}, |
|
{ |
|
"epoch": 10.269413629160063, |
|
"grad_norm": 0.2953716516494751, |
|
"learning_rate": 5.190587619832664e-05, |
|
"loss": 0.0215, |
|
"step": 6480 |
|
}, |
|
{ |
|
"epoch": 10.285261489698891, |
|
"grad_norm": 0.3255462944507599, |
|
"learning_rate": 5.1774945226376624e-05, |
|
"loss": 0.0166, |
|
"step": 6490 |
|
}, |
|
{ |
|
"epoch": 10.301109350237718, |
|
"grad_norm": 0.17969000339508057, |
|
"learning_rate": 5.1644002066817063e-05, |
|
"loss": 0.0205, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 10.316957210776545, |
|
"grad_norm": 0.2460571676492691, |
|
"learning_rate": 5.151304761876536e-05, |
|
"loss": 0.0201, |
|
"step": 6510 |
|
}, |
|
{ |
|
"epoch": 10.332805071315372, |
|
"grad_norm": 0.178553506731987, |
|
"learning_rate": 5.1382082781416396e-05, |
|
"loss": 0.0203, |
|
"step": 6520 |
|
}, |
|
{ |
|
"epoch": 10.3486529318542, |
|
"grad_norm": 0.18054994940757751, |
|
"learning_rate": 5.125110845403638e-05, |
|
"loss": 0.0204, |
|
"step": 6530 |
|
}, |
|
{ |
|
"epoch": 10.364500792393027, |
|
"grad_norm": 0.2226029634475708, |
|
"learning_rate": 5.112012553595671e-05, |
|
"loss": 0.0202, |
|
"step": 6540 |
|
}, |
|
{ |
|
"epoch": 10.380348652931854, |
|
"grad_norm": 0.23070666193962097, |
|
"learning_rate": 5.0989134926567785e-05, |
|
"loss": 0.0205, |
|
"step": 6550 |
|
}, |
|
{ |
|
"epoch": 10.39619651347068, |
|
"grad_norm": 0.1447778195142746, |
|
"learning_rate": 5.085813752531278e-05, |
|
"loss": 0.0273, |
|
"step": 6560 |
|
}, |
|
{ |
|
"epoch": 10.41204437400951, |
|
"grad_norm": 0.18221695721149445, |
|
"learning_rate": 5.072713423168154e-05, |
|
"loss": 0.0196, |
|
"step": 6570 |
|
}, |
|
{ |
|
"epoch": 10.427892234548336, |
|
"grad_norm": 0.2584993839263916, |
|
"learning_rate": 5.0596125945204334e-05, |
|
"loss": 0.0205, |
|
"step": 6580 |
|
}, |
|
{ |
|
"epoch": 10.443740095087163, |
|
"grad_norm": 0.19126753509044647, |
|
"learning_rate": 5.046511356544574e-05, |
|
"loss": 0.0226, |
|
"step": 6590 |
|
}, |
|
{ |
|
"epoch": 10.45958795562599, |
|
"grad_norm": 0.19277669489383698, |
|
"learning_rate": 5.033409799199844e-05, |
|
"loss": 0.0195, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 10.475435816164818, |
|
"grad_norm": 0.22546206414699554, |
|
"learning_rate": 5.020308012447704e-05, |
|
"loss": 0.022, |
|
"step": 6610 |
|
}, |
|
{ |
|
"epoch": 10.491283676703645, |
|
"grad_norm": 0.26715290546417236, |
|
"learning_rate": 5.0072060862511893e-05, |
|
"loss": 0.0232, |
|
"step": 6620 |
|
}, |
|
{ |
|
"epoch": 10.507131537242472, |
|
"grad_norm": 0.23546898365020752, |
|
"learning_rate": 4.994104110574295e-05, |
|
"loss": 0.0233, |
|
"step": 6630 |
|
}, |
|
{ |
|
"epoch": 10.522979397781299, |
|
"grad_norm": 0.38194459676742554, |
|
"learning_rate": 4.981002175381352e-05, |
|
"loss": 0.0266, |
|
"step": 6640 |
|
}, |
|
{ |
|
"epoch": 10.538827258320127, |
|
"grad_norm": 0.17723363637924194, |
|
"learning_rate": 4.9679003706364185e-05, |
|
"loss": 0.0249, |
|
"step": 6650 |
|
}, |
|
{ |
|
"epoch": 10.554675118858954, |
|
"grad_norm": 0.30575594305992126, |
|
"learning_rate": 4.9547987863026507e-05, |
|
"loss": 0.0268, |
|
"step": 6660 |
|
}, |
|
{ |
|
"epoch": 10.570522979397781, |
|
"grad_norm": 0.2724224328994751, |
|
"learning_rate": 4.9416975123416966e-05, |
|
"loss": 0.0216, |
|
"step": 6670 |
|
}, |
|
{ |
|
"epoch": 10.586370839936608, |
|
"grad_norm": 0.3302716910839081, |
|
"learning_rate": 4.92859663871307e-05, |
|
"loss": 0.0222, |
|
"step": 6680 |
|
}, |
|
{ |
|
"epoch": 10.602218700475436, |
|
"grad_norm": 0.182839035987854, |
|
"learning_rate": 4.915496255373537e-05, |
|
"loss": 0.0241, |
|
"step": 6690 |
|
}, |
|
{ |
|
"epoch": 10.618066561014263, |
|
"grad_norm": 0.18011973798274994, |
|
"learning_rate": 4.902396452276498e-05, |
|
"loss": 0.0166, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 10.63391442155309, |
|
"grad_norm": 0.2910979688167572, |
|
"learning_rate": 4.8892973193713684e-05, |
|
"loss": 0.0268, |
|
"step": 6710 |
|
}, |
|
{ |
|
"epoch": 10.649762282091917, |
|
"grad_norm": 0.20945270359516144, |
|
"learning_rate": 4.876198946602963e-05, |
|
"loss": 0.0243, |
|
"step": 6720 |
|
}, |
|
{ |
|
"epoch": 10.665610142630745, |
|
"grad_norm": 0.2104242444038391, |
|
"learning_rate": 4.86310142391087e-05, |
|
"loss": 0.0217, |
|
"step": 6730 |
|
}, |
|
{ |
|
"epoch": 10.681458003169572, |
|
"grad_norm": 0.22012865543365479, |
|
"learning_rate": 4.850004841228852e-05, |
|
"loss": 0.0187, |
|
"step": 6740 |
|
}, |
|
{ |
|
"epoch": 10.697305863708399, |
|
"grad_norm": 0.252900093793869, |
|
"learning_rate": 4.836909288484208e-05, |
|
"loss": 0.0284, |
|
"step": 6750 |
|
}, |
|
{ |
|
"epoch": 10.713153724247226, |
|
"grad_norm": 0.2362486571073532, |
|
"learning_rate": 4.8238148555971704e-05, |
|
"loss": 0.0178, |
|
"step": 6760 |
|
}, |
|
{ |
|
"epoch": 10.729001584786054, |
|
"grad_norm": 0.28352028131484985, |
|
"learning_rate": 4.81072163248028e-05, |
|
"loss": 0.0281, |
|
"step": 6770 |
|
}, |
|
{ |
|
"epoch": 10.744849445324881, |
|
"grad_norm": 0.31054121255874634, |
|
"learning_rate": 4.7976297090377706e-05, |
|
"loss": 0.0271, |
|
"step": 6780 |
|
}, |
|
{ |
|
"epoch": 10.760697305863708, |
|
"grad_norm": 0.15438808500766754, |
|
"learning_rate": 4.7845391751649505e-05, |
|
"loss": 0.0256, |
|
"step": 6790 |
|
}, |
|
{ |
|
"epoch": 10.776545166402535, |
|
"grad_norm": 0.17651043832302094, |
|
"learning_rate": 4.7714501207475884e-05, |
|
"loss": 0.0218, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 10.792393026941363, |
|
"grad_norm": 0.2993830740451813, |
|
"learning_rate": 4.7583626356612954e-05, |
|
"loss": 0.0219, |
|
"step": 6810 |
|
}, |
|
{ |
|
"epoch": 10.80824088748019, |
|
"grad_norm": 0.21443192660808563, |
|
"learning_rate": 4.745276809770905e-05, |
|
"loss": 0.0198, |
|
"step": 6820 |
|
}, |
|
{ |
|
"epoch": 10.824088748019017, |
|
"grad_norm": 0.22990483045578003, |
|
"learning_rate": 4.732192732929858e-05, |
|
"loss": 0.024, |
|
"step": 6830 |
|
}, |
|
{ |
|
"epoch": 10.839936608557844, |
|
"grad_norm": 0.2523830831050873, |
|
"learning_rate": 4.7191104949795845e-05, |
|
"loss": 0.02, |
|
"step": 6840 |
|
}, |
|
{ |
|
"epoch": 10.855784469096672, |
|
"grad_norm": 0.19074945151805878, |
|
"learning_rate": 4.706030185748894e-05, |
|
"loss": 0.0235, |
|
"step": 6850 |
|
}, |
|
{ |
|
"epoch": 10.8716323296355, |
|
"grad_norm": 0.17805525660514832, |
|
"learning_rate": 4.692951895053342e-05, |
|
"loss": 0.024, |
|
"step": 6860 |
|
}, |
|
{ |
|
"epoch": 10.887480190174326, |
|
"grad_norm": 0.25457364320755005, |
|
"learning_rate": 4.6798757126946324e-05, |
|
"loss": 0.0225, |
|
"step": 6870 |
|
}, |
|
{ |
|
"epoch": 10.903328050713153, |
|
"grad_norm": 0.2769658863544464, |
|
"learning_rate": 4.6668017284599866e-05, |
|
"loss": 0.0186, |
|
"step": 6880 |
|
}, |
|
{ |
|
"epoch": 10.919175911251982, |
|
"grad_norm": 0.27840906381607056, |
|
"learning_rate": 4.653730032121539e-05, |
|
"loss": 0.0213, |
|
"step": 6890 |
|
}, |
|
{ |
|
"epoch": 10.935023771790808, |
|
"grad_norm": 0.31035539507865906, |
|
"learning_rate": 4.640660713435709e-05, |
|
"loss": 0.022, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 10.950871632329635, |
|
"grad_norm": 0.2523256540298462, |
|
"learning_rate": 4.627593862142594e-05, |
|
"loss": 0.0261, |
|
"step": 6910 |
|
}, |
|
{ |
|
"epoch": 10.966719492868462, |
|
"grad_norm": 0.2741487920284271, |
|
"learning_rate": 4.61452956796534e-05, |
|
"loss": 0.0243, |
|
"step": 6920 |
|
}, |
|
{ |
|
"epoch": 10.98256735340729, |
|
"grad_norm": 0.18995286524295807, |
|
"learning_rate": 4.601467920609547e-05, |
|
"loss": 0.0261, |
|
"step": 6930 |
|
}, |
|
{ |
|
"epoch": 10.998415213946117, |
|
"grad_norm": 0.33396896719932556, |
|
"learning_rate": 4.588409009762634e-05, |
|
"loss": 0.0268, |
|
"step": 6940 |
|
}, |
|
{ |
|
"epoch": 11.014263074484944, |
|
"grad_norm": 0.2645708918571472, |
|
"learning_rate": 4.575352925093229e-05, |
|
"loss": 0.0221, |
|
"step": 6950 |
|
}, |
|
{ |
|
"epoch": 11.030110935023771, |
|
"grad_norm": 0.21601872146129608, |
|
"learning_rate": 4.562299756250557e-05, |
|
"loss": 0.0197, |
|
"step": 6960 |
|
}, |
|
{ |
|
"epoch": 11.0459587955626, |
|
"grad_norm": 0.26823803782463074, |
|
"learning_rate": 4.549249592863822e-05, |
|
"loss": 0.0318, |
|
"step": 6970 |
|
}, |
|
{ |
|
"epoch": 11.061806656101426, |
|
"grad_norm": 0.40468984842300415, |
|
"learning_rate": 4.536202524541588e-05, |
|
"loss": 0.0201, |
|
"step": 6980 |
|
}, |
|
{ |
|
"epoch": 11.077654516640253, |
|
"grad_norm": 0.2228170931339264, |
|
"learning_rate": 4.5231586408711684e-05, |
|
"loss": 0.0232, |
|
"step": 6990 |
|
}, |
|
{ |
|
"epoch": 11.09350237717908, |
|
"grad_norm": 0.17821644246578217, |
|
"learning_rate": 4.510118031418009e-05, |
|
"loss": 0.0193, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 11.109350237717909, |
|
"grad_norm": 0.22201032936573029, |
|
"learning_rate": 4.4970807857250745e-05, |
|
"loss": 0.0235, |
|
"step": 7010 |
|
}, |
|
{ |
|
"epoch": 11.125198098256735, |
|
"grad_norm": 0.16020157933235168, |
|
"learning_rate": 4.4840469933122314e-05, |
|
"loss": 0.0206, |
|
"step": 7020 |
|
}, |
|
{ |
|
"epoch": 11.141045958795562, |
|
"grad_norm": 0.18815340101718903, |
|
"learning_rate": 4.471016743675633e-05, |
|
"loss": 0.0202, |
|
"step": 7030 |
|
}, |
|
{ |
|
"epoch": 11.15689381933439, |
|
"grad_norm": 0.2237204611301422, |
|
"learning_rate": 4.457990126287112e-05, |
|
"loss": 0.021, |
|
"step": 7040 |
|
}, |
|
{ |
|
"epoch": 11.172741679873218, |
|
"grad_norm": 0.2936099171638489, |
|
"learning_rate": 4.444967230593551e-05, |
|
"loss": 0.0203, |
|
"step": 7050 |
|
}, |
|
{ |
|
"epoch": 11.188589540412044, |
|
"grad_norm": 0.1436583399772644, |
|
"learning_rate": 4.431948146016286e-05, |
|
"loss": 0.0197, |
|
"step": 7060 |
|
}, |
|
{ |
|
"epoch": 11.204437400950871, |
|
"grad_norm": 0.2675095796585083, |
|
"learning_rate": 4.418932961950478e-05, |
|
"loss": 0.02, |
|
"step": 7070 |
|
}, |
|
{ |
|
"epoch": 11.2202852614897, |
|
"grad_norm": 0.23882818222045898, |
|
"learning_rate": 4.405921767764511e-05, |
|
"loss": 0.0217, |
|
"step": 7080 |
|
}, |
|
{ |
|
"epoch": 11.236133122028527, |
|
"grad_norm": 0.2709539830684662, |
|
"learning_rate": 4.392914652799368e-05, |
|
"loss": 0.0209, |
|
"step": 7090 |
|
}, |
|
{ |
|
"epoch": 11.251980982567353, |
|
"grad_norm": 0.18802231550216675, |
|
"learning_rate": 4.3799117063680254e-05, |
|
"loss": 0.0173, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 11.26782884310618, |
|
"grad_norm": 0.25173911452293396, |
|
"learning_rate": 4.366913017754836e-05, |
|
"loss": 0.0228, |
|
"step": 7110 |
|
}, |
|
{ |
|
"epoch": 11.283676703645009, |
|
"grad_norm": 0.2181670218706131, |
|
"learning_rate": 4.3539186762149106e-05, |
|
"loss": 0.016, |
|
"step": 7120 |
|
}, |
|
{ |
|
"epoch": 11.299524564183836, |
|
"grad_norm": 0.18725943565368652, |
|
"learning_rate": 4.3409287709735204e-05, |
|
"loss": 0.0234, |
|
"step": 7130 |
|
}, |
|
{ |
|
"epoch": 11.315372424722662, |
|
"grad_norm": 0.3149115741252899, |
|
"learning_rate": 4.3279433912254675e-05, |
|
"loss": 0.0213, |
|
"step": 7140 |
|
}, |
|
{ |
|
"epoch": 11.33122028526149, |
|
"grad_norm": 0.2042395919561386, |
|
"learning_rate": 4.314962626134484e-05, |
|
"loss": 0.0206, |
|
"step": 7150 |
|
}, |
|
{ |
|
"epoch": 11.347068145800318, |
|
"grad_norm": 0.14478328824043274, |
|
"learning_rate": 4.301986564832613e-05, |
|
"loss": 0.0203, |
|
"step": 7160 |
|
}, |
|
{ |
|
"epoch": 11.362916006339145, |
|
"grad_norm": 0.20697103440761566, |
|
"learning_rate": 4.289015296419603e-05, |
|
"loss": 0.0156, |
|
"step": 7170 |
|
}, |
|
{ |
|
"epoch": 11.378763866877971, |
|
"grad_norm": 0.2516174912452698, |
|
"learning_rate": 4.276048909962286e-05, |
|
"loss": 0.021, |
|
"step": 7180 |
|
}, |
|
{ |
|
"epoch": 11.394611727416798, |
|
"grad_norm": 0.30749985575675964, |
|
"learning_rate": 4.263087494493977e-05, |
|
"loss": 0.0189, |
|
"step": 7190 |
|
}, |
|
{ |
|
"epoch": 11.410459587955627, |
|
"grad_norm": 0.2317238450050354, |
|
"learning_rate": 4.2501311390138574e-05, |
|
"loss": 0.0245, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 11.426307448494454, |
|
"grad_norm": 0.24530279636383057, |
|
"learning_rate": 4.2371799324863614e-05, |
|
"loss": 0.0185, |
|
"step": 7210 |
|
}, |
|
{ |
|
"epoch": 11.44215530903328, |
|
"grad_norm": 0.16856257617473602, |
|
"learning_rate": 4.224233963840574e-05, |
|
"loss": 0.0223, |
|
"step": 7220 |
|
}, |
|
{ |
|
"epoch": 11.458003169572107, |
|
"grad_norm": 0.15289132297039032, |
|
"learning_rate": 4.2112933219696106e-05, |
|
"loss": 0.0157, |
|
"step": 7230 |
|
}, |
|
{ |
|
"epoch": 11.473851030110936, |
|
"grad_norm": 0.17484936118125916, |
|
"learning_rate": 4.198358095730006e-05, |
|
"loss": 0.0212, |
|
"step": 7240 |
|
}, |
|
{ |
|
"epoch": 11.489698890649763, |
|
"grad_norm": 0.18419259786605835, |
|
"learning_rate": 4.185428373941115e-05, |
|
"loss": 0.0207, |
|
"step": 7250 |
|
}, |
|
{ |
|
"epoch": 11.50554675118859, |
|
"grad_norm": 0.2928980588912964, |
|
"learning_rate": 4.172504245384496e-05, |
|
"loss": 0.0217, |
|
"step": 7260 |
|
}, |
|
{ |
|
"epoch": 11.521394611727416, |
|
"grad_norm": 0.19275160133838654, |
|
"learning_rate": 4.1595857988033e-05, |
|
"loss": 0.0194, |
|
"step": 7270 |
|
}, |
|
{ |
|
"epoch": 11.537242472266245, |
|
"grad_norm": 0.3847340941429138, |
|
"learning_rate": 4.146673122901662e-05, |
|
"loss": 0.0199, |
|
"step": 7280 |
|
}, |
|
{ |
|
"epoch": 11.553090332805072, |
|
"grad_norm": 0.25312259793281555, |
|
"learning_rate": 4.1337663063440946e-05, |
|
"loss": 0.0174, |
|
"step": 7290 |
|
}, |
|
{ |
|
"epoch": 11.568938193343898, |
|
"grad_norm": 0.274879515171051, |
|
"learning_rate": 4.120865437754877e-05, |
|
"loss": 0.0238, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 11.584786053882725, |
|
"grad_norm": 0.22004622220993042, |
|
"learning_rate": 4.1079706057174455e-05, |
|
"loss": 0.0231, |
|
"step": 7310 |
|
}, |
|
{ |
|
"epoch": 11.600633914421554, |
|
"grad_norm": 0.4630294740200043, |
|
"learning_rate": 4.095081898773787e-05, |
|
"loss": 0.022, |
|
"step": 7320 |
|
}, |
|
{ |
|
"epoch": 11.61648177496038, |
|
"grad_norm": 0.15254133939743042, |
|
"learning_rate": 4.0821994054238325e-05, |
|
"loss": 0.0218, |
|
"step": 7330 |
|
}, |
|
{ |
|
"epoch": 11.632329635499207, |
|
"grad_norm": 0.18909721076488495, |
|
"learning_rate": 4.069323214124845e-05, |
|
"loss": 0.0241, |
|
"step": 7340 |
|
}, |
|
{ |
|
"epoch": 11.648177496038034, |
|
"grad_norm": 0.18203580379486084, |
|
"learning_rate": 4.0564534132908164e-05, |
|
"loss": 0.0206, |
|
"step": 7350 |
|
}, |
|
{ |
|
"epoch": 11.664025356576863, |
|
"grad_norm": 0.31021520495414734, |
|
"learning_rate": 4.04359009129186e-05, |
|
"loss": 0.0229, |
|
"step": 7360 |
|
}, |
|
{ |
|
"epoch": 11.67987321711569, |
|
"grad_norm": 0.21043580770492554, |
|
"learning_rate": 4.0307333364535973e-05, |
|
"loss": 0.0243, |
|
"step": 7370 |
|
}, |
|
{ |
|
"epoch": 11.695721077654516, |
|
"grad_norm": 0.17714616656303406, |
|
"learning_rate": 4.017883237056561e-05, |
|
"loss": 0.02, |
|
"step": 7380 |
|
}, |
|
{ |
|
"epoch": 11.711568938193343, |
|
"grad_norm": 0.23153972625732422, |
|
"learning_rate": 4.005039881335583e-05, |
|
"loss": 0.0178, |
|
"step": 7390 |
|
}, |
|
{ |
|
"epoch": 11.727416798732172, |
|
"grad_norm": 0.7659839391708374, |
|
"learning_rate": 3.99220335747919e-05, |
|
"loss": 0.0213, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 11.743264659270999, |
|
"grad_norm": 0.2092520147562027, |
|
"learning_rate": 3.979373753628999e-05, |
|
"loss": 0.023, |
|
"step": 7410 |
|
}, |
|
{ |
|
"epoch": 11.759112519809825, |
|
"grad_norm": 0.3415199816226959, |
|
"learning_rate": 3.9665511578791096e-05, |
|
"loss": 0.021, |
|
"step": 7420 |
|
}, |
|
{ |
|
"epoch": 11.774960380348652, |
|
"grad_norm": 0.31222307682037354, |
|
"learning_rate": 3.9537356582755034e-05, |
|
"loss": 0.0214, |
|
"step": 7430 |
|
}, |
|
{ |
|
"epoch": 11.79080824088748, |
|
"grad_norm": 0.18112266063690186, |
|
"learning_rate": 3.940927342815428e-05, |
|
"loss": 0.0234, |
|
"step": 7440 |
|
}, |
|
{ |
|
"epoch": 11.806656101426308, |
|
"grad_norm": 0.28897473216056824, |
|
"learning_rate": 3.9281262994468114e-05, |
|
"loss": 0.0258, |
|
"step": 7450 |
|
}, |
|
{ |
|
"epoch": 11.822503961965134, |
|
"grad_norm": 0.28549882769584656, |
|
"learning_rate": 3.915332616067643e-05, |
|
"loss": 0.0188, |
|
"step": 7460 |
|
}, |
|
{ |
|
"epoch": 11.838351822503961, |
|
"grad_norm": 0.19967828691005707, |
|
"learning_rate": 3.9025463805253765e-05, |
|
"loss": 0.0201, |
|
"step": 7470 |
|
}, |
|
{ |
|
"epoch": 11.85419968304279, |
|
"grad_norm": 0.27357855439186096, |
|
"learning_rate": 3.889767680616324e-05, |
|
"loss": 0.0193, |
|
"step": 7480 |
|
}, |
|
{ |
|
"epoch": 11.870047543581617, |
|
"grad_norm": 0.202061265707016, |
|
"learning_rate": 3.8769966040850566e-05, |
|
"loss": 0.0188, |
|
"step": 7490 |
|
}, |
|
{ |
|
"epoch": 11.885895404120443, |
|
"grad_norm": 0.24488794803619385, |
|
"learning_rate": 3.864233238623796e-05, |
|
"loss": 0.0177, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 11.90174326465927, |
|
"grad_norm": 0.23348113894462585, |
|
"learning_rate": 3.851477671871818e-05, |
|
"loss": 0.0189, |
|
"step": 7510 |
|
}, |
|
{ |
|
"epoch": 11.917591125198099, |
|
"grad_norm": 0.31944724917411804, |
|
"learning_rate": 3.838729991414852e-05, |
|
"loss": 0.0211, |
|
"step": 7520 |
|
}, |
|
{ |
|
"epoch": 11.933438985736926, |
|
"grad_norm": 0.24721786379814148, |
|
"learning_rate": 3.82599028478447e-05, |
|
"loss": 0.0159, |
|
"step": 7530 |
|
}, |
|
{ |
|
"epoch": 11.949286846275752, |
|
"grad_norm": 0.2412160336971283, |
|
"learning_rate": 3.8132586394574974e-05, |
|
"loss": 0.0231, |
|
"step": 7540 |
|
}, |
|
{ |
|
"epoch": 11.96513470681458, |
|
"grad_norm": 0.2842359244823456, |
|
"learning_rate": 3.8005351428554036e-05, |
|
"loss": 0.0179, |
|
"step": 7550 |
|
}, |
|
{ |
|
"epoch": 11.980982567353408, |
|
"grad_norm": 0.19113971292972565, |
|
"learning_rate": 3.78781988234371e-05, |
|
"loss": 0.0178, |
|
"step": 7560 |
|
}, |
|
{ |
|
"epoch": 11.996830427892235, |
|
"grad_norm": 0.24129873514175415, |
|
"learning_rate": 3.775112945231377e-05, |
|
"loss": 0.0214, |
|
"step": 7570 |
|
}, |
|
{ |
|
"epoch": 12.012678288431061, |
|
"grad_norm": 0.30563119053840637, |
|
"learning_rate": 3.7624144187702174e-05, |
|
"loss": 0.0207, |
|
"step": 7580 |
|
}, |
|
{ |
|
"epoch": 12.028526148969888, |
|
"grad_norm": 0.16946931183338165, |
|
"learning_rate": 3.7497243901542934e-05, |
|
"loss": 0.0194, |
|
"step": 7590 |
|
}, |
|
{ |
|
"epoch": 12.044374009508717, |
|
"grad_norm": 0.23966370522975922, |
|
"learning_rate": 3.7370429465193154e-05, |
|
"loss": 0.0198, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 12.060221870047544, |
|
"grad_norm": 0.2549941837787628, |
|
"learning_rate": 3.724370174942047e-05, |
|
"loss": 0.023, |
|
"step": 7610 |
|
}, |
|
{ |
|
"epoch": 12.07606973058637, |
|
"grad_norm": 0.2220945656299591, |
|
"learning_rate": 3.711706162439704e-05, |
|
"loss": 0.0174, |
|
"step": 7620 |
|
}, |
|
{ |
|
"epoch": 12.091917591125197, |
|
"grad_norm": 0.16276349127292633, |
|
"learning_rate": 3.699050995969354e-05, |
|
"loss": 0.0192, |
|
"step": 7630 |
|
}, |
|
{ |
|
"epoch": 12.107765451664026, |
|
"grad_norm": 0.3065180778503418, |
|
"learning_rate": 3.6864047624273325e-05, |
|
"loss": 0.019, |
|
"step": 7640 |
|
}, |
|
{ |
|
"epoch": 12.123613312202853, |
|
"grad_norm": 0.19206896424293518, |
|
"learning_rate": 3.67376754864863e-05, |
|
"loss": 0.0149, |
|
"step": 7650 |
|
}, |
|
{ |
|
"epoch": 12.13946117274168, |
|
"grad_norm": 0.21416613459587097, |
|
"learning_rate": 3.6611394414063074e-05, |
|
"loss": 0.0179, |
|
"step": 7660 |
|
}, |
|
{ |
|
"epoch": 12.155309033280506, |
|
"grad_norm": 0.2737729251384735, |
|
"learning_rate": 3.6485205274108936e-05, |
|
"loss": 0.0235, |
|
"step": 7670 |
|
}, |
|
{ |
|
"epoch": 12.171156893819335, |
|
"grad_norm": 0.17268019914627075, |
|
"learning_rate": 3.635910893309792e-05, |
|
"loss": 0.0162, |
|
"step": 7680 |
|
}, |
|
{ |
|
"epoch": 12.187004754358162, |
|
"grad_norm": 0.23836471140384674, |
|
"learning_rate": 3.6233106256866895e-05, |
|
"loss": 0.0174, |
|
"step": 7690 |
|
}, |
|
{ |
|
"epoch": 12.202852614896988, |
|
"grad_norm": 0.447587788105011, |
|
"learning_rate": 3.610719811060952e-05, |
|
"loss": 0.0189, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 12.218700475435817, |
|
"grad_norm": 0.21118977665901184, |
|
"learning_rate": 3.598138535887041e-05, |
|
"loss": 0.0183, |
|
"step": 7710 |
|
}, |
|
{ |
|
"epoch": 12.234548335974644, |
|
"grad_norm": 0.257715106010437, |
|
"learning_rate": 3.585566886553917e-05, |
|
"loss": 0.0209, |
|
"step": 7720 |
|
}, |
|
{ |
|
"epoch": 12.25039619651347, |
|
"grad_norm": 0.295749694108963, |
|
"learning_rate": 3.5730049493844405e-05, |
|
"loss": 0.0261, |
|
"step": 7730 |
|
}, |
|
{ |
|
"epoch": 12.266244057052297, |
|
"grad_norm": 0.3179740607738495, |
|
"learning_rate": 3.560452810634787e-05, |
|
"loss": 0.0214, |
|
"step": 7740 |
|
}, |
|
{ |
|
"epoch": 12.282091917591124, |
|
"grad_norm": 0.1746010035276413, |
|
"learning_rate": 3.547910556493852e-05, |
|
"loss": 0.0208, |
|
"step": 7750 |
|
}, |
|
{ |
|
"epoch": 12.297939778129953, |
|
"grad_norm": 0.2330365628004074, |
|
"learning_rate": 3.535378273082656e-05, |
|
"loss": 0.0208, |
|
"step": 7760 |
|
}, |
|
{ |
|
"epoch": 12.31378763866878, |
|
"grad_norm": 0.39738985896110535, |
|
"learning_rate": 3.5228560464537535e-05, |
|
"loss": 0.0239, |
|
"step": 7770 |
|
}, |
|
{ |
|
"epoch": 12.329635499207606, |
|
"grad_norm": 0.2947781980037689, |
|
"learning_rate": 3.510343962590653e-05, |
|
"loss": 0.0191, |
|
"step": 7780 |
|
}, |
|
{ |
|
"epoch": 12.345483359746435, |
|
"grad_norm": 0.21791400015354156, |
|
"learning_rate": 3.49784210740721e-05, |
|
"loss": 0.0264, |
|
"step": 7790 |
|
}, |
|
{ |
|
"epoch": 12.361331220285262, |
|
"grad_norm": 0.19092513620853424, |
|
"learning_rate": 3.485350566747049e-05, |
|
"loss": 0.0248, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 12.377179080824089, |
|
"grad_norm": 0.35505980253219604, |
|
"learning_rate": 3.4728694263829684e-05, |
|
"loss": 0.0199, |
|
"step": 7810 |
|
}, |
|
{ |
|
"epoch": 12.393026941362915, |
|
"grad_norm": 0.1710539311170578, |
|
"learning_rate": 3.460398772016355e-05, |
|
"loss": 0.019, |
|
"step": 7820 |
|
}, |
|
{ |
|
"epoch": 12.408874801901744, |
|
"grad_norm": 0.33750495314598083, |
|
"learning_rate": 3.4479386892765905e-05, |
|
"loss": 0.0205, |
|
"step": 7830 |
|
}, |
|
{ |
|
"epoch": 12.42472266244057, |
|
"grad_norm": 0.2829129099845886, |
|
"learning_rate": 3.43548926372047e-05, |
|
"loss": 0.0198, |
|
"step": 7840 |
|
}, |
|
{ |
|
"epoch": 12.440570522979398, |
|
"grad_norm": 0.18969641625881195, |
|
"learning_rate": 3.423050580831611e-05, |
|
"loss": 0.0205, |
|
"step": 7850 |
|
}, |
|
{ |
|
"epoch": 12.456418383518225, |
|
"grad_norm": 0.2330506592988968, |
|
"learning_rate": 3.410622726019865e-05, |
|
"loss": 0.0213, |
|
"step": 7860 |
|
}, |
|
{ |
|
"epoch": 12.472266244057053, |
|
"grad_norm": 0.2536896765232086, |
|
"learning_rate": 3.398205784620735e-05, |
|
"loss": 0.0207, |
|
"step": 7870 |
|
}, |
|
{ |
|
"epoch": 12.48811410459588, |
|
"grad_norm": 0.16537010669708252, |
|
"learning_rate": 3.3857998418947864e-05, |
|
"loss": 0.0169, |
|
"step": 7880 |
|
}, |
|
{ |
|
"epoch": 12.503961965134707, |
|
"grad_norm": 0.2565062344074249, |
|
"learning_rate": 3.373404983027062e-05, |
|
"loss": 0.0214, |
|
"step": 7890 |
|
}, |
|
{ |
|
"epoch": 12.519809825673534, |
|
"grad_norm": 0.18320074677467346, |
|
"learning_rate": 3.361021293126497e-05, |
|
"loss": 0.0166, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 12.535657686212362, |
|
"grad_norm": 0.2510707378387451, |
|
"learning_rate": 3.3486488572253385e-05, |
|
"loss": 0.0173, |
|
"step": 7910 |
|
}, |
|
{ |
|
"epoch": 12.551505546751189, |
|
"grad_norm": 0.24890565872192383, |
|
"learning_rate": 3.3362877602785524e-05, |
|
"loss": 0.0196, |
|
"step": 7920 |
|
}, |
|
{ |
|
"epoch": 12.567353407290016, |
|
"grad_norm": 0.25348639488220215, |
|
"learning_rate": 3.3239380871632543e-05, |
|
"loss": 0.0201, |
|
"step": 7930 |
|
}, |
|
{ |
|
"epoch": 12.583201267828843, |
|
"grad_norm": 0.2547270655632019, |
|
"learning_rate": 3.3115999226781135e-05, |
|
"loss": 0.0163, |
|
"step": 7940 |
|
}, |
|
{ |
|
"epoch": 12.599049128367671, |
|
"grad_norm": 0.1903742551803589, |
|
"learning_rate": 3.299273351542773e-05, |
|
"loss": 0.0162, |
|
"step": 7950 |
|
}, |
|
{ |
|
"epoch": 12.614896988906498, |
|
"grad_norm": 0.14592960476875305, |
|
"learning_rate": 3.286958458397273e-05, |
|
"loss": 0.0218, |
|
"step": 7960 |
|
}, |
|
{ |
|
"epoch": 12.630744849445325, |
|
"grad_norm": 0.220992311835289, |
|
"learning_rate": 3.27465532780147e-05, |
|
"loss": 0.0193, |
|
"step": 7970 |
|
}, |
|
{ |
|
"epoch": 12.646592709984152, |
|
"grad_norm": 0.3510618209838867, |
|
"learning_rate": 3.2623640442344505e-05, |
|
"loss": 0.021, |
|
"step": 7980 |
|
}, |
|
{ |
|
"epoch": 12.66244057052298, |
|
"grad_norm": 0.1398414969444275, |
|
"learning_rate": 3.250084692093953e-05, |
|
"loss": 0.0199, |
|
"step": 7990 |
|
}, |
|
{ |
|
"epoch": 12.678288431061807, |
|
"grad_norm": 0.24324694275856018, |
|
"learning_rate": 3.237817355695791e-05, |
|
"loss": 0.0172, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 12.694136291600634, |
|
"grad_norm": 0.20084106922149658, |
|
"learning_rate": 3.225562119273272e-05, |
|
"loss": 0.0134, |
|
"step": 8010 |
|
}, |
|
{ |
|
"epoch": 12.70998415213946, |
|
"grad_norm": 0.20435374975204468, |
|
"learning_rate": 3.213319066976617e-05, |
|
"loss": 0.019, |
|
"step": 8020 |
|
}, |
|
{ |
|
"epoch": 12.72583201267829, |
|
"grad_norm": 0.21612811088562012, |
|
"learning_rate": 3.201088282872387e-05, |
|
"loss": 0.0159, |
|
"step": 8030 |
|
}, |
|
{ |
|
"epoch": 12.741679873217116, |
|
"grad_norm": 0.2342618703842163, |
|
"learning_rate": 3.188869850942905e-05, |
|
"loss": 0.0186, |
|
"step": 8040 |
|
}, |
|
{ |
|
"epoch": 12.757527733755943, |
|
"grad_norm": 0.20277902483940125, |
|
"learning_rate": 3.176663855085677e-05, |
|
"loss": 0.0209, |
|
"step": 8050 |
|
}, |
|
{ |
|
"epoch": 12.77337559429477, |
|
"grad_norm": 0.2995304763317108, |
|
"learning_rate": 3.164470379112816e-05, |
|
"loss": 0.0247, |
|
"step": 8060 |
|
}, |
|
{ |
|
"epoch": 12.789223454833598, |
|
"grad_norm": 0.23769770562648773, |
|
"learning_rate": 3.15228950675047e-05, |
|
"loss": 0.0152, |
|
"step": 8070 |
|
}, |
|
{ |
|
"epoch": 12.805071315372425, |
|
"grad_norm": 0.1370396465063095, |
|
"learning_rate": 3.140121321638241e-05, |
|
"loss": 0.0177, |
|
"step": 8080 |
|
}, |
|
{ |
|
"epoch": 12.820919175911252, |
|
"grad_norm": 0.4313637614250183, |
|
"learning_rate": 3.127965907328617e-05, |
|
"loss": 0.0154, |
|
"step": 8090 |
|
}, |
|
{ |
|
"epoch": 12.836767036450079, |
|
"grad_norm": 0.2073371410369873, |
|
"learning_rate": 3.115823347286397e-05, |
|
"loss": 0.0165, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 12.852614896988907, |
|
"grad_norm": 0.32266175746917725, |
|
"learning_rate": 3.103693724888112e-05, |
|
"loss": 0.0212, |
|
"step": 8110 |
|
}, |
|
{ |
|
"epoch": 12.868462757527734, |
|
"grad_norm": 0.1806778460741043, |
|
"learning_rate": 3.091577123421462e-05, |
|
"loss": 0.0145, |
|
"step": 8120 |
|
}, |
|
{ |
|
"epoch": 12.88431061806656, |
|
"grad_norm": 0.25016674399375916, |
|
"learning_rate": 3.079473626084737e-05, |
|
"loss": 0.0211, |
|
"step": 8130 |
|
}, |
|
{ |
|
"epoch": 12.900158478605388, |
|
"grad_norm": 0.16698500514030457, |
|
"learning_rate": 3.067383315986249e-05, |
|
"loss": 0.0228, |
|
"step": 8140 |
|
}, |
|
{ |
|
"epoch": 12.916006339144216, |
|
"grad_norm": 0.22536715865135193, |
|
"learning_rate": 3.055306276143754e-05, |
|
"loss": 0.0213, |
|
"step": 8150 |
|
}, |
|
{ |
|
"epoch": 12.931854199683043, |
|
"grad_norm": 0.17826388776302338, |
|
"learning_rate": 3.0432425894838977e-05, |
|
"loss": 0.023, |
|
"step": 8160 |
|
}, |
|
{ |
|
"epoch": 12.94770206022187, |
|
"grad_norm": 0.22973258793354034, |
|
"learning_rate": 3.031192338841631e-05, |
|
"loss": 0.0188, |
|
"step": 8170 |
|
}, |
|
{ |
|
"epoch": 12.963549920760697, |
|
"grad_norm": 0.3207305669784546, |
|
"learning_rate": 3.0191556069596476e-05, |
|
"loss": 0.0199, |
|
"step": 8180 |
|
}, |
|
{ |
|
"epoch": 12.979397781299525, |
|
"grad_norm": 0.19772501289844513, |
|
"learning_rate": 3.0071324764878155e-05, |
|
"loss": 0.0177, |
|
"step": 8190 |
|
}, |
|
{ |
|
"epoch": 12.995245641838352, |
|
"grad_norm": 0.19332300126552582, |
|
"learning_rate": 2.99512302998261e-05, |
|
"loss": 0.0243, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 13.011093502377179, |
|
"grad_norm": 0.22696681320667267, |
|
"learning_rate": 2.9831273499065422e-05, |
|
"loss": 0.0178, |
|
"step": 8210 |
|
}, |
|
{ |
|
"epoch": 13.026941362916006, |
|
"grad_norm": 0.2711600065231323, |
|
"learning_rate": 2.9711455186275998e-05, |
|
"loss": 0.0149, |
|
"step": 8220 |
|
}, |
|
{ |
|
"epoch": 13.042789223454834, |
|
"grad_norm": 0.22301819920539856, |
|
"learning_rate": 2.959177618418678e-05, |
|
"loss": 0.0201, |
|
"step": 8230 |
|
}, |
|
{ |
|
"epoch": 13.058637083993661, |
|
"grad_norm": 0.1777944713830948, |
|
"learning_rate": 2.9472237314570134e-05, |
|
"loss": 0.0187, |
|
"step": 8240 |
|
}, |
|
{ |
|
"epoch": 13.074484944532488, |
|
"grad_norm": 0.24867452681064606, |
|
"learning_rate": 2.935283939823621e-05, |
|
"loss": 0.0217, |
|
"step": 8250 |
|
}, |
|
{ |
|
"epoch": 13.090332805071315, |
|
"grad_norm": 0.24219559133052826, |
|
"learning_rate": 2.9233583255027313e-05, |
|
"loss": 0.013, |
|
"step": 8260 |
|
}, |
|
{ |
|
"epoch": 13.106180665610143, |
|
"grad_norm": 0.14742301404476166, |
|
"learning_rate": 2.9114469703812292e-05, |
|
"loss": 0.0199, |
|
"step": 8270 |
|
}, |
|
{ |
|
"epoch": 13.12202852614897, |
|
"grad_norm": 0.167776420712471, |
|
"learning_rate": 2.8995499562480842e-05, |
|
"loss": 0.0183, |
|
"step": 8280 |
|
}, |
|
{ |
|
"epoch": 13.137876386687797, |
|
"grad_norm": 0.29319486021995544, |
|
"learning_rate": 2.8876673647937945e-05, |
|
"loss": 0.0208, |
|
"step": 8290 |
|
}, |
|
{ |
|
"epoch": 13.153724247226624, |
|
"grad_norm": 0.1555861234664917, |
|
"learning_rate": 2.875799277609832e-05, |
|
"loss": 0.0194, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 13.169572107765452, |
|
"grad_norm": 0.1766081005334854, |
|
"learning_rate": 2.863945776188065e-05, |
|
"loss": 0.0182, |
|
"step": 8310 |
|
}, |
|
{ |
|
"epoch": 13.185419968304279, |
|
"grad_norm": 0.2022436112165451, |
|
"learning_rate": 2.8521069419202195e-05, |
|
"loss": 0.0161, |
|
"step": 8320 |
|
}, |
|
{ |
|
"epoch": 13.201267828843106, |
|
"grad_norm": 0.1649257242679596, |
|
"learning_rate": 2.840282856097304e-05, |
|
"loss": 0.0168, |
|
"step": 8330 |
|
}, |
|
{ |
|
"epoch": 13.217115689381933, |
|
"grad_norm": 0.24146905541419983, |
|
"learning_rate": 2.828473599909055e-05, |
|
"loss": 0.0178, |
|
"step": 8340 |
|
}, |
|
{ |
|
"epoch": 13.232963549920761, |
|
"grad_norm": 0.20440474152565002, |
|
"learning_rate": 2.8166792544433894e-05, |
|
"loss": 0.0251, |
|
"step": 8350 |
|
}, |
|
{ |
|
"epoch": 13.248811410459588, |
|
"grad_norm": 0.21215130388736725, |
|
"learning_rate": 2.8048999006858323e-05, |
|
"loss": 0.0225, |
|
"step": 8360 |
|
}, |
|
{ |
|
"epoch": 13.264659270998415, |
|
"grad_norm": 0.17490635812282562, |
|
"learning_rate": 2.7931356195189735e-05, |
|
"loss": 0.0151, |
|
"step": 8370 |
|
}, |
|
{ |
|
"epoch": 13.280507131537242, |
|
"grad_norm": 0.2777180075645447, |
|
"learning_rate": 2.781386491721908e-05, |
|
"loss": 0.0178, |
|
"step": 8380 |
|
}, |
|
{ |
|
"epoch": 13.29635499207607, |
|
"grad_norm": 0.23932000994682312, |
|
"learning_rate": 2.7696525979696752e-05, |
|
"loss": 0.0147, |
|
"step": 8390 |
|
}, |
|
{ |
|
"epoch": 13.312202852614897, |
|
"grad_norm": 0.19922451674938202, |
|
"learning_rate": 2.7579340188327186e-05, |
|
"loss": 0.0168, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 13.328050713153724, |
|
"grad_norm": 0.2395889014005661, |
|
"learning_rate": 2.7462308347763127e-05, |
|
"loss": 0.017, |
|
"step": 8410 |
|
}, |
|
{ |
|
"epoch": 13.343898573692552, |
|
"grad_norm": 0.23529374599456787, |
|
"learning_rate": 2.7345431261600317e-05, |
|
"loss": 0.0197, |
|
"step": 8420 |
|
}, |
|
{ |
|
"epoch": 13.35974643423138, |
|
"grad_norm": 0.2671940326690674, |
|
"learning_rate": 2.7228709732371886e-05, |
|
"loss": 0.0155, |
|
"step": 8430 |
|
}, |
|
{ |
|
"epoch": 13.375594294770206, |
|
"grad_norm": 0.2091439962387085, |
|
"learning_rate": 2.7112144561542757e-05, |
|
"loss": 0.0205, |
|
"step": 8440 |
|
}, |
|
{ |
|
"epoch": 13.391442155309033, |
|
"grad_norm": 0.20118452608585358, |
|
"learning_rate": 2.6995736549504315e-05, |
|
"loss": 0.015, |
|
"step": 8450 |
|
}, |
|
{ |
|
"epoch": 13.407290015847861, |
|
"grad_norm": 0.15710382163524628, |
|
"learning_rate": 2.687948649556874e-05, |
|
"loss": 0.0192, |
|
"step": 8460 |
|
}, |
|
{ |
|
"epoch": 13.423137876386688, |
|
"grad_norm": 0.22499555349349976, |
|
"learning_rate": 2.6763395197963626e-05, |
|
"loss": 0.0268, |
|
"step": 8470 |
|
}, |
|
{ |
|
"epoch": 13.438985736925515, |
|
"grad_norm": 0.17233209311962128, |
|
"learning_rate": 2.6647463453826505e-05, |
|
"loss": 0.0191, |
|
"step": 8480 |
|
}, |
|
{ |
|
"epoch": 13.454833597464342, |
|
"grad_norm": 0.28862184286117554, |
|
"learning_rate": 2.6531692059199275e-05, |
|
"loss": 0.0196, |
|
"step": 8490 |
|
}, |
|
{ |
|
"epoch": 13.47068145800317, |
|
"grad_norm": 0.19401662051677704, |
|
"learning_rate": 2.6416081809022887e-05, |
|
"loss": 0.0171, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 13.486529318541997, |
|
"grad_norm": 0.21995659172534943, |
|
"learning_rate": 2.6300633497131687e-05, |
|
"loss": 0.0195, |
|
"step": 8510 |
|
}, |
|
{ |
|
"epoch": 13.502377179080824, |
|
"grad_norm": 0.2321847379207611, |
|
"learning_rate": 2.618534791624816e-05, |
|
"loss": 0.0209, |
|
"step": 8520 |
|
}, |
|
{ |
|
"epoch": 13.51822503961965, |
|
"grad_norm": 0.21036501228809357, |
|
"learning_rate": 2.6070225857977428e-05, |
|
"loss": 0.0204, |
|
"step": 8530 |
|
}, |
|
{ |
|
"epoch": 13.53407290015848, |
|
"grad_norm": 0.2640347480773926, |
|
"learning_rate": 2.5955268112801656e-05, |
|
"loss": 0.0158, |
|
"step": 8540 |
|
}, |
|
{ |
|
"epoch": 13.549920760697306, |
|
"grad_norm": 0.30468320846557617, |
|
"learning_rate": 2.58404754700749e-05, |
|
"loss": 0.0151, |
|
"step": 8550 |
|
}, |
|
{ |
|
"epoch": 13.565768621236133, |
|
"grad_norm": 0.19475166499614716, |
|
"learning_rate": 2.5725848718017454e-05, |
|
"loss": 0.0194, |
|
"step": 8560 |
|
}, |
|
{ |
|
"epoch": 13.58161648177496, |
|
"grad_norm": 0.18407198786735535, |
|
"learning_rate": 2.561138864371057e-05, |
|
"loss": 0.017, |
|
"step": 8570 |
|
}, |
|
{ |
|
"epoch": 13.597464342313788, |
|
"grad_norm": 0.197821244597435, |
|
"learning_rate": 2.549709603309104e-05, |
|
"loss": 0.0192, |
|
"step": 8580 |
|
}, |
|
{ |
|
"epoch": 13.613312202852615, |
|
"grad_norm": 0.19414368271827698, |
|
"learning_rate": 2.53829716709457e-05, |
|
"loss": 0.0161, |
|
"step": 8590 |
|
}, |
|
{ |
|
"epoch": 13.629160063391442, |
|
"grad_norm": 0.32657763361930847, |
|
"learning_rate": 2.5269016340906138e-05, |
|
"loss": 0.0193, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 13.645007923930269, |
|
"grad_norm": 0.17926651239395142, |
|
"learning_rate": 2.5155230825443332e-05, |
|
"loss": 0.0172, |
|
"step": 8610 |
|
}, |
|
{ |
|
"epoch": 13.660855784469097, |
|
"grad_norm": 0.1641903668642044, |
|
"learning_rate": 2.504161590586217e-05, |
|
"loss": 0.0171, |
|
"step": 8620 |
|
}, |
|
{ |
|
"epoch": 13.676703645007924, |
|
"grad_norm": 0.23365381360054016, |
|
"learning_rate": 2.4928172362296205e-05, |
|
"loss": 0.0149, |
|
"step": 8630 |
|
}, |
|
{ |
|
"epoch": 13.692551505546751, |
|
"grad_norm": 0.2839002311229706, |
|
"learning_rate": 2.4814900973702183e-05, |
|
"loss": 0.0198, |
|
"step": 8640 |
|
}, |
|
{ |
|
"epoch": 13.708399366085578, |
|
"grad_norm": 0.233973428606987, |
|
"learning_rate": 2.4701802517854822e-05, |
|
"loss": 0.022, |
|
"step": 8650 |
|
}, |
|
{ |
|
"epoch": 13.724247226624406, |
|
"grad_norm": 0.2717144191265106, |
|
"learning_rate": 2.458887777134134e-05, |
|
"loss": 0.0199, |
|
"step": 8660 |
|
}, |
|
{ |
|
"epoch": 13.740095087163233, |
|
"grad_norm": 0.2552318274974823, |
|
"learning_rate": 2.44761275095562e-05, |
|
"loss": 0.019, |
|
"step": 8670 |
|
}, |
|
{ |
|
"epoch": 13.75594294770206, |
|
"grad_norm": 0.17286346852779388, |
|
"learning_rate": 2.4363552506695814e-05, |
|
"loss": 0.0182, |
|
"step": 8680 |
|
}, |
|
{ |
|
"epoch": 13.771790808240887, |
|
"grad_norm": 0.1892533153295517, |
|
"learning_rate": 2.4251153535753107e-05, |
|
"loss": 0.0212, |
|
"step": 8690 |
|
}, |
|
{ |
|
"epoch": 13.787638668779715, |
|
"grad_norm": 0.15570400655269623, |
|
"learning_rate": 2.4138931368512375e-05, |
|
"loss": 0.0178, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 13.803486529318542, |
|
"grad_norm": 0.287626177072525, |
|
"learning_rate": 2.402688677554381e-05, |
|
"loss": 0.0166, |
|
"step": 8710 |
|
}, |
|
{ |
|
"epoch": 13.819334389857369, |
|
"grad_norm": 0.3084344267845154, |
|
"learning_rate": 2.3915020526198373e-05, |
|
"loss": 0.0148, |
|
"step": 8720 |
|
}, |
|
{ |
|
"epoch": 13.835182250396196, |
|
"grad_norm": 0.13890209794044495, |
|
"learning_rate": 2.3803333388602372e-05, |
|
"loss": 0.0158, |
|
"step": 8730 |
|
}, |
|
{ |
|
"epoch": 13.851030110935024, |
|
"grad_norm": 0.24919134378433228, |
|
"learning_rate": 2.3691826129652267e-05, |
|
"loss": 0.0202, |
|
"step": 8740 |
|
}, |
|
{ |
|
"epoch": 13.866877971473851, |
|
"grad_norm": 0.19362711906433105, |
|
"learning_rate": 2.3580499515009408e-05, |
|
"loss": 0.0186, |
|
"step": 8750 |
|
}, |
|
{ |
|
"epoch": 13.882725832012678, |
|
"grad_norm": 0.23859569430351257, |
|
"learning_rate": 2.346935430909476e-05, |
|
"loss": 0.018, |
|
"step": 8760 |
|
}, |
|
{ |
|
"epoch": 13.898573692551505, |
|
"grad_norm": 0.41652438044548035, |
|
"learning_rate": 2.335839127508359e-05, |
|
"loss": 0.018, |
|
"step": 8770 |
|
}, |
|
{ |
|
"epoch": 13.914421553090333, |
|
"grad_norm": 0.19404253363609314, |
|
"learning_rate": 2.3247611174900375e-05, |
|
"loss": 0.0144, |
|
"step": 8780 |
|
}, |
|
{ |
|
"epoch": 13.93026941362916, |
|
"grad_norm": 0.27209949493408203, |
|
"learning_rate": 2.3137014769213415e-05, |
|
"loss": 0.0181, |
|
"step": 8790 |
|
}, |
|
{ |
|
"epoch": 13.946117274167987, |
|
"grad_norm": 0.15419328212738037, |
|
"learning_rate": 2.3026602817429677e-05, |
|
"loss": 0.0176, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 13.961965134706814, |
|
"grad_norm": 0.22414186596870422, |
|
"learning_rate": 2.291637607768964e-05, |
|
"loss": 0.0224, |
|
"step": 8810 |
|
}, |
|
{ |
|
"epoch": 13.977812995245642, |
|
"grad_norm": 0.16095861792564392, |
|
"learning_rate": 2.280633530686195e-05, |
|
"loss": 0.0152, |
|
"step": 8820 |
|
}, |
|
{ |
|
"epoch": 13.99366085578447, |
|
"grad_norm": 0.1415528804063797, |
|
"learning_rate": 2.2696481260538393e-05, |
|
"loss": 0.0156, |
|
"step": 8830 |
|
}, |
|
{ |
|
"epoch": 14.009508716323296, |
|
"grad_norm": 0.1570771187543869, |
|
"learning_rate": 2.2586814693028524e-05, |
|
"loss": 0.0173, |
|
"step": 8840 |
|
}, |
|
{ |
|
"epoch": 14.025356576862123, |
|
"grad_norm": 0.2337312251329422, |
|
"learning_rate": 2.247733635735466e-05, |
|
"loss": 0.0197, |
|
"step": 8850 |
|
}, |
|
{ |
|
"epoch": 14.041204437400951, |
|
"grad_norm": 0.2519458532333374, |
|
"learning_rate": 2.2368047005246585e-05, |
|
"loss": 0.0177, |
|
"step": 8860 |
|
}, |
|
{ |
|
"epoch": 14.057052297939778, |
|
"grad_norm": 0.26522183418273926, |
|
"learning_rate": 2.2258947387136415e-05, |
|
"loss": 0.0192, |
|
"step": 8870 |
|
}, |
|
{ |
|
"epoch": 14.072900158478605, |
|
"grad_norm": 0.12336030602455139, |
|
"learning_rate": 2.2150038252153533e-05, |
|
"loss": 0.0175, |
|
"step": 8880 |
|
}, |
|
{ |
|
"epoch": 14.088748019017432, |
|
"grad_norm": 0.15576300024986267, |
|
"learning_rate": 2.204132034811929e-05, |
|
"loss": 0.0174, |
|
"step": 8890 |
|
}, |
|
{ |
|
"epoch": 14.10459587955626, |
|
"grad_norm": 0.21424925327301025, |
|
"learning_rate": 2.1932794421542018e-05, |
|
"loss": 0.0142, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 14.120443740095087, |
|
"grad_norm": 0.21682120859622955, |
|
"learning_rate": 2.182446121761186e-05, |
|
"loss": 0.0191, |
|
"step": 8910 |
|
}, |
|
{ |
|
"epoch": 14.136291600633914, |
|
"grad_norm": 0.25047534704208374, |
|
"learning_rate": 2.171632148019552e-05, |
|
"loss": 0.0154, |
|
"step": 8920 |
|
}, |
|
{ |
|
"epoch": 14.152139461172741, |
|
"grad_norm": 0.2971823513507843, |
|
"learning_rate": 2.1608375951831383e-05, |
|
"loss": 0.0227, |
|
"step": 8930 |
|
}, |
|
{ |
|
"epoch": 14.16798732171157, |
|
"grad_norm": 0.2523512542247772, |
|
"learning_rate": 2.1500625373724286e-05, |
|
"loss": 0.0144, |
|
"step": 8940 |
|
}, |
|
{ |
|
"epoch": 14.183835182250396, |
|
"grad_norm": 0.21813775599002838, |
|
"learning_rate": 2.1393070485740386e-05, |
|
"loss": 0.0154, |
|
"step": 8950 |
|
}, |
|
{ |
|
"epoch": 14.199683042789223, |
|
"grad_norm": 0.2209501713514328, |
|
"learning_rate": 2.1285712026402215e-05, |
|
"loss": 0.0137, |
|
"step": 8960 |
|
}, |
|
{ |
|
"epoch": 14.21553090332805, |
|
"grad_norm": 0.1733659952878952, |
|
"learning_rate": 2.117855073288346e-05, |
|
"loss": 0.0133, |
|
"step": 8970 |
|
}, |
|
{ |
|
"epoch": 14.231378763866879, |
|
"grad_norm": 0.19718633592128754, |
|
"learning_rate": 2.1071587341004058e-05, |
|
"loss": 0.0212, |
|
"step": 8980 |
|
}, |
|
{ |
|
"epoch": 14.247226624405705, |
|
"grad_norm": 0.23138895630836487, |
|
"learning_rate": 2.0964822585224987e-05, |
|
"loss": 0.0218, |
|
"step": 8990 |
|
}, |
|
{ |
|
"epoch": 14.263074484944532, |
|
"grad_norm": 0.22604243457317352, |
|
"learning_rate": 2.08582571986433e-05, |
|
"loss": 0.0165, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 14.278922345483359, |
|
"grad_norm": 0.21740014851093292, |
|
"learning_rate": 2.075189191298716e-05, |
|
"loss": 0.018, |
|
"step": 9010 |
|
}, |
|
{ |
|
"epoch": 14.294770206022188, |
|
"grad_norm": 0.5042977333068848, |
|
"learning_rate": 2.0645727458610646e-05, |
|
"loss": 0.015, |
|
"step": 9020 |
|
}, |
|
{ |
|
"epoch": 14.310618066561014, |
|
"grad_norm": 0.17162521183490753, |
|
"learning_rate": 2.0539764564488927e-05, |
|
"loss": 0.0147, |
|
"step": 9030 |
|
}, |
|
{ |
|
"epoch": 14.326465927099841, |
|
"grad_norm": 0.23630589246749878, |
|
"learning_rate": 2.04340039582131e-05, |
|
"loss": 0.0168, |
|
"step": 9040 |
|
}, |
|
{ |
|
"epoch": 14.342313787638668, |
|
"grad_norm": 0.22610369324684143, |
|
"learning_rate": 2.0328446365985253e-05, |
|
"loss": 0.019, |
|
"step": 9050 |
|
}, |
|
{ |
|
"epoch": 14.358161648177497, |
|
"grad_norm": 0.23171366751194, |
|
"learning_rate": 2.022309251261355e-05, |
|
"loss": 0.0185, |
|
"step": 9060 |
|
}, |
|
{ |
|
"epoch": 14.374009508716323, |
|
"grad_norm": 0.20405028760433197, |
|
"learning_rate": 2.0117943121507117e-05, |
|
"loss": 0.018, |
|
"step": 9070 |
|
}, |
|
{ |
|
"epoch": 14.38985736925515, |
|
"grad_norm": 0.20171862840652466, |
|
"learning_rate": 2.0012998914671182e-05, |
|
"loss": 0.0156, |
|
"step": 9080 |
|
}, |
|
{ |
|
"epoch": 14.405705229793977, |
|
"grad_norm": 0.2580902874469757, |
|
"learning_rate": 1.99082606127021e-05, |
|
"loss": 0.018, |
|
"step": 9090 |
|
}, |
|
{ |
|
"epoch": 14.421553090332806, |
|
"grad_norm": 0.16781866550445557, |
|
"learning_rate": 1.9803728934782323e-05, |
|
"loss": 0.0178, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 14.437400950871632, |
|
"grad_norm": 0.21224135160446167, |
|
"learning_rate": 1.969940459867562e-05, |
|
"loss": 0.0169, |
|
"step": 9110 |
|
}, |
|
{ |
|
"epoch": 14.45324881141046, |
|
"grad_norm": 0.16903094947338104, |
|
"learning_rate": 1.9595288320721923e-05, |
|
"loss": 0.0138, |
|
"step": 9120 |
|
}, |
|
{ |
|
"epoch": 14.469096671949288, |
|
"grad_norm": 0.2130252569913864, |
|
"learning_rate": 1.949138081583265e-05, |
|
"loss": 0.0175, |
|
"step": 9130 |
|
}, |
|
{ |
|
"epoch": 14.484944532488115, |
|
"grad_norm": 0.2133990377187729, |
|
"learning_rate": 1.938768279748566e-05, |
|
"loss": 0.0169, |
|
"step": 9140 |
|
}, |
|
{ |
|
"epoch": 14.500792393026941, |
|
"grad_norm": 0.19141750037670135, |
|
"learning_rate": 1.9284194977720344e-05, |
|
"loss": 0.0139, |
|
"step": 9150 |
|
}, |
|
{ |
|
"epoch": 14.516640253565768, |
|
"grad_norm": 0.18053506314754486, |
|
"learning_rate": 1.9180918067132813e-05, |
|
"loss": 0.0202, |
|
"step": 9160 |
|
}, |
|
{ |
|
"epoch": 14.532488114104595, |
|
"grad_norm": 0.2015606015920639, |
|
"learning_rate": 1.9077852774870945e-05, |
|
"loss": 0.0188, |
|
"step": 9170 |
|
}, |
|
{ |
|
"epoch": 14.548335974643424, |
|
"grad_norm": 0.2063121348619461, |
|
"learning_rate": 1.8974999808629545e-05, |
|
"loss": 0.0141, |
|
"step": 9180 |
|
}, |
|
{ |
|
"epoch": 14.56418383518225, |
|
"grad_norm": 0.14588534832000732, |
|
"learning_rate": 1.887235987464553e-05, |
|
"loss": 0.0147, |
|
"step": 9190 |
|
}, |
|
{ |
|
"epoch": 14.580031695721077, |
|
"grad_norm": 0.17593805491924286, |
|
"learning_rate": 1.876993367769297e-05, |
|
"loss": 0.0139, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 14.595879556259906, |
|
"grad_norm": 0.15790753066539764, |
|
"learning_rate": 1.8667721921078397e-05, |
|
"loss": 0.0123, |
|
"step": 9210 |
|
}, |
|
{ |
|
"epoch": 14.611727416798733, |
|
"grad_norm": 0.23879548907279968, |
|
"learning_rate": 1.8565725306635806e-05, |
|
"loss": 0.0186, |
|
"step": 9220 |
|
}, |
|
{ |
|
"epoch": 14.62757527733756, |
|
"grad_norm": 0.23344580829143524, |
|
"learning_rate": 1.8463944534722e-05, |
|
"loss": 0.0158, |
|
"step": 9230 |
|
}, |
|
{ |
|
"epoch": 14.643423137876386, |
|
"grad_norm": 0.219131201505661, |
|
"learning_rate": 1.83623803042117e-05, |
|
"loss": 0.0197, |
|
"step": 9240 |
|
}, |
|
{ |
|
"epoch": 14.659270998415215, |
|
"grad_norm": 0.17857685685157776, |
|
"learning_rate": 1.826103331249267e-05, |
|
"loss": 0.0128, |
|
"step": 9250 |
|
}, |
|
{ |
|
"epoch": 14.675118858954042, |
|
"grad_norm": 0.19189006090164185, |
|
"learning_rate": 1.8159904255461108e-05, |
|
"loss": 0.0172, |
|
"step": 9260 |
|
}, |
|
{ |
|
"epoch": 14.690966719492868, |
|
"grad_norm": 0.18938252329826355, |
|
"learning_rate": 1.8058993827516697e-05, |
|
"loss": 0.0212, |
|
"step": 9270 |
|
}, |
|
{ |
|
"epoch": 14.706814580031695, |
|
"grad_norm": 0.20771273970603943, |
|
"learning_rate": 1.795830272155796e-05, |
|
"loss": 0.0248, |
|
"step": 9280 |
|
}, |
|
{ |
|
"epoch": 14.722662440570524, |
|
"grad_norm": 0.22910486161708832, |
|
"learning_rate": 1.7857831628977456e-05, |
|
"loss": 0.015, |
|
"step": 9290 |
|
}, |
|
{ |
|
"epoch": 14.73851030110935, |
|
"grad_norm": 0.20048457384109497, |
|
"learning_rate": 1.7757581239656984e-05, |
|
"loss": 0.0168, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 14.754358161648177, |
|
"grad_norm": 0.21910695731639862, |
|
"learning_rate": 1.7657552241962904e-05, |
|
"loss": 0.0119, |
|
"step": 9310 |
|
}, |
|
{ |
|
"epoch": 14.770206022187004, |
|
"grad_norm": 0.214069664478302, |
|
"learning_rate": 1.7557745322741433e-05, |
|
"loss": 0.0167, |
|
"step": 9320 |
|
}, |
|
{ |
|
"epoch": 14.786053882725833, |
|
"grad_norm": 0.20221184194087982, |
|
"learning_rate": 1.745816116731383e-05, |
|
"loss": 0.0153, |
|
"step": 9330 |
|
}, |
|
{ |
|
"epoch": 14.80190174326466, |
|
"grad_norm": 0.1907825767993927, |
|
"learning_rate": 1.735880045947183e-05, |
|
"loss": 0.016, |
|
"step": 9340 |
|
}, |
|
{ |
|
"epoch": 14.817749603803486, |
|
"grad_norm": 0.2389329969882965, |
|
"learning_rate": 1.7259663881472787e-05, |
|
"loss": 0.0168, |
|
"step": 9350 |
|
}, |
|
{ |
|
"epoch": 14.833597464342313, |
|
"grad_norm": 0.2041391283273697, |
|
"learning_rate": 1.716075211403516e-05, |
|
"loss": 0.0166, |
|
"step": 9360 |
|
}, |
|
{ |
|
"epoch": 14.849445324881142, |
|
"grad_norm": 0.3064650595188141, |
|
"learning_rate": 1.7062065836333696e-05, |
|
"loss": 0.0166, |
|
"step": 9370 |
|
}, |
|
{ |
|
"epoch": 14.865293185419969, |
|
"grad_norm": 0.25269177556037903, |
|
"learning_rate": 1.6963605725994807e-05, |
|
"loss": 0.0179, |
|
"step": 9380 |
|
}, |
|
{ |
|
"epoch": 14.881141045958795, |
|
"grad_norm": 0.13689862191677094, |
|
"learning_rate": 1.686537245909201e-05, |
|
"loss": 0.0136, |
|
"step": 9390 |
|
}, |
|
{ |
|
"epoch": 14.896988906497622, |
|
"grad_norm": 0.2099904716014862, |
|
"learning_rate": 1.6767366710141125e-05, |
|
"loss": 0.0188, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 14.91283676703645, |
|
"grad_norm": 0.20536595582962036, |
|
"learning_rate": 1.666958915209578e-05, |
|
"loss": 0.0161, |
|
"step": 9410 |
|
}, |
|
{ |
|
"epoch": 14.928684627575278, |
|
"grad_norm": 0.18782939016819, |
|
"learning_rate": 1.6572040456342737e-05, |
|
"loss": 0.0249, |
|
"step": 9420 |
|
}, |
|
{ |
|
"epoch": 14.944532488114104, |
|
"grad_norm": 0.29753440618515015, |
|
"learning_rate": 1.6474721292697247e-05, |
|
"loss": 0.0174, |
|
"step": 9430 |
|
}, |
|
{ |
|
"epoch": 14.960380348652931, |
|
"grad_norm": 0.14820578694343567, |
|
"learning_rate": 1.6377632329398507e-05, |
|
"loss": 0.0229, |
|
"step": 9440 |
|
}, |
|
{ |
|
"epoch": 14.97622820919176, |
|
"grad_norm": 0.26186251640319824, |
|
"learning_rate": 1.628077423310503e-05, |
|
"loss": 0.0203, |
|
"step": 9450 |
|
}, |
|
{ |
|
"epoch": 14.992076069730587, |
|
"grad_norm": 0.2948777675628662, |
|
"learning_rate": 1.6184147668890116e-05, |
|
"loss": 0.0192, |
|
"step": 9460 |
|
}, |
|
{ |
|
"epoch": 15.007923930269413, |
|
"grad_norm": 0.20523428916931152, |
|
"learning_rate": 1.608775330023727e-05, |
|
"loss": 0.0171, |
|
"step": 9470 |
|
}, |
|
{ |
|
"epoch": 15.02377179080824, |
|
"grad_norm": 0.28263282775878906, |
|
"learning_rate": 1.599159178903557e-05, |
|
"loss": 0.0149, |
|
"step": 9480 |
|
}, |
|
{ |
|
"epoch": 15.039619651347069, |
|
"grad_norm": 0.2222396433353424, |
|
"learning_rate": 1.5895663795575255e-05, |
|
"loss": 0.0174, |
|
"step": 9490 |
|
}, |
|
{ |
|
"epoch": 15.055467511885896, |
|
"grad_norm": 0.2283553034067154, |
|
"learning_rate": 1.5799969978543072e-05, |
|
"loss": 0.0152, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 15.071315372424722, |
|
"grad_norm": 0.19190463423728943, |
|
"learning_rate": 1.570451099501781e-05, |
|
"loss": 0.0193, |
|
"step": 9510 |
|
}, |
|
{ |
|
"epoch": 15.08716323296355, |
|
"grad_norm": 0.2034788280725479, |
|
"learning_rate": 1.560928750046582e-05, |
|
"loss": 0.0142, |
|
"step": 9520 |
|
}, |
|
{ |
|
"epoch": 15.103011093502378, |
|
"grad_norm": 0.1533176153898239, |
|
"learning_rate": 1.5514300148736405e-05, |
|
"loss": 0.0147, |
|
"step": 9530 |
|
}, |
|
{ |
|
"epoch": 15.118858954041205, |
|
"grad_norm": 0.16323472559452057, |
|
"learning_rate": 1.5419549592057485e-05, |
|
"loss": 0.0128, |
|
"step": 9540 |
|
}, |
|
{ |
|
"epoch": 15.134706814580031, |
|
"grad_norm": 0.1336495280265808, |
|
"learning_rate": 1.532503648103095e-05, |
|
"loss": 0.0152, |
|
"step": 9550 |
|
}, |
|
{ |
|
"epoch": 15.150554675118858, |
|
"grad_norm": 0.23295193910598755, |
|
"learning_rate": 1.5230761464628351e-05, |
|
"loss": 0.0202, |
|
"step": 9560 |
|
}, |
|
{ |
|
"epoch": 15.166402535657687, |
|
"grad_norm": 0.21971255540847778, |
|
"learning_rate": 1.5136725190186312e-05, |
|
"loss": 0.0127, |
|
"step": 9570 |
|
}, |
|
{ |
|
"epoch": 15.182250396196514, |
|
"grad_norm": 0.12831509113311768, |
|
"learning_rate": 1.5042928303402155e-05, |
|
"loss": 0.0131, |
|
"step": 9580 |
|
}, |
|
{ |
|
"epoch": 15.19809825673534, |
|
"grad_norm": 0.2782778739929199, |
|
"learning_rate": 1.4949371448329491e-05, |
|
"loss": 0.0134, |
|
"step": 9590 |
|
}, |
|
{ |
|
"epoch": 15.213946117274167, |
|
"grad_norm": 0.15872108936309814, |
|
"learning_rate": 1.4856055267373704e-05, |
|
"loss": 0.0126, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 15.229793977812996, |
|
"grad_norm": 0.1593102514743805, |
|
"learning_rate": 1.476298040128763e-05, |
|
"loss": 0.0168, |
|
"step": 9610 |
|
}, |
|
{ |
|
"epoch": 15.245641838351823, |
|
"grad_norm": 0.21707729995250702, |
|
"learning_rate": 1.4670147489167157e-05, |
|
"loss": 0.0128, |
|
"step": 9620 |
|
}, |
|
{ |
|
"epoch": 15.26148969889065, |
|
"grad_norm": 0.13602186739444733, |
|
"learning_rate": 1.4577557168446704e-05, |
|
"loss": 0.0163, |
|
"step": 9630 |
|
}, |
|
{ |
|
"epoch": 15.277337559429476, |
|
"grad_norm": 0.15380342304706573, |
|
"learning_rate": 1.4485210074895028e-05, |
|
"loss": 0.0131, |
|
"step": 9640 |
|
}, |
|
{ |
|
"epoch": 15.293185419968305, |
|
"grad_norm": 0.23396658897399902, |
|
"learning_rate": 1.4393106842610765e-05, |
|
"loss": 0.0182, |
|
"step": 9650 |
|
}, |
|
{ |
|
"epoch": 15.309033280507132, |
|
"grad_norm": 0.351018488407135, |
|
"learning_rate": 1.4301248104018039e-05, |
|
"loss": 0.0163, |
|
"step": 9660 |
|
}, |
|
{ |
|
"epoch": 15.324881141045958, |
|
"grad_norm": 0.15941226482391357, |
|
"learning_rate": 1.4209634489862228e-05, |
|
"loss": 0.0151, |
|
"step": 9670 |
|
}, |
|
{ |
|
"epoch": 15.340729001584785, |
|
"grad_norm": 0.31737878918647766, |
|
"learning_rate": 1.4118266629205501e-05, |
|
"loss": 0.016, |
|
"step": 9680 |
|
}, |
|
{ |
|
"epoch": 15.356576862123614, |
|
"grad_norm": 0.1942298859357834, |
|
"learning_rate": 1.4027145149422637e-05, |
|
"loss": 0.0138, |
|
"step": 9690 |
|
}, |
|
{ |
|
"epoch": 15.37242472266244, |
|
"grad_norm": 0.20650826394557953, |
|
"learning_rate": 1.3936270676196605e-05, |
|
"loss": 0.0196, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 15.388272583201267, |
|
"grad_norm": 0.13685113191604614, |
|
"learning_rate": 1.3845643833514294e-05, |
|
"loss": 0.015, |
|
"step": 9710 |
|
}, |
|
{ |
|
"epoch": 15.404120443740094, |
|
"grad_norm": 0.22127866744995117, |
|
"learning_rate": 1.3755265243662308e-05, |
|
"loss": 0.0146, |
|
"step": 9720 |
|
}, |
|
{ |
|
"epoch": 15.419968304278923, |
|
"grad_norm": 0.1102658063173294, |
|
"learning_rate": 1.3665135527222566e-05, |
|
"loss": 0.0132, |
|
"step": 9730 |
|
}, |
|
{ |
|
"epoch": 15.43581616481775, |
|
"grad_norm": 0.17032739520072937, |
|
"learning_rate": 1.3575255303068157e-05, |
|
"loss": 0.0168, |
|
"step": 9740 |
|
}, |
|
{ |
|
"epoch": 15.451664025356576, |
|
"grad_norm": 0.20449472963809967, |
|
"learning_rate": 1.3485625188359008e-05, |
|
"loss": 0.0155, |
|
"step": 9750 |
|
}, |
|
{ |
|
"epoch": 15.467511885895403, |
|
"grad_norm": 0.2856760323047638, |
|
"learning_rate": 1.3396245798537655e-05, |
|
"loss": 0.0174, |
|
"step": 9760 |
|
}, |
|
{ |
|
"epoch": 15.483359746434232, |
|
"grad_norm": 0.17707166075706482, |
|
"learning_rate": 1.3307117747325104e-05, |
|
"loss": 0.0145, |
|
"step": 9770 |
|
}, |
|
{ |
|
"epoch": 15.499207606973059, |
|
"grad_norm": 0.2179175168275833, |
|
"learning_rate": 1.321824164671649e-05, |
|
"loss": 0.0142, |
|
"step": 9780 |
|
}, |
|
{ |
|
"epoch": 15.515055467511885, |
|
"grad_norm": 0.14933204650878906, |
|
"learning_rate": 1.3129618106976966e-05, |
|
"loss": 0.0166, |
|
"step": 9790 |
|
}, |
|
{ |
|
"epoch": 15.530903328050712, |
|
"grad_norm": 0.23230569064617157, |
|
"learning_rate": 1.3041247736637497e-05, |
|
"loss": 0.02, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 15.54675118858954, |
|
"grad_norm": 0.2146037369966507, |
|
"learning_rate": 1.2953131142490621e-05, |
|
"loss": 0.0187, |
|
"step": 9810 |
|
}, |
|
{ |
|
"epoch": 15.562599049128368, |
|
"grad_norm": 0.27099379897117615, |
|
"learning_rate": 1.2865268929586399e-05, |
|
"loss": 0.0175, |
|
"step": 9820 |
|
}, |
|
{ |
|
"epoch": 15.578446909667194, |
|
"grad_norm": 0.21641230583190918, |
|
"learning_rate": 1.2777661701228094e-05, |
|
"loss": 0.0131, |
|
"step": 9830 |
|
}, |
|
{ |
|
"epoch": 15.594294770206023, |
|
"grad_norm": 0.206056609749794, |
|
"learning_rate": 1.2690310058968208e-05, |
|
"loss": 0.0124, |
|
"step": 9840 |
|
}, |
|
{ |
|
"epoch": 15.61014263074485, |
|
"grad_norm": 0.2695901095867157, |
|
"learning_rate": 1.2603214602604251e-05, |
|
"loss": 0.017, |
|
"step": 9850 |
|
}, |
|
{ |
|
"epoch": 15.625990491283677, |
|
"grad_norm": 0.24454373121261597, |
|
"learning_rate": 1.2516375930174607e-05, |
|
"loss": 0.0185, |
|
"step": 9860 |
|
}, |
|
{ |
|
"epoch": 15.641838351822503, |
|
"grad_norm": 0.24143637716770172, |
|
"learning_rate": 1.2429794637954505e-05, |
|
"loss": 0.0167, |
|
"step": 9870 |
|
}, |
|
{ |
|
"epoch": 15.65768621236133, |
|
"grad_norm": 0.24098831415176392, |
|
"learning_rate": 1.234347132045185e-05, |
|
"loss": 0.0156, |
|
"step": 9880 |
|
}, |
|
{ |
|
"epoch": 15.673534072900159, |
|
"grad_norm": 0.2231469452381134, |
|
"learning_rate": 1.2257406570403158e-05, |
|
"loss": 0.0162, |
|
"step": 9890 |
|
}, |
|
{ |
|
"epoch": 15.689381933438986, |
|
"grad_norm": 0.18433237075805664, |
|
"learning_rate": 1.217160097876956e-05, |
|
"loss": 0.0148, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 15.705229793977812, |
|
"grad_norm": 0.24673160910606384, |
|
"learning_rate": 1.2086055134732604e-05, |
|
"loss": 0.0156, |
|
"step": 9910 |
|
}, |
|
{ |
|
"epoch": 15.721077654516641, |
|
"grad_norm": 0.2098625749349594, |
|
"learning_rate": 1.2000769625690367e-05, |
|
"loss": 0.0123, |
|
"step": 9920 |
|
}, |
|
{ |
|
"epoch": 15.736925515055468, |
|
"grad_norm": 0.19441257417201996, |
|
"learning_rate": 1.1915745037253273e-05, |
|
"loss": 0.0149, |
|
"step": 9930 |
|
}, |
|
{ |
|
"epoch": 15.752773375594295, |
|
"grad_norm": 0.30163636803627014, |
|
"learning_rate": 1.1830981953240183e-05, |
|
"loss": 0.0145, |
|
"step": 9940 |
|
}, |
|
{ |
|
"epoch": 15.768621236133121, |
|
"grad_norm": 0.2016548216342926, |
|
"learning_rate": 1.1746480955674371e-05, |
|
"loss": 0.0157, |
|
"step": 9950 |
|
}, |
|
{ |
|
"epoch": 15.78446909667195, |
|
"grad_norm": 0.16448210179805756, |
|
"learning_rate": 1.1662242624779413e-05, |
|
"loss": 0.0093, |
|
"step": 9960 |
|
}, |
|
{ |
|
"epoch": 15.800316957210777, |
|
"grad_norm": 0.1529219001531601, |
|
"learning_rate": 1.1578267538975384e-05, |
|
"loss": 0.016, |
|
"step": 9970 |
|
}, |
|
{ |
|
"epoch": 15.816164817749604, |
|
"grad_norm": 0.11220666021108627, |
|
"learning_rate": 1.1494556274874736e-05, |
|
"loss": 0.0151, |
|
"step": 9980 |
|
}, |
|
{ |
|
"epoch": 15.83201267828843, |
|
"grad_norm": 0.1833869069814682, |
|
"learning_rate": 1.1411109407278425e-05, |
|
"loss": 0.0126, |
|
"step": 9990 |
|
}, |
|
{ |
|
"epoch": 15.847860538827259, |
|
"grad_norm": 0.24351130425930023, |
|
"learning_rate": 1.1327927509171948e-05, |
|
"loss": 0.0148, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 15.863708399366086, |
|
"grad_norm": 0.18271566927433014, |
|
"learning_rate": 1.1245011151721358e-05, |
|
"loss": 0.0153, |
|
"step": 10010 |
|
}, |
|
{ |
|
"epoch": 15.879556259904913, |
|
"grad_norm": 0.17010100185871124, |
|
"learning_rate": 1.1162360904269399e-05, |
|
"loss": 0.0139, |
|
"step": 10020 |
|
}, |
|
{ |
|
"epoch": 15.89540412044374, |
|
"grad_norm": 0.20020832121372223, |
|
"learning_rate": 1.1079977334331593e-05, |
|
"loss": 0.014, |
|
"step": 10030 |
|
}, |
|
{ |
|
"epoch": 15.911251980982568, |
|
"grad_norm": 0.31756097078323364, |
|
"learning_rate": 1.0997861007592297e-05, |
|
"loss": 0.0137, |
|
"step": 10040 |
|
}, |
|
{ |
|
"epoch": 15.927099841521395, |
|
"grad_norm": 0.20857271552085876, |
|
"learning_rate": 1.0916012487900901e-05, |
|
"loss": 0.0187, |
|
"step": 10050 |
|
}, |
|
{ |
|
"epoch": 15.942947702060222, |
|
"grad_norm": 0.21330268681049347, |
|
"learning_rate": 1.0834432337267835e-05, |
|
"loss": 0.0182, |
|
"step": 10060 |
|
}, |
|
{ |
|
"epoch": 15.958795562599049, |
|
"grad_norm": 0.2602750360965729, |
|
"learning_rate": 1.0753121115860859e-05, |
|
"loss": 0.0126, |
|
"step": 10070 |
|
}, |
|
{ |
|
"epoch": 15.974643423137877, |
|
"grad_norm": 0.10706225037574768, |
|
"learning_rate": 1.0672079382001076e-05, |
|
"loss": 0.0141, |
|
"step": 10080 |
|
}, |
|
{ |
|
"epoch": 15.990491283676704, |
|
"grad_norm": 0.18691207468509674, |
|
"learning_rate": 1.0591307692159175e-05, |
|
"loss": 0.018, |
|
"step": 10090 |
|
}, |
|
{ |
|
"epoch": 16.00633914421553, |
|
"grad_norm": 0.16258151829242706, |
|
"learning_rate": 1.0510806600951634e-05, |
|
"loss": 0.0138, |
|
"step": 10100 |
|
}, |
|
{ |
|
"epoch": 16.022187004754358, |
|
"grad_norm": 0.22133781015872955, |
|
"learning_rate": 1.0430576661136809e-05, |
|
"loss": 0.0136, |
|
"step": 10110 |
|
}, |
|
{ |
|
"epoch": 16.038034865293184, |
|
"grad_norm": 0.14174553751945496, |
|
"learning_rate": 1.0350618423611258e-05, |
|
"loss": 0.012, |
|
"step": 10120 |
|
}, |
|
{ |
|
"epoch": 16.05388272583201, |
|
"grad_norm": 0.21903228759765625, |
|
"learning_rate": 1.0270932437405894e-05, |
|
"loss": 0.0162, |
|
"step": 10130 |
|
}, |
|
{ |
|
"epoch": 16.06973058637084, |
|
"grad_norm": 0.15532748401165009, |
|
"learning_rate": 1.0191519249682202e-05, |
|
"loss": 0.0129, |
|
"step": 10140 |
|
}, |
|
{ |
|
"epoch": 16.08557844690967, |
|
"grad_norm": 0.2952392101287842, |
|
"learning_rate": 1.0112379405728512e-05, |
|
"loss": 0.014, |
|
"step": 10150 |
|
}, |
|
{ |
|
"epoch": 16.101426307448495, |
|
"grad_norm": 0.1566477119922638, |
|
"learning_rate": 1.003351344895624e-05, |
|
"loss": 0.0168, |
|
"step": 10160 |
|
}, |
|
{ |
|
"epoch": 16.117274167987322, |
|
"grad_norm": 0.18433576822280884, |
|
"learning_rate": 9.954921920896181e-06, |
|
"loss": 0.0141, |
|
"step": 10170 |
|
}, |
|
{ |
|
"epoch": 16.13312202852615, |
|
"grad_norm": 0.1970781683921814, |
|
"learning_rate": 9.876605361194784e-06, |
|
"loss": 0.014, |
|
"step": 10180 |
|
}, |
|
{ |
|
"epoch": 16.148969889064976, |
|
"grad_norm": 0.22483587265014648, |
|
"learning_rate": 9.798564307610397e-06, |
|
"loss": 0.0172, |
|
"step": 10190 |
|
}, |
|
{ |
|
"epoch": 16.164817749603802, |
|
"grad_norm": 0.12307272851467133, |
|
"learning_rate": 9.720799296009652e-06, |
|
"loss": 0.0142, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 16.18066561014263, |
|
"grad_norm": 0.09929801523685455, |
|
"learning_rate": 9.64331086036372e-06, |
|
"loss": 0.0157, |
|
"step": 10210 |
|
}, |
|
{ |
|
"epoch": 16.19651347068146, |
|
"grad_norm": 0.22220948338508606, |
|
"learning_rate": 9.566099532744666e-06, |
|
"loss": 0.0144, |
|
"step": 10220 |
|
}, |
|
{ |
|
"epoch": 16.212361331220286, |
|
"grad_norm": 0.21739843487739563, |
|
"learning_rate": 9.48916584332184e-06, |
|
"loss": 0.0141, |
|
"step": 10230 |
|
}, |
|
{ |
|
"epoch": 16.228209191759113, |
|
"grad_norm": 0.20657970011234283, |
|
"learning_rate": 9.412510320358148e-06, |
|
"loss": 0.0125, |
|
"step": 10240 |
|
}, |
|
{ |
|
"epoch": 16.24405705229794, |
|
"grad_norm": 0.1589168906211853, |
|
"learning_rate": 9.336133490206527e-06, |
|
"loss": 0.0146, |
|
"step": 10250 |
|
}, |
|
{ |
|
"epoch": 16.259904912836767, |
|
"grad_norm": 0.20785082876682281, |
|
"learning_rate": 9.260035877306222e-06, |
|
"loss": 0.015, |
|
"step": 10260 |
|
}, |
|
{ |
|
"epoch": 16.275752773375594, |
|
"grad_norm": 0.3436870872974396, |
|
"learning_rate": 9.184218004179296e-06, |
|
"loss": 0.0142, |
|
"step": 10270 |
|
}, |
|
{ |
|
"epoch": 16.29160063391442, |
|
"grad_norm": 0.19214791059494019, |
|
"learning_rate": 9.108680391426944e-06, |
|
"loss": 0.0153, |
|
"step": 10280 |
|
}, |
|
{ |
|
"epoch": 16.307448494453247, |
|
"grad_norm": 0.18752476572990417, |
|
"learning_rate": 9.033423557725968e-06, |
|
"loss": 0.0198, |
|
"step": 10290 |
|
}, |
|
{ |
|
"epoch": 16.323296354992078, |
|
"grad_norm": 0.2008536010980606, |
|
"learning_rate": 8.958448019825238e-06, |
|
"loss": 0.0139, |
|
"step": 10300 |
|
}, |
|
{ |
|
"epoch": 16.339144215530904, |
|
"grad_norm": 0.3124418258666992, |
|
"learning_rate": 8.883754292542073e-06, |
|
"loss": 0.0184, |
|
"step": 10310 |
|
}, |
|
{ |
|
"epoch": 16.35499207606973, |
|
"grad_norm": 0.18249309062957764, |
|
"learning_rate": 8.809342888758787e-06, |
|
"loss": 0.012, |
|
"step": 10320 |
|
}, |
|
{ |
|
"epoch": 16.370839936608558, |
|
"grad_norm": 0.27810513973236084, |
|
"learning_rate": 8.735214319419122e-06, |
|
"loss": 0.012, |
|
"step": 10330 |
|
}, |
|
{ |
|
"epoch": 16.386687797147385, |
|
"grad_norm": 0.25395792722702026, |
|
"learning_rate": 8.66136909352469e-06, |
|
"loss": 0.0175, |
|
"step": 10340 |
|
}, |
|
{ |
|
"epoch": 16.40253565768621, |
|
"grad_norm": 0.10935286432504654, |
|
"learning_rate": 8.587807718131607e-06, |
|
"loss": 0.0138, |
|
"step": 10350 |
|
}, |
|
{ |
|
"epoch": 16.41838351822504, |
|
"grad_norm": 0.20935213565826416, |
|
"learning_rate": 8.514530698346911e-06, |
|
"loss": 0.0149, |
|
"step": 10360 |
|
}, |
|
{ |
|
"epoch": 16.434231378763865, |
|
"grad_norm": 0.15524841845035553, |
|
"learning_rate": 8.4415385373251e-06, |
|
"loss": 0.016, |
|
"step": 10370 |
|
}, |
|
{ |
|
"epoch": 16.450079239302696, |
|
"grad_norm": 0.17828898131847382, |
|
"learning_rate": 8.368831736264738e-06, |
|
"loss": 0.0155, |
|
"step": 10380 |
|
}, |
|
{ |
|
"epoch": 16.465927099841522, |
|
"grad_norm": 0.11186101287603378, |
|
"learning_rate": 8.296410794404925e-06, |
|
"loss": 0.0146, |
|
"step": 10390 |
|
}, |
|
{ |
|
"epoch": 16.48177496038035, |
|
"grad_norm": 0.1628289818763733, |
|
"learning_rate": 8.22427620902197e-06, |
|
"loss": 0.0138, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 16.497622820919176, |
|
"grad_norm": 0.20246130228042603, |
|
"learning_rate": 8.152428475425876e-06, |
|
"loss": 0.017, |
|
"step": 10410 |
|
}, |
|
{ |
|
"epoch": 16.513470681458003, |
|
"grad_norm": 0.2126418799161911, |
|
"learning_rate": 8.080868086957e-06, |
|
"loss": 0.0181, |
|
"step": 10420 |
|
}, |
|
{ |
|
"epoch": 16.52931854199683, |
|
"grad_norm": 0.27646327018737793, |
|
"learning_rate": 8.009595534982684e-06, |
|
"loss": 0.0138, |
|
"step": 10430 |
|
}, |
|
{ |
|
"epoch": 16.545166402535656, |
|
"grad_norm": 0.23372896015644073, |
|
"learning_rate": 7.938611308893796e-06, |
|
"loss": 0.0206, |
|
"step": 10440 |
|
}, |
|
{ |
|
"epoch": 16.561014263074483, |
|
"grad_norm": 0.21742697060108185, |
|
"learning_rate": 7.867915896101475e-06, |
|
"loss": 0.0117, |
|
"step": 10450 |
|
}, |
|
{ |
|
"epoch": 16.576862123613314, |
|
"grad_norm": 0.30523791909217834, |
|
"learning_rate": 7.797509782033696e-06, |
|
"loss": 0.0189, |
|
"step": 10460 |
|
}, |
|
{ |
|
"epoch": 16.59270998415214, |
|
"grad_norm": 0.33640623092651367, |
|
"learning_rate": 7.727393450131976e-06, |
|
"loss": 0.0147, |
|
"step": 10470 |
|
}, |
|
{ |
|
"epoch": 16.608557844690967, |
|
"grad_norm": 0.14561405777931213, |
|
"learning_rate": 7.65756738184808e-06, |
|
"loss": 0.0119, |
|
"step": 10480 |
|
}, |
|
{ |
|
"epoch": 16.624405705229794, |
|
"grad_norm": 0.27383899688720703, |
|
"learning_rate": 7.588032056640643e-06, |
|
"loss": 0.0181, |
|
"step": 10490 |
|
}, |
|
{ |
|
"epoch": 16.64025356576862, |
|
"grad_norm": 0.2113339751958847, |
|
"learning_rate": 7.518787951971951e-06, |
|
"loss": 0.0151, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 16.656101426307448, |
|
"grad_norm": 0.22912786900997162, |
|
"learning_rate": 7.449835543304645e-06, |
|
"loss": 0.013, |
|
"step": 10510 |
|
}, |
|
{ |
|
"epoch": 16.671949286846274, |
|
"grad_norm": 0.24694296717643738, |
|
"learning_rate": 7.381175304098398e-06, |
|
"loss": 0.0124, |
|
"step": 10520 |
|
}, |
|
{ |
|
"epoch": 16.687797147385105, |
|
"grad_norm": 0.14873796701431274, |
|
"learning_rate": 7.3128077058067675e-06, |
|
"loss": 0.0166, |
|
"step": 10530 |
|
}, |
|
{ |
|
"epoch": 16.70364500792393, |
|
"grad_norm": 0.14333923161029816, |
|
"learning_rate": 7.244733217873834e-06, |
|
"loss": 0.0128, |
|
"step": 10540 |
|
}, |
|
{ |
|
"epoch": 16.71949286846276, |
|
"grad_norm": 0.17385222017765045, |
|
"learning_rate": 7.1769523077310885e-06, |
|
"loss": 0.0172, |
|
"step": 10550 |
|
}, |
|
{ |
|
"epoch": 16.735340729001585, |
|
"grad_norm": 0.1889476180076599, |
|
"learning_rate": 7.1094654407941945e-06, |
|
"loss": 0.0105, |
|
"step": 10560 |
|
}, |
|
{ |
|
"epoch": 16.751188589540412, |
|
"grad_norm": 0.13638252019882202, |
|
"learning_rate": 7.042273080459716e-06, |
|
"loss": 0.0137, |
|
"step": 10570 |
|
}, |
|
{ |
|
"epoch": 16.76703645007924, |
|
"grad_norm": 0.16387833654880524, |
|
"learning_rate": 6.97537568810207e-06, |
|
"loss": 0.0121, |
|
"step": 10580 |
|
}, |
|
{ |
|
"epoch": 16.782884310618066, |
|
"grad_norm": 0.18849371373653412, |
|
"learning_rate": 6.908773723070228e-06, |
|
"loss": 0.0112, |
|
"step": 10590 |
|
}, |
|
{ |
|
"epoch": 16.798732171156892, |
|
"grad_norm": 0.2580081522464752, |
|
"learning_rate": 6.842467642684619e-06, |
|
"loss": 0.0164, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 16.814580031695723, |
|
"grad_norm": 0.19095416367053986, |
|
"learning_rate": 6.7764579022340405e-06, |
|
"loss": 0.0156, |
|
"step": 10610 |
|
}, |
|
{ |
|
"epoch": 16.83042789223455, |
|
"grad_norm": 0.38263216614723206, |
|
"learning_rate": 6.71074495497242e-06, |
|
"loss": 0.0141, |
|
"step": 10620 |
|
}, |
|
{ |
|
"epoch": 16.846275752773376, |
|
"grad_norm": 0.19752560555934906, |
|
"learning_rate": 6.645329252115812e-06, |
|
"loss": 0.0134, |
|
"step": 10630 |
|
}, |
|
{ |
|
"epoch": 16.862123613312203, |
|
"grad_norm": 0.21061812341213226, |
|
"learning_rate": 6.580211242839207e-06, |
|
"loss": 0.0161, |
|
"step": 10640 |
|
}, |
|
{ |
|
"epoch": 16.87797147385103, |
|
"grad_norm": 0.30705246329307556, |
|
"learning_rate": 6.515391374273522e-06, |
|
"loss": 0.0136, |
|
"step": 10650 |
|
}, |
|
{ |
|
"epoch": 16.893819334389857, |
|
"grad_norm": 0.16327637434005737, |
|
"learning_rate": 6.4508700915025145e-06, |
|
"loss": 0.0178, |
|
"step": 10660 |
|
}, |
|
{ |
|
"epoch": 16.909667194928684, |
|
"grad_norm": 0.19477631151676178, |
|
"learning_rate": 6.3866478375596454e-06, |
|
"loss": 0.0155, |
|
"step": 10670 |
|
}, |
|
{ |
|
"epoch": 16.92551505546751, |
|
"grad_norm": 0.10037015378475189, |
|
"learning_rate": 6.322725053425166e-06, |
|
"loss": 0.0141, |
|
"step": 10680 |
|
}, |
|
{ |
|
"epoch": 16.94136291600634, |
|
"grad_norm": 0.14681270718574524, |
|
"learning_rate": 6.259102178023019e-06, |
|
"loss": 0.0132, |
|
"step": 10690 |
|
}, |
|
{ |
|
"epoch": 16.957210776545168, |
|
"grad_norm": 0.14646220207214355, |
|
"learning_rate": 6.1957796482177865e-06, |
|
"loss": 0.015, |
|
"step": 10700 |
|
}, |
|
{ |
|
"epoch": 16.973058637083994, |
|
"grad_norm": 0.14095987379550934, |
|
"learning_rate": 6.1327578988118086e-06, |
|
"loss": 0.0117, |
|
"step": 10710 |
|
}, |
|
{ |
|
"epoch": 16.98890649762282, |
|
"grad_norm": 0.17115886509418488, |
|
"learning_rate": 6.070037362542058e-06, |
|
"loss": 0.0113, |
|
"step": 10720 |
|
}, |
|
{ |
|
"epoch": 17.004754358161648, |
|
"grad_norm": 0.18565335869789124, |
|
"learning_rate": 6.00761847007727e-06, |
|
"loss": 0.0144, |
|
"step": 10730 |
|
}, |
|
{ |
|
"epoch": 17.020602218700475, |
|
"grad_norm": 0.13037702441215515, |
|
"learning_rate": 5.945501650014951e-06, |
|
"loss": 0.0137, |
|
"step": 10740 |
|
}, |
|
{ |
|
"epoch": 17.0364500792393, |
|
"grad_norm": 0.18481898307800293, |
|
"learning_rate": 5.883687328878423e-06, |
|
"loss": 0.0133, |
|
"step": 10750 |
|
}, |
|
{ |
|
"epoch": 17.05229793977813, |
|
"grad_norm": 0.2438468635082245, |
|
"learning_rate": 5.822175931113933e-06, |
|
"loss": 0.0163, |
|
"step": 10760 |
|
}, |
|
{ |
|
"epoch": 17.06814580031696, |
|
"grad_norm": 0.18955622613430023, |
|
"learning_rate": 5.760967879087675e-06, |
|
"loss": 0.0113, |
|
"step": 10770 |
|
}, |
|
{ |
|
"epoch": 17.083993660855786, |
|
"grad_norm": 0.3023121953010559, |
|
"learning_rate": 5.700063593082971e-06, |
|
"loss": 0.0142, |
|
"step": 10780 |
|
}, |
|
{ |
|
"epoch": 17.099841521394612, |
|
"grad_norm": 0.19684407114982605, |
|
"learning_rate": 5.639463491297314e-06, |
|
"loss": 0.0183, |
|
"step": 10790 |
|
}, |
|
{ |
|
"epoch": 17.11568938193344, |
|
"grad_norm": 0.1771165281534195, |
|
"learning_rate": 5.579167989839512e-06, |
|
"loss": 0.0149, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 17.131537242472266, |
|
"grad_norm": 0.31700730323791504, |
|
"learning_rate": 5.519177502726897e-06, |
|
"loss": 0.0149, |
|
"step": 10810 |
|
}, |
|
{ |
|
"epoch": 17.147385103011093, |
|
"grad_norm": 0.24914953112602234, |
|
"learning_rate": 5.459492441882369e-06, |
|
"loss": 0.0096, |
|
"step": 10820 |
|
}, |
|
{ |
|
"epoch": 17.16323296354992, |
|
"grad_norm": 0.17742785811424255, |
|
"learning_rate": 5.400113217131669e-06, |
|
"loss": 0.0126, |
|
"step": 10830 |
|
}, |
|
{ |
|
"epoch": 17.179080824088746, |
|
"grad_norm": 0.19636410474777222, |
|
"learning_rate": 5.341040236200512e-06, |
|
"loss": 0.0148, |
|
"step": 10840 |
|
}, |
|
{ |
|
"epoch": 17.194928684627577, |
|
"grad_norm": 0.16673442721366882, |
|
"learning_rate": 5.282273904711793e-06, |
|
"loss": 0.0149, |
|
"step": 10850 |
|
}, |
|
{ |
|
"epoch": 17.210776545166404, |
|
"grad_norm": 0.17502924799919128, |
|
"learning_rate": 5.223814626182804e-06, |
|
"loss": 0.0156, |
|
"step": 10860 |
|
}, |
|
{ |
|
"epoch": 17.22662440570523, |
|
"grad_norm": 0.24374344944953918, |
|
"learning_rate": 5.165662802022469e-06, |
|
"loss": 0.0156, |
|
"step": 10870 |
|
}, |
|
{ |
|
"epoch": 17.242472266244057, |
|
"grad_norm": 0.20077760517597198, |
|
"learning_rate": 5.107818831528593e-06, |
|
"loss": 0.0152, |
|
"step": 10880 |
|
}, |
|
{ |
|
"epoch": 17.258320126782884, |
|
"grad_norm": 0.19688129425048828, |
|
"learning_rate": 5.050283111885123e-06, |
|
"loss": 0.0108, |
|
"step": 10890 |
|
}, |
|
{ |
|
"epoch": 17.27416798732171, |
|
"grad_norm": 0.2499351054430008, |
|
"learning_rate": 4.9930560381593825e-06, |
|
"loss": 0.0174, |
|
"step": 10900 |
|
}, |
|
{ |
|
"epoch": 17.290015847860538, |
|
"grad_norm": 0.11787986755371094, |
|
"learning_rate": 4.936138003299412e-06, |
|
"loss": 0.011, |
|
"step": 10910 |
|
}, |
|
{ |
|
"epoch": 17.305863708399364, |
|
"grad_norm": 0.10276877880096436, |
|
"learning_rate": 4.879529398131227e-06, |
|
"loss": 0.0151, |
|
"step": 10920 |
|
}, |
|
{ |
|
"epoch": 17.321711568938195, |
|
"grad_norm": 0.21218866109848022, |
|
"learning_rate": 4.823230611356155e-06, |
|
"loss": 0.0188, |
|
"step": 10930 |
|
}, |
|
{ |
|
"epoch": 17.33755942947702, |
|
"grad_norm": 0.12643927335739136, |
|
"learning_rate": 4.767242029548186e-06, |
|
"loss": 0.0137, |
|
"step": 10940 |
|
}, |
|
{ |
|
"epoch": 17.35340729001585, |
|
"grad_norm": 0.22125521302223206, |
|
"learning_rate": 4.711564037151261e-06, |
|
"loss": 0.0137, |
|
"step": 10950 |
|
}, |
|
{ |
|
"epoch": 17.369255150554675, |
|
"grad_norm": 0.18663623929023743, |
|
"learning_rate": 4.656197016476716e-06, |
|
"loss": 0.0169, |
|
"step": 10960 |
|
}, |
|
{ |
|
"epoch": 17.385103011093502, |
|
"grad_norm": 0.1977252960205078, |
|
"learning_rate": 4.60114134770055e-06, |
|
"loss": 0.0142, |
|
"step": 10970 |
|
}, |
|
{ |
|
"epoch": 17.40095087163233, |
|
"grad_norm": 0.1531880646944046, |
|
"learning_rate": 4.54639740886093e-06, |
|
"loss": 0.0134, |
|
"step": 10980 |
|
}, |
|
{ |
|
"epoch": 17.416798732171156, |
|
"grad_norm": 0.25299400091171265, |
|
"learning_rate": 4.4919655758555055e-06, |
|
"loss": 0.0115, |
|
"step": 10990 |
|
}, |
|
{ |
|
"epoch": 17.432646592709983, |
|
"grad_norm": 0.15232089161872864, |
|
"learning_rate": 4.4378462224388514e-06, |
|
"loss": 0.0121, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 17.448494453248813, |
|
"grad_norm": 0.2122395932674408, |
|
"learning_rate": 4.3840397202199515e-06, |
|
"loss": 0.0138, |
|
"step": 11010 |
|
}, |
|
{ |
|
"epoch": 17.46434231378764, |
|
"grad_norm": 0.4084971845149994, |
|
"learning_rate": 4.330546438659555e-06, |
|
"loss": 0.0169, |
|
"step": 11020 |
|
}, |
|
{ |
|
"epoch": 17.480190174326466, |
|
"grad_norm": 0.13414064049720764, |
|
"learning_rate": 4.2773667450677346e-06, |
|
"loss": 0.0115, |
|
"step": 11030 |
|
}, |
|
{ |
|
"epoch": 17.496038034865293, |
|
"grad_norm": 0.24400712549686432, |
|
"learning_rate": 4.224501004601311e-06, |
|
"loss": 0.0165, |
|
"step": 11040 |
|
}, |
|
{ |
|
"epoch": 17.51188589540412, |
|
"grad_norm": 0.15812645852565765, |
|
"learning_rate": 4.1719495802613254e-06, |
|
"loss": 0.0139, |
|
"step": 11050 |
|
}, |
|
{ |
|
"epoch": 17.527733755942947, |
|
"grad_norm": 0.32170212268829346, |
|
"learning_rate": 4.119712832890599e-06, |
|
"loss": 0.0173, |
|
"step": 11060 |
|
}, |
|
{ |
|
"epoch": 17.543581616481774, |
|
"grad_norm": 0.26727718114852905, |
|
"learning_rate": 4.0677911211712494e-06, |
|
"loss": 0.0137, |
|
"step": 11070 |
|
}, |
|
{ |
|
"epoch": 17.5594294770206, |
|
"grad_norm": 0.2177404910326004, |
|
"learning_rate": 4.0161848016221804e-06, |
|
"loss": 0.0115, |
|
"step": 11080 |
|
}, |
|
{ |
|
"epoch": 17.57527733755943, |
|
"grad_norm": 0.08245435357093811, |
|
"learning_rate": 3.964894228596683e-06, |
|
"loss": 0.0125, |
|
"step": 11090 |
|
}, |
|
{ |
|
"epoch": 17.591125198098258, |
|
"grad_norm": 0.2010851800441742, |
|
"learning_rate": 3.913919754279966e-06, |
|
"loss": 0.0196, |
|
"step": 11100 |
|
}, |
|
{ |
|
"epoch": 17.606973058637085, |
|
"grad_norm": 0.333839476108551, |
|
"learning_rate": 3.8632617286867845e-06, |
|
"loss": 0.0168, |
|
"step": 11110 |
|
}, |
|
{ |
|
"epoch": 17.62282091917591, |
|
"grad_norm": 0.09080642461776733, |
|
"learning_rate": 3.8129204996589894e-06, |
|
"loss": 0.017, |
|
"step": 11120 |
|
}, |
|
{ |
|
"epoch": 17.638668779714738, |
|
"grad_norm": 0.12128207087516785, |
|
"learning_rate": 3.7628964128631428e-06, |
|
"loss": 0.0146, |
|
"step": 11130 |
|
}, |
|
{ |
|
"epoch": 17.654516640253565, |
|
"grad_norm": 0.15360169112682343, |
|
"learning_rate": 3.7131898117881924e-06, |
|
"loss": 0.0125, |
|
"step": 11140 |
|
}, |
|
{ |
|
"epoch": 17.67036450079239, |
|
"grad_norm": 0.18307170271873474, |
|
"learning_rate": 3.6638010377430476e-06, |
|
"loss": 0.012, |
|
"step": 11150 |
|
}, |
|
{ |
|
"epoch": 17.686212361331222, |
|
"grad_norm": 0.17119954526424408, |
|
"learning_rate": 3.6147304298542963e-06, |
|
"loss": 0.0159, |
|
"step": 11160 |
|
}, |
|
{ |
|
"epoch": 17.70206022187005, |
|
"grad_norm": 0.18213894963264465, |
|
"learning_rate": 3.5659783250638344e-06, |
|
"loss": 0.0119, |
|
"step": 11170 |
|
}, |
|
{ |
|
"epoch": 17.717908082408876, |
|
"grad_norm": 0.22571374475955963, |
|
"learning_rate": 3.517545058126548e-06, |
|
"loss": 0.0142, |
|
"step": 11180 |
|
}, |
|
{ |
|
"epoch": 17.733755942947703, |
|
"grad_norm": 0.1815493106842041, |
|
"learning_rate": 3.4694309616080665e-06, |
|
"loss": 0.02, |
|
"step": 11190 |
|
}, |
|
{ |
|
"epoch": 17.74960380348653, |
|
"grad_norm": 0.18275891244411469, |
|
"learning_rate": 3.4216363658824136e-06, |
|
"loss": 0.0135, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 17.765451664025356, |
|
"grad_norm": 0.14396269619464874, |
|
"learning_rate": 3.3741615991297938e-06, |
|
"loss": 0.0138, |
|
"step": 11210 |
|
}, |
|
{ |
|
"epoch": 17.781299524564183, |
|
"grad_norm": 0.16869209706783295, |
|
"learning_rate": 3.327006987334308e-06, |
|
"loss": 0.015, |
|
"step": 11220 |
|
}, |
|
{ |
|
"epoch": 17.79714738510301, |
|
"grad_norm": 0.2123693972826004, |
|
"learning_rate": 3.2801728542817155e-06, |
|
"loss": 0.0125, |
|
"step": 11230 |
|
}, |
|
{ |
|
"epoch": 17.812995245641837, |
|
"grad_norm": 0.16149067878723145, |
|
"learning_rate": 3.2336595215572364e-06, |
|
"loss": 0.013, |
|
"step": 11240 |
|
}, |
|
{ |
|
"epoch": 17.828843106180667, |
|
"grad_norm": 0.21204307675361633, |
|
"learning_rate": 3.1874673085432848e-06, |
|
"loss": 0.0165, |
|
"step": 11250 |
|
}, |
|
{ |
|
"epoch": 17.844690966719494, |
|
"grad_norm": 0.16572032868862152, |
|
"learning_rate": 3.1415965324173567e-06, |
|
"loss": 0.0148, |
|
"step": 11260 |
|
}, |
|
{ |
|
"epoch": 17.86053882725832, |
|
"grad_norm": 0.11765672266483307, |
|
"learning_rate": 3.0960475081497966e-06, |
|
"loss": 0.0174, |
|
"step": 11270 |
|
}, |
|
{ |
|
"epoch": 17.876386687797147, |
|
"grad_norm": 0.11968225240707397, |
|
"learning_rate": 3.0508205485016426e-06, |
|
"loss": 0.0144, |
|
"step": 11280 |
|
}, |
|
{ |
|
"epoch": 17.892234548335974, |
|
"grad_norm": 0.2578866183757782, |
|
"learning_rate": 3.0059159640225097e-06, |
|
"loss": 0.0146, |
|
"step": 11290 |
|
}, |
|
{ |
|
"epoch": 17.9080824088748, |
|
"grad_norm": 0.21918439865112305, |
|
"learning_rate": 2.961334063048393e-06, |
|
"loss": 0.0155, |
|
"step": 11300 |
|
}, |
|
{ |
|
"epoch": 17.923930269413628, |
|
"grad_norm": 0.16526588797569275, |
|
"learning_rate": 2.917075151699622e-06, |
|
"loss": 0.0177, |
|
"step": 11310 |
|
}, |
|
{ |
|
"epoch": 17.939778129952458, |
|
"grad_norm": 0.21584181487560272, |
|
"learning_rate": 2.8731395338787215e-06, |
|
"loss": 0.0164, |
|
"step": 11320 |
|
}, |
|
{ |
|
"epoch": 17.955625990491285, |
|
"grad_norm": 0.19519634544849396, |
|
"learning_rate": 2.8295275112683207e-06, |
|
"loss": 0.0166, |
|
"step": 11330 |
|
}, |
|
{ |
|
"epoch": 17.97147385103011, |
|
"grad_norm": 0.21799765527248383, |
|
"learning_rate": 2.7862393833291036e-06, |
|
"loss": 0.0152, |
|
"step": 11340 |
|
}, |
|
{ |
|
"epoch": 17.98732171156894, |
|
"grad_norm": 0.13291296362876892, |
|
"learning_rate": 2.743275447297733e-06, |
|
"loss": 0.0142, |
|
"step": 11350 |
|
}, |
|
{ |
|
"epoch": 18.003169572107765, |
|
"grad_norm": 0.21296854317188263, |
|
"learning_rate": 2.7006359981848196e-06, |
|
"loss": 0.0135, |
|
"step": 11360 |
|
}, |
|
{ |
|
"epoch": 18.019017432646592, |
|
"grad_norm": 0.16779382526874542, |
|
"learning_rate": 2.6583213287729115e-06, |
|
"loss": 0.0107, |
|
"step": 11370 |
|
}, |
|
{ |
|
"epoch": 18.03486529318542, |
|
"grad_norm": 0.3331531882286072, |
|
"learning_rate": 2.616331729614424e-06, |
|
"loss": 0.015, |
|
"step": 11380 |
|
}, |
|
{ |
|
"epoch": 18.050713153724246, |
|
"grad_norm": 0.18505552411079407, |
|
"learning_rate": 2.574667489029725e-06, |
|
"loss": 0.0143, |
|
"step": 11390 |
|
}, |
|
{ |
|
"epoch": 18.066561014263076, |
|
"grad_norm": 0.2790921628475189, |
|
"learning_rate": 2.533328893105108e-06, |
|
"loss": 0.0161, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 18.082408874801903, |
|
"grad_norm": 0.2316390722990036, |
|
"learning_rate": 2.492316225690827e-06, |
|
"loss": 0.0158, |
|
"step": 11410 |
|
}, |
|
{ |
|
"epoch": 18.09825673534073, |
|
"grad_norm": 0.2133951485157013, |
|
"learning_rate": 2.4516297683991773e-06, |
|
"loss": 0.014, |
|
"step": 11420 |
|
}, |
|
{ |
|
"epoch": 18.114104595879557, |
|
"grad_norm": 0.23200847208499908, |
|
"learning_rate": 2.411269800602517e-06, |
|
"loss": 0.013, |
|
"step": 11430 |
|
}, |
|
{ |
|
"epoch": 18.129952456418383, |
|
"grad_norm": 0.18895216286182404, |
|
"learning_rate": 2.371236599431387e-06, |
|
"loss": 0.0163, |
|
"step": 11440 |
|
}, |
|
{ |
|
"epoch": 18.14580031695721, |
|
"grad_norm": 0.18238064646720886, |
|
"learning_rate": 2.3315304397726e-06, |
|
"loss": 0.0124, |
|
"step": 11450 |
|
}, |
|
{ |
|
"epoch": 18.161648177496037, |
|
"grad_norm": 0.1541883647441864, |
|
"learning_rate": 2.2921515942673276e-06, |
|
"loss": 0.0119, |
|
"step": 11460 |
|
}, |
|
{ |
|
"epoch": 18.177496038034864, |
|
"grad_norm": 0.1508757770061493, |
|
"learning_rate": 2.2531003333092826e-06, |
|
"loss": 0.0179, |
|
"step": 11470 |
|
}, |
|
{ |
|
"epoch": 18.193343898573694, |
|
"grad_norm": 0.18820171058177948, |
|
"learning_rate": 2.2143769250427883e-06, |
|
"loss": 0.0123, |
|
"step": 11480 |
|
}, |
|
{ |
|
"epoch": 18.20919175911252, |
|
"grad_norm": 0.15870815515518188, |
|
"learning_rate": 2.175981635361013e-06, |
|
"loss": 0.0126, |
|
"step": 11490 |
|
}, |
|
{ |
|
"epoch": 18.225039619651348, |
|
"grad_norm": 0.16902989149093628, |
|
"learning_rate": 2.1379147279040777e-06, |
|
"loss": 0.0123, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 18.240887480190175, |
|
"grad_norm": 0.12179669737815857, |
|
"learning_rate": 2.1001764640572963e-06, |
|
"loss": 0.0154, |
|
"step": 11510 |
|
}, |
|
{ |
|
"epoch": 18.256735340729, |
|
"grad_norm": 0.23037730157375336, |
|
"learning_rate": 2.0627671029493535e-06, |
|
"loss": 0.0153, |
|
"step": 11520 |
|
}, |
|
{ |
|
"epoch": 18.272583201267828, |
|
"grad_norm": 0.21997253596782684, |
|
"learning_rate": 2.02568690145053e-06, |
|
"loss": 0.0123, |
|
"step": 11530 |
|
}, |
|
{ |
|
"epoch": 18.288431061806655, |
|
"grad_norm": 0.1361498236656189, |
|
"learning_rate": 1.988936114170953e-06, |
|
"loss": 0.0179, |
|
"step": 11540 |
|
}, |
|
{ |
|
"epoch": 18.304278922345482, |
|
"grad_norm": 0.20903484523296356, |
|
"learning_rate": 1.9525149934588314e-06, |
|
"loss": 0.0118, |
|
"step": 11550 |
|
}, |
|
{ |
|
"epoch": 18.320126782884312, |
|
"grad_norm": 0.34163960814476013, |
|
"learning_rate": 1.916423789398725e-06, |
|
"loss": 0.0145, |
|
"step": 11560 |
|
}, |
|
{ |
|
"epoch": 18.33597464342314, |
|
"grad_norm": 0.226941779255867, |
|
"learning_rate": 1.8806627498098305e-06, |
|
"loss": 0.0139, |
|
"step": 11570 |
|
}, |
|
{ |
|
"epoch": 18.351822503961966, |
|
"grad_norm": 0.1451648324728012, |
|
"learning_rate": 1.8452321202442724e-06, |
|
"loss": 0.0116, |
|
"step": 11580 |
|
}, |
|
{ |
|
"epoch": 18.367670364500793, |
|
"grad_norm": 0.19719494879245758, |
|
"learning_rate": 1.810132143985438e-06, |
|
"loss": 0.0123, |
|
"step": 11590 |
|
}, |
|
{ |
|
"epoch": 18.38351822503962, |
|
"grad_norm": 0.1801900416612625, |
|
"learning_rate": 1.7753630620463035e-06, |
|
"loss": 0.0094, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 18.399366085578446, |
|
"grad_norm": 0.17285539209842682, |
|
"learning_rate": 1.740925113167735e-06, |
|
"loss": 0.0184, |
|
"step": 11610 |
|
}, |
|
{ |
|
"epoch": 18.415213946117273, |
|
"grad_norm": 0.1344527304172516, |
|
"learning_rate": 1.7068185338169174e-06, |
|
"loss": 0.0123, |
|
"step": 11620 |
|
}, |
|
{ |
|
"epoch": 18.4310618066561, |
|
"grad_norm": 0.21449725329875946, |
|
"learning_rate": 1.6730435581856719e-06, |
|
"loss": 0.0127, |
|
"step": 11630 |
|
}, |
|
{ |
|
"epoch": 18.44690966719493, |
|
"grad_norm": 0.153366357088089, |
|
"learning_rate": 1.6396004181888803e-06, |
|
"loss": 0.0115, |
|
"step": 11640 |
|
}, |
|
{ |
|
"epoch": 18.462757527733757, |
|
"grad_norm": 0.20724429190158844, |
|
"learning_rate": 1.6064893434628914e-06, |
|
"loss": 0.0213, |
|
"step": 11650 |
|
}, |
|
{ |
|
"epoch": 18.478605388272584, |
|
"grad_norm": 0.20763236284255981, |
|
"learning_rate": 1.5737105613639336e-06, |
|
"loss": 0.0165, |
|
"step": 11660 |
|
}, |
|
{ |
|
"epoch": 18.49445324881141, |
|
"grad_norm": 0.1959114670753479, |
|
"learning_rate": 1.5412642969665546e-06, |
|
"loss": 0.0165, |
|
"step": 11670 |
|
}, |
|
{ |
|
"epoch": 18.510301109350237, |
|
"grad_norm": 0.1801106333732605, |
|
"learning_rate": 1.5091507730620735e-06, |
|
"loss": 0.0201, |
|
"step": 11680 |
|
}, |
|
{ |
|
"epoch": 18.526148969889064, |
|
"grad_norm": 0.23710688948631287, |
|
"learning_rate": 1.4773702101570807e-06, |
|
"loss": 0.0127, |
|
"step": 11690 |
|
}, |
|
{ |
|
"epoch": 18.54199683042789, |
|
"grad_norm": 0.19984515011310577, |
|
"learning_rate": 1.4459228264718683e-06, |
|
"loss": 0.0134, |
|
"step": 11700 |
|
}, |
|
{ |
|
"epoch": 18.557844690966718, |
|
"grad_norm": 0.34799715876579285, |
|
"learning_rate": 1.41480883793898e-06, |
|
"loss": 0.0139, |
|
"step": 11710 |
|
}, |
|
{ |
|
"epoch": 18.573692551505548, |
|
"grad_norm": 0.23937344551086426, |
|
"learning_rate": 1.3840284582017193e-06, |
|
"loss": 0.0168, |
|
"step": 11720 |
|
}, |
|
{ |
|
"epoch": 18.589540412044375, |
|
"grad_norm": 0.1457284539937973, |
|
"learning_rate": 1.3535818986126492e-06, |
|
"loss": 0.0149, |
|
"step": 11730 |
|
}, |
|
{ |
|
"epoch": 18.605388272583202, |
|
"grad_norm": 0.14516040682792664, |
|
"learning_rate": 1.3234693682321886e-06, |
|
"loss": 0.0136, |
|
"step": 11740 |
|
}, |
|
{ |
|
"epoch": 18.62123613312203, |
|
"grad_norm": 0.1960999220609665, |
|
"learning_rate": 1.2936910738271524e-06, |
|
"loss": 0.0197, |
|
"step": 11750 |
|
}, |
|
{ |
|
"epoch": 18.637083993660855, |
|
"grad_norm": 0.17722713947296143, |
|
"learning_rate": 1.264247219869319e-06, |
|
"loss": 0.0101, |
|
"step": 11760 |
|
}, |
|
{ |
|
"epoch": 18.652931854199682, |
|
"grad_norm": 0.12621328234672546, |
|
"learning_rate": 1.2351380085340592e-06, |
|
"loss": 0.0099, |
|
"step": 11770 |
|
}, |
|
{ |
|
"epoch": 18.66877971473851, |
|
"grad_norm": 0.19239826500415802, |
|
"learning_rate": 1.206363639698921e-06, |
|
"loss": 0.017, |
|
"step": 11780 |
|
}, |
|
{ |
|
"epoch": 18.684627575277336, |
|
"grad_norm": 0.21515102684497833, |
|
"learning_rate": 1.1779243109422632e-06, |
|
"loss": 0.016, |
|
"step": 11790 |
|
}, |
|
{ |
|
"epoch": 18.700475435816166, |
|
"grad_norm": 0.32155877351760864, |
|
"learning_rate": 1.1498202175419136e-06, |
|
"loss": 0.0189, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 18.716323296354993, |
|
"grad_norm": 0.1156759113073349, |
|
"learning_rate": 1.1220515524738017e-06, |
|
"loss": 0.0114, |
|
"step": 11810 |
|
}, |
|
{ |
|
"epoch": 18.73217115689382, |
|
"grad_norm": 0.17164525389671326, |
|
"learning_rate": 1.0946185064106552e-06, |
|
"loss": 0.0142, |
|
"step": 11820 |
|
}, |
|
{ |
|
"epoch": 18.748019017432647, |
|
"grad_norm": 0.15515750646591187, |
|
"learning_rate": 1.0675212677206892e-06, |
|
"loss": 0.0129, |
|
"step": 11830 |
|
}, |
|
{ |
|
"epoch": 18.763866877971473, |
|
"grad_norm": 0.17644274234771729, |
|
"learning_rate": 1.0407600224662917e-06, |
|
"loss": 0.0171, |
|
"step": 11840 |
|
}, |
|
{ |
|
"epoch": 18.7797147385103, |
|
"grad_norm": 0.1408577710390091, |
|
"learning_rate": 1.0143349544027791e-06, |
|
"loss": 0.0149, |
|
"step": 11850 |
|
}, |
|
{ |
|
"epoch": 18.795562599049127, |
|
"grad_norm": 0.23002204298973083, |
|
"learning_rate": 9.882462449771035e-07, |
|
"loss": 0.0166, |
|
"step": 11860 |
|
}, |
|
{ |
|
"epoch": 18.811410459587954, |
|
"grad_norm": 0.16936403512954712, |
|
"learning_rate": 9.624940733266363e-07, |
|
"loss": 0.0141, |
|
"step": 11870 |
|
}, |
|
{ |
|
"epoch": 18.827258320126784, |
|
"grad_norm": 0.18246498703956604, |
|
"learning_rate": 9.370786162779033e-07, |
|
"loss": 0.0146, |
|
"step": 11880 |
|
}, |
|
{ |
|
"epoch": 18.84310618066561, |
|
"grad_norm": 0.14892500638961792, |
|
"learning_rate": 9.120000483453961e-07, |
|
"loss": 0.0146, |
|
"step": 11890 |
|
}, |
|
{ |
|
"epoch": 18.858954041204438, |
|
"grad_norm": 0.23931749165058136, |
|
"learning_rate": 8.872585417303736e-07, |
|
"loss": 0.0165, |
|
"step": 11900 |
|
}, |
|
{ |
|
"epoch": 18.874801901743265, |
|
"grad_norm": 0.12671174108982086, |
|
"learning_rate": 8.628542663196625e-07, |
|
"loss": 0.0148, |
|
"step": 11910 |
|
}, |
|
{ |
|
"epoch": 18.89064976228209, |
|
"grad_norm": 0.12941120564937592, |
|
"learning_rate": 8.387873896845144e-07, |
|
"loss": 0.014, |
|
"step": 11920 |
|
}, |
|
{ |
|
"epoch": 18.90649762282092, |
|
"grad_norm": 0.17071138322353363, |
|
"learning_rate": 8.150580770794336e-07, |
|
"loss": 0.0117, |
|
"step": 11930 |
|
}, |
|
{ |
|
"epoch": 18.922345483359745, |
|
"grad_norm": 0.18729938566684723, |
|
"learning_rate": 7.916664914410455e-07, |
|
"loss": 0.0163, |
|
"step": 11940 |
|
}, |
|
{ |
|
"epoch": 18.938193343898575, |
|
"grad_norm": 0.15300016105175018, |
|
"learning_rate": 7.686127933869968e-07, |
|
"loss": 0.0136, |
|
"step": 11950 |
|
}, |
|
{ |
|
"epoch": 18.954041204437402, |
|
"grad_norm": 0.24584755301475525, |
|
"learning_rate": 7.458971412148241e-07, |
|
"loss": 0.0124, |
|
"step": 11960 |
|
}, |
|
{ |
|
"epoch": 18.96988906497623, |
|
"grad_norm": 0.1396564543247223, |
|
"learning_rate": 7.235196909008924e-07, |
|
"loss": 0.0105, |
|
"step": 11970 |
|
}, |
|
{ |
|
"epoch": 18.985736925515056, |
|
"grad_norm": 0.45825517177581787, |
|
"learning_rate": 7.014805960993131e-07, |
|
"loss": 0.0173, |
|
"step": 11980 |
|
}, |
|
{ |
|
"epoch": 19.001584786053883, |
|
"grad_norm": 0.3947998285293579, |
|
"learning_rate": 6.797800081408845e-07, |
|
"loss": 0.0108, |
|
"step": 11990 |
|
}, |
|
{ |
|
"epoch": 19.01743264659271, |
|
"grad_norm": 0.16845867037773132, |
|
"learning_rate": 6.584180760320635e-07, |
|
"loss": 0.0125, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 19.033280507131536, |
|
"grad_norm": 0.17425452172756195, |
|
"learning_rate": 6.373949464539286e-07, |
|
"loss": 0.0124, |
|
"step": 12010 |
|
}, |
|
{ |
|
"epoch": 19.049128367670363, |
|
"grad_norm": 0.29528307914733887, |
|
"learning_rate": 6.167107637611858e-07, |
|
"loss": 0.012, |
|
"step": 12020 |
|
}, |
|
{ |
|
"epoch": 19.064976228209193, |
|
"grad_norm": 0.22592425346374512, |
|
"learning_rate": 5.963656699811693e-07, |
|
"loss": 0.0128, |
|
"step": 12030 |
|
}, |
|
{ |
|
"epoch": 19.08082408874802, |
|
"grad_norm": 0.2710273265838623, |
|
"learning_rate": 5.763598048128704e-07, |
|
"loss": 0.0118, |
|
"step": 12040 |
|
}, |
|
{ |
|
"epoch": 19.096671949286847, |
|
"grad_norm": 0.21170739829540253, |
|
"learning_rate": 5.566933056259882e-07, |
|
"loss": 0.0149, |
|
"step": 12050 |
|
}, |
|
{ |
|
"epoch": 19.112519809825674, |
|
"grad_norm": 0.20141953229904175, |
|
"learning_rate": 5.373663074599522e-07, |
|
"loss": 0.0119, |
|
"step": 12060 |
|
}, |
|
{ |
|
"epoch": 19.1283676703645, |
|
"grad_norm": 0.19637355208396912, |
|
"learning_rate": 5.183789430230346e-07, |
|
"loss": 0.0136, |
|
"step": 12070 |
|
}, |
|
{ |
|
"epoch": 19.144215530903328, |
|
"grad_norm": 0.16335146129131317, |
|
"learning_rate": 4.99731342691423e-07, |
|
"loss": 0.0138, |
|
"step": 12080 |
|
}, |
|
{ |
|
"epoch": 19.160063391442154, |
|
"grad_norm": 0.16980670392513275, |
|
"learning_rate": 4.814236345083156e-07, |
|
"loss": 0.0146, |
|
"step": 12090 |
|
}, |
|
{ |
|
"epoch": 19.17591125198098, |
|
"grad_norm": 0.3072208762168884, |
|
"learning_rate": 4.6345594418304996e-07, |
|
"loss": 0.0141, |
|
"step": 12100 |
|
}, |
|
{ |
|
"epoch": 19.19175911251981, |
|
"grad_norm": 0.18183228373527527, |
|
"learning_rate": 4.458283950902642e-07, |
|
"loss": 0.014, |
|
"step": 12110 |
|
}, |
|
{ |
|
"epoch": 19.20760697305864, |
|
"grad_norm": 0.160085067152977, |
|
"learning_rate": 4.285411082689927e-07, |
|
"loss": 0.0131, |
|
"step": 12120 |
|
}, |
|
{ |
|
"epoch": 19.223454833597465, |
|
"grad_norm": 0.24544082581996918, |
|
"learning_rate": 4.115942024218944e-07, |
|
"loss": 0.011, |
|
"step": 12130 |
|
}, |
|
{ |
|
"epoch": 19.239302694136292, |
|
"grad_norm": 0.21625439822673798, |
|
"learning_rate": 3.9498779391439754e-07, |
|
"loss": 0.0135, |
|
"step": 12140 |
|
}, |
|
{ |
|
"epoch": 19.25515055467512, |
|
"grad_norm": 0.23580490052700043, |
|
"learning_rate": 3.787219967739231e-07, |
|
"loss": 0.0131, |
|
"step": 12150 |
|
}, |
|
{ |
|
"epoch": 19.270998415213946, |
|
"grad_norm": 0.3027212917804718, |
|
"learning_rate": 3.627969226890959e-07, |
|
"loss": 0.0146, |
|
"step": 12160 |
|
}, |
|
{ |
|
"epoch": 19.286846275752772, |
|
"grad_norm": 0.13139936327934265, |
|
"learning_rate": 3.4721268100896265e-07, |
|
"loss": 0.0111, |
|
"step": 12170 |
|
}, |
|
{ |
|
"epoch": 19.3026941362916, |
|
"grad_norm": 0.18760831654071808, |
|
"learning_rate": 3.319693787422751e-07, |
|
"loss": 0.0177, |
|
"step": 12180 |
|
}, |
|
{ |
|
"epoch": 19.31854199683043, |
|
"grad_norm": 0.1672467142343521, |
|
"learning_rate": 3.170671205567133e-07, |
|
"loss": 0.0183, |
|
"step": 12190 |
|
}, |
|
{ |
|
"epoch": 19.334389857369256, |
|
"grad_norm": 0.13311269879341125, |
|
"learning_rate": 3.025060087782028e-07, |
|
"loss": 0.0158, |
|
"step": 12200 |
|
}, |
|
{ |
|
"epoch": 19.350237717908083, |
|
"grad_norm": 0.1697445511817932, |
|
"learning_rate": 2.8828614339018735e-07, |
|
"loss": 0.0118, |
|
"step": 12210 |
|
}, |
|
{ |
|
"epoch": 19.36608557844691, |
|
"grad_norm": 0.12869006395339966, |
|
"learning_rate": 2.744076220329628e-07, |
|
"loss": 0.0131, |
|
"step": 12220 |
|
}, |
|
{ |
|
"epoch": 19.381933438985737, |
|
"grad_norm": 0.1751239150762558, |
|
"learning_rate": 2.6087054000298874e-07, |
|
"loss": 0.0163, |
|
"step": 12230 |
|
}, |
|
{ |
|
"epoch": 19.397781299524564, |
|
"grad_norm": 0.23643170297145844, |
|
"learning_rate": 2.4767499025223904e-07, |
|
"loss": 0.0092, |
|
"step": 12240 |
|
}, |
|
{ |
|
"epoch": 19.41362916006339, |
|
"grad_norm": 0.11966560781002045, |
|
"learning_rate": 2.3482106338758025e-07, |
|
"loss": 0.0106, |
|
"step": 12250 |
|
}, |
|
{ |
|
"epoch": 19.429477020602217, |
|
"grad_norm": 0.15015937387943268, |
|
"learning_rate": 2.2230884767011628e-07, |
|
"loss": 0.0139, |
|
"step": 12260 |
|
}, |
|
{ |
|
"epoch": 19.445324881141048, |
|
"grad_norm": 0.21535207331180573, |
|
"learning_rate": 2.101384290146169e-07, |
|
"loss": 0.0129, |
|
"step": 12270 |
|
}, |
|
{ |
|
"epoch": 19.461172741679874, |
|
"grad_norm": 0.19800494611263275, |
|
"learning_rate": 1.9830989098890142e-07, |
|
"loss": 0.0098, |
|
"step": 12280 |
|
}, |
|
{ |
|
"epoch": 19.4770206022187, |
|
"grad_norm": 0.18215620517730713, |
|
"learning_rate": 1.8682331481328364e-07, |
|
"loss": 0.0137, |
|
"step": 12290 |
|
}, |
|
{ |
|
"epoch": 19.492868462757528, |
|
"grad_norm": 0.18857750296592712, |
|
"learning_rate": 1.756787793600001e-07, |
|
"loss": 0.0127, |
|
"step": 12300 |
|
}, |
|
{ |
|
"epoch": 19.508716323296355, |
|
"grad_norm": 0.23549525439739227, |
|
"learning_rate": 1.6487636115268824e-07, |
|
"loss": 0.0144, |
|
"step": 12310 |
|
}, |
|
{ |
|
"epoch": 19.52456418383518, |
|
"grad_norm": 0.16930314898490906, |
|
"learning_rate": 1.5441613436582014e-07, |
|
"loss": 0.0136, |
|
"step": 12320 |
|
}, |
|
{ |
|
"epoch": 19.54041204437401, |
|
"grad_norm": 0.20466506481170654, |
|
"learning_rate": 1.4429817082425302e-07, |
|
"loss": 0.015, |
|
"step": 12330 |
|
}, |
|
{ |
|
"epoch": 19.556259904912835, |
|
"grad_norm": 0.2871796190738678, |
|
"learning_rate": 1.3452254000267394e-07, |
|
"loss": 0.0117, |
|
"step": 12340 |
|
}, |
|
{ |
|
"epoch": 19.572107765451666, |
|
"grad_norm": 0.2035956084728241, |
|
"learning_rate": 1.2508930902517813e-07, |
|
"loss": 0.0137, |
|
"step": 12350 |
|
}, |
|
{ |
|
"epoch": 19.587955625990492, |
|
"grad_norm": 0.096625417470932, |
|
"learning_rate": 1.1599854266476918e-07, |
|
"loss": 0.0136, |
|
"step": 12360 |
|
}, |
|
{ |
|
"epoch": 19.60380348652932, |
|
"grad_norm": 0.1756078600883484, |
|
"learning_rate": 1.0725030334292064e-07, |
|
"loss": 0.0157, |
|
"step": 12370 |
|
}, |
|
{ |
|
"epoch": 19.619651347068146, |
|
"grad_norm": 0.12860575318336487, |
|
"learning_rate": 9.884465112917074e-08, |
|
"loss": 0.0138, |
|
"step": 12380 |
|
}, |
|
{ |
|
"epoch": 19.635499207606973, |
|
"grad_norm": 0.2300836741924286, |
|
"learning_rate": 9.078164374067833e-08, |
|
"loss": 0.0155, |
|
"step": 12390 |
|
}, |
|
{ |
|
"epoch": 19.6513470681458, |
|
"grad_norm": 0.16416415572166443, |
|
"learning_rate": 8.306133654185089e-08, |
|
"loss": 0.0114, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 19.667194928684626, |
|
"grad_norm": 0.12426438182592392, |
|
"learning_rate": 7.568378254395047e-08, |
|
"loss": 0.014, |
|
"step": 12410 |
|
}, |
|
{ |
|
"epoch": 19.683042789223453, |
|
"grad_norm": 0.16134649515151978, |
|
"learning_rate": 6.864903240474397e-08, |
|
"loss": 0.0129, |
|
"step": 12420 |
|
}, |
|
{ |
|
"epoch": 19.698890649762284, |
|
"grad_norm": 0.10580016672611237, |
|
"learning_rate": 6.195713442812556e-08, |
|
"loss": 0.0098, |
|
"step": 12430 |
|
}, |
|
{ |
|
"epoch": 19.71473851030111, |
|
"grad_norm": 0.25324514508247375, |
|
"learning_rate": 5.560813456382818e-08, |
|
"loss": 0.0145, |
|
"step": 12440 |
|
}, |
|
{ |
|
"epoch": 19.730586370839937, |
|
"grad_norm": 0.10774058848619461, |
|
"learning_rate": 4.96020764070626e-08, |
|
"loss": 0.0117, |
|
"step": 12450 |
|
}, |
|
{ |
|
"epoch": 19.746434231378764, |
|
"grad_norm": 0.14597612619400024, |
|
"learning_rate": 4.393900119826211e-08, |
|
"loss": 0.0203, |
|
"step": 12460 |
|
}, |
|
{ |
|
"epoch": 19.76228209191759, |
|
"grad_norm": 0.22840741276741028, |
|
"learning_rate": 3.861894782276609e-08, |
|
"loss": 0.0155, |
|
"step": 12470 |
|
}, |
|
{ |
|
"epoch": 19.778129952456418, |
|
"grad_norm": 0.17510531842708588, |
|
"learning_rate": 3.3641952810559155e-08, |
|
"loss": 0.015, |
|
"step": 12480 |
|
}, |
|
{ |
|
"epoch": 19.793977812995244, |
|
"grad_norm": 0.1512196958065033, |
|
"learning_rate": 2.9008050336032376e-08, |
|
"loss": 0.0137, |
|
"step": 12490 |
|
}, |
|
{ |
|
"epoch": 19.80982567353407, |
|
"grad_norm": 0.3339158296585083, |
|
"learning_rate": 2.471727221775022e-08, |
|
"loss": 0.0121, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 19.8256735340729, |
|
"grad_norm": 0.17482948303222656, |
|
"learning_rate": 2.0769647918206237e-08, |
|
"loss": 0.0122, |
|
"step": 12510 |
|
}, |
|
{ |
|
"epoch": 19.84152139461173, |
|
"grad_norm": 0.19882513582706451, |
|
"learning_rate": 1.7165204543656554e-08, |
|
"loss": 0.0136, |
|
"step": 12520 |
|
}, |
|
{ |
|
"epoch": 19.857369255150555, |
|
"grad_norm": 0.2373553216457367, |
|
"learning_rate": 1.3903966843897831e-08, |
|
"loss": 0.0152, |
|
"step": 12530 |
|
}, |
|
{ |
|
"epoch": 19.873217115689382, |
|
"grad_norm": 0.17455127835273743, |
|
"learning_rate": 1.0985957212122922e-08, |
|
"loss": 0.0108, |
|
"step": 12540 |
|
}, |
|
{ |
|
"epoch": 19.88906497622821, |
|
"grad_norm": 0.10235228389501572, |
|
"learning_rate": 8.411195684765449e-09, |
|
"loss": 0.0143, |
|
"step": 12550 |
|
}, |
|
{ |
|
"epoch": 19.904912836767036, |
|
"grad_norm": 0.3095182776451111, |
|
"learning_rate": 6.179699941349926e-09, |
|
"loss": 0.011, |
|
"step": 12560 |
|
}, |
|
{ |
|
"epoch": 19.920760697305862, |
|
"grad_norm": 0.11435042321681976, |
|
"learning_rate": 4.291485304375176e-09, |
|
"loss": 0.0132, |
|
"step": 12570 |
|
}, |
|
{ |
|
"epoch": 19.936608557844693, |
|
"grad_norm": 0.22614504396915436, |
|
"learning_rate": 2.7465647392088676e-09, |
|
"loss": 0.0135, |
|
"step": 12580 |
|
}, |
|
{ |
|
"epoch": 19.95245641838352, |
|
"grad_norm": 0.22306819260120392, |
|
"learning_rate": 1.544948854009798e-09, |
|
"loss": 0.0141, |
|
"step": 12590 |
|
}, |
|
{ |
|
"epoch": 19.968304278922346, |
|
"grad_norm": 0.16913668811321259, |
|
"learning_rate": 6.866458996279689e-10, |
|
"loss": 0.0131, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 19.984152139461173, |
|
"grad_norm": 0.28053924441337585, |
|
"learning_rate": 1.7166176958238746e-10, |
|
"loss": 0.0113, |
|
"step": 12610 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"grad_norm": 0.22396335005760193, |
|
"learning_rate": 0.0, |
|
"loss": 0.0095, |
|
"step": 12620 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"step": 12620, |
|
"total_flos": 1.0795351858465092e+18, |
|
"train_loss": 0.03321190329177066, |
|
"train_runtime": 10224.5982, |
|
"train_samples_per_second": 33.32, |
|
"train_steps_per_second": 1.234 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 12620, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 20, |
|
"save_steps": 10000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.0795351858465092e+18, |
|
"train_batch_size": 27, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|