|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 5.0, |
|
"eval_steps": 500, |
|
"global_step": 9940, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.005030181086519115, |
|
"grad_norm": 7.163974285125732, |
|
"learning_rate": 4.024144869215292e-06, |
|
"loss": 0.8528, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.01006036217303823, |
|
"grad_norm": 5.244224548339844, |
|
"learning_rate": 8.048289738430584e-06, |
|
"loss": 0.7448, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.015090543259557344, |
|
"grad_norm": 2.7584030628204346, |
|
"learning_rate": 1.2072434607645876e-05, |
|
"loss": 0.5215, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.02012072434607646, |
|
"grad_norm": 2.765652656555176, |
|
"learning_rate": 1.609657947686117e-05, |
|
"loss": 0.3683, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.025150905432595575, |
|
"grad_norm": 2.0063560009002686, |
|
"learning_rate": 2.012072434607646e-05, |
|
"loss": 0.2833, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.030181086519114688, |
|
"grad_norm": 3.22554087638855, |
|
"learning_rate": 2.4144869215291752e-05, |
|
"loss": 0.2435, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.035211267605633804, |
|
"grad_norm": 1.5418182611465454, |
|
"learning_rate": 2.8169014084507046e-05, |
|
"loss": 0.188, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.04024144869215292, |
|
"grad_norm": 7.7757697105407715, |
|
"learning_rate": 3.219315895372234e-05, |
|
"loss": 0.1598, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.04527162977867203, |
|
"grad_norm": 2.0921378135681152, |
|
"learning_rate": 3.621730382293763e-05, |
|
"loss": 0.1485, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.05030181086519115, |
|
"grad_norm": 1.6707618236541748, |
|
"learning_rate": 4.024144869215292e-05, |
|
"loss": 0.1427, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.05533199195171026, |
|
"grad_norm": 2.270603895187378, |
|
"learning_rate": 4.426559356136821e-05, |
|
"loss": 0.1314, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.060362173038229376, |
|
"grad_norm": 1.4634528160095215, |
|
"learning_rate": 4.8289738430583503e-05, |
|
"loss": 0.1273, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.06539235412474849, |
|
"grad_norm": 1.9672114849090576, |
|
"learning_rate": 5.2313883299798795e-05, |
|
"loss": 0.0889, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.07042253521126761, |
|
"grad_norm": 1.466834545135498, |
|
"learning_rate": 5.633802816901409e-05, |
|
"loss": 0.1037, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.07545271629778671, |
|
"grad_norm": 1.9027611017227173, |
|
"learning_rate": 6.036217303822938e-05, |
|
"loss": 0.1027, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.08048289738430583, |
|
"grad_norm": 0.8662084937095642, |
|
"learning_rate": 6.438631790744468e-05, |
|
"loss": 0.0917, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.08551307847082495, |
|
"grad_norm": 2.0497639179229736, |
|
"learning_rate": 6.841046277665996e-05, |
|
"loss": 0.0964, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.09054325955734406, |
|
"grad_norm": 1.6987979412078857, |
|
"learning_rate": 7.243460764587526e-05, |
|
"loss": 0.0868, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.09557344064386318, |
|
"grad_norm": 0.845194935798645, |
|
"learning_rate": 7.645875251509054e-05, |
|
"loss": 0.1019, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.1006036217303823, |
|
"grad_norm": 1.4700064659118652, |
|
"learning_rate": 8.048289738430584e-05, |
|
"loss": 0.1007, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.1056338028169014, |
|
"grad_norm": 1.8693934679031372, |
|
"learning_rate": 8.450704225352113e-05, |
|
"loss": 0.0897, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.11066398390342053, |
|
"grad_norm": 1.1521185636520386, |
|
"learning_rate": 8.853118712273642e-05, |
|
"loss": 0.1064, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.11569416498993963, |
|
"grad_norm": 1.094040036201477, |
|
"learning_rate": 9.255533199195171e-05, |
|
"loss": 0.0706, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.12072434607645875, |
|
"grad_norm": 1.2210173606872559, |
|
"learning_rate": 9.657947686116701e-05, |
|
"loss": 0.078, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.12575452716297786, |
|
"grad_norm": 0.688697874546051, |
|
"learning_rate": 0.00010060362173038229, |
|
"loss": 0.0685, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.13078470824949698, |
|
"grad_norm": 0.9590277075767517, |
|
"learning_rate": 0.00010462776659959759, |
|
"loss": 0.0799, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.1358148893360161, |
|
"grad_norm": 0.8572778105735779, |
|
"learning_rate": 0.00010865191146881289, |
|
"loss": 0.0705, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.14084507042253522, |
|
"grad_norm": 0.8274833559989929, |
|
"learning_rate": 0.00011267605633802819, |
|
"loss": 0.0887, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.14587525150905434, |
|
"grad_norm": 0.9543363451957703, |
|
"learning_rate": 0.00011670020120724347, |
|
"loss": 0.0618, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.15090543259557343, |
|
"grad_norm": 0.7259362936019897, |
|
"learning_rate": 0.00012072434607645876, |
|
"loss": 0.075, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.15593561368209255, |
|
"grad_norm": 0.8904047012329102, |
|
"learning_rate": 0.00012474849094567405, |
|
"loss": 0.07, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.16096579476861167, |
|
"grad_norm": 0.7555816173553467, |
|
"learning_rate": 0.00012877263581488935, |
|
"loss": 0.0677, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.1659959758551308, |
|
"grad_norm": 0.7622585892677307, |
|
"learning_rate": 0.00013279678068410465, |
|
"loss": 0.0762, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.1710261569416499, |
|
"grad_norm": 1.1940929889678955, |
|
"learning_rate": 0.00013682092555331992, |
|
"loss": 0.059, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.176056338028169, |
|
"grad_norm": 0.582521915435791, |
|
"learning_rate": 0.00014084507042253522, |
|
"loss": 0.0732, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.18108651911468812, |
|
"grad_norm": 0.858220636844635, |
|
"learning_rate": 0.00014486921529175052, |
|
"loss": 0.06, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.18611670020120724, |
|
"grad_norm": 0.5738099217414856, |
|
"learning_rate": 0.0001488933601609658, |
|
"loss": 0.0568, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.19114688128772636, |
|
"grad_norm": 0.581499457359314, |
|
"learning_rate": 0.00015291750503018109, |
|
"loss": 0.062, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.19617706237424548, |
|
"grad_norm": 0.5559177994728088, |
|
"learning_rate": 0.00015694164989939638, |
|
"loss": 0.0774, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.2012072434607646, |
|
"grad_norm": 0.6640293598175049, |
|
"learning_rate": 0.00016096579476861168, |
|
"loss": 0.058, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.2062374245472837, |
|
"grad_norm": 0.5883966088294983, |
|
"learning_rate": 0.00016498993963782695, |
|
"loss": 0.0625, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.2112676056338028, |
|
"grad_norm": 0.5281286835670471, |
|
"learning_rate": 0.00016901408450704225, |
|
"loss": 0.0733, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.21629778672032193, |
|
"grad_norm": 0.46224042773246765, |
|
"learning_rate": 0.00017303822937625755, |
|
"loss": 0.0644, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.22132796780684105, |
|
"grad_norm": 0.4865374267101288, |
|
"learning_rate": 0.00017706237424547285, |
|
"loss": 0.0677, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.22635814889336017, |
|
"grad_norm": 1.1236791610717773, |
|
"learning_rate": 0.00018108651911468815, |
|
"loss": 0.0719, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.23138832997987926, |
|
"grad_norm": 1.1186903715133667, |
|
"learning_rate": 0.00018511066398390342, |
|
"loss": 0.064, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.23641851106639838, |
|
"grad_norm": 0.6385508179664612, |
|
"learning_rate": 0.00018913480885311872, |
|
"loss": 0.074, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.2414486921529175, |
|
"grad_norm": 1.3683863878250122, |
|
"learning_rate": 0.00019315895372233401, |
|
"loss": 0.0626, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.24647887323943662, |
|
"grad_norm": 0.5213209390640259, |
|
"learning_rate": 0.0001971830985915493, |
|
"loss": 0.0585, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.2515090543259557, |
|
"grad_norm": 0.988146960735321, |
|
"learning_rate": 0.00019999995019278672, |
|
"loss": 0.0479, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.25653923541247486, |
|
"grad_norm": 0.707419216632843, |
|
"learning_rate": 0.0001999990647325972, |
|
"loss": 0.0558, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.26156941649899396, |
|
"grad_norm": 0.5720793604850769, |
|
"learning_rate": 0.0001999970724567263, |
|
"loss": 0.0383, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.2665995975855131, |
|
"grad_norm": 0.53882896900177, |
|
"learning_rate": 0.00019999397338722502, |
|
"loss": 0.0565, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.2716297786720322, |
|
"grad_norm": 0.8112373352050781, |
|
"learning_rate": 0.00019998976755839472, |
|
"loss": 0.0647, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.2766599597585513, |
|
"grad_norm": 0.7053709626197815, |
|
"learning_rate": 0.00019998445501678657, |
|
"loss": 0.0533, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.28169014084507044, |
|
"grad_norm": 0.40134745836257935, |
|
"learning_rate": 0.0001999780358212011, |
|
"loss": 0.0621, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.28672032193158953, |
|
"grad_norm": 0.9213513135910034, |
|
"learning_rate": 0.00019997051004268777, |
|
"loss": 0.0729, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.2917505030181087, |
|
"grad_norm": 0.6868863105773926, |
|
"learning_rate": 0.00019996187776454374, |
|
"loss": 0.0636, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.29678068410462777, |
|
"grad_norm": 1.0402840375900269, |
|
"learning_rate": 0.0001999521390823134, |
|
"loss": 0.0561, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.30181086519114686, |
|
"grad_norm": 0.6680485010147095, |
|
"learning_rate": 0.00019994129410378695, |
|
"loss": 0.0551, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.306841046277666, |
|
"grad_norm": 0.6051433682441711, |
|
"learning_rate": 0.00019992934294899944, |
|
"loss": 0.0596, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.3118712273641851, |
|
"grad_norm": 0.5517066121101379, |
|
"learning_rate": 0.00019991628575022946, |
|
"loss": 0.057, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.31690140845070425, |
|
"grad_norm": 0.4259048104286194, |
|
"learning_rate": 0.00019990212265199738, |
|
"loss": 0.0451, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.32193158953722334, |
|
"grad_norm": 0.4159329831600189, |
|
"learning_rate": 0.0001998868538110641, |
|
"loss": 0.0493, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.32696177062374243, |
|
"grad_norm": 0.5596720576286316, |
|
"learning_rate": 0.0001998704793964291, |
|
"loss": 0.0556, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.3319919517102616, |
|
"grad_norm": 0.821570098400116, |
|
"learning_rate": 0.00019985299958932866, |
|
"loss": 0.0517, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.33702213279678067, |
|
"grad_norm": 0.6846859455108643, |
|
"learning_rate": 0.0001998344145832339, |
|
"loss": 0.0367, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.3420523138832998, |
|
"grad_norm": 0.6848050951957703, |
|
"learning_rate": 0.00019981472458384844, |
|
"loss": 0.0536, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.3470824949698189, |
|
"grad_norm": 2.0011439323425293, |
|
"learning_rate": 0.00019979392980910637, |
|
"loss": 0.0505, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.352112676056338, |
|
"grad_norm": 0.7617112994194031, |
|
"learning_rate": 0.00019977203048916962, |
|
"loss": 0.0518, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.35714285714285715, |
|
"grad_norm": 0.360905259847641, |
|
"learning_rate": 0.00019974902686642558, |
|
"loss": 0.0545, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.36217303822937624, |
|
"grad_norm": 0.8060867190361023, |
|
"learning_rate": 0.00019972491919548438, |
|
"loss": 0.0481, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.3672032193158954, |
|
"grad_norm": 0.7293612360954285, |
|
"learning_rate": 0.00019969970774317593, |
|
"loss": 0.0676, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.3722334004024145, |
|
"grad_norm": 0.5890267491340637, |
|
"learning_rate": 0.00019967339278854714, |
|
"loss": 0.0571, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.3772635814889336, |
|
"grad_norm": 0.7763799428939819, |
|
"learning_rate": 0.00019964597462285888, |
|
"loss": 0.057, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.3822937625754527, |
|
"grad_norm": 0.5891818404197693, |
|
"learning_rate": 0.00019961745354958246, |
|
"loss": 0.0473, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.3873239436619718, |
|
"grad_norm": 0.41080498695373535, |
|
"learning_rate": 0.00019958782988439654, |
|
"loss": 0.0506, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.39235412474849096, |
|
"grad_norm": 0.46914100646972656, |
|
"learning_rate": 0.00019955710395518363, |
|
"loss": 0.053, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.39738430583501005, |
|
"grad_norm": 0.8423359990119934, |
|
"learning_rate": 0.00019952527610202624, |
|
"loss": 0.0429, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.4024144869215292, |
|
"grad_norm": 0.5299778580665588, |
|
"learning_rate": 0.00019949234667720336, |
|
"loss": 0.0499, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.4074446680080483, |
|
"grad_norm": 0.5854944586753845, |
|
"learning_rate": 0.00019945831604518645, |
|
"loss": 0.0473, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.4124748490945674, |
|
"grad_norm": 0.6145129203796387, |
|
"learning_rate": 0.0001994231845826354, |
|
"loss": 0.0645, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.41750503018108653, |
|
"grad_norm": 0.6725190877914429, |
|
"learning_rate": 0.00019938695267839436, |
|
"loss": 0.0511, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.4225352112676056, |
|
"grad_norm": 0.5802650451660156, |
|
"learning_rate": 0.0001993496207334875, |
|
"loss": 0.0388, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.4275653923541248, |
|
"grad_norm": 0.6143836975097656, |
|
"learning_rate": 0.00019931118916111448, |
|
"loss": 0.043, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.43259557344064387, |
|
"grad_norm": 0.6360625624656677, |
|
"learning_rate": 0.00019927165838664598, |
|
"loss": 0.0541, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.43762575452716296, |
|
"grad_norm": 0.49990031123161316, |
|
"learning_rate": 0.00019923102884761892, |
|
"loss": 0.0412, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.4426559356136821, |
|
"grad_norm": 0.6556900143623352, |
|
"learning_rate": 0.00019918930099373157, |
|
"loss": 0.055, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.4476861167002012, |
|
"grad_norm": 0.42863693833351135, |
|
"learning_rate": 0.00019914647528683865, |
|
"loss": 0.046, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.45271629778672035, |
|
"grad_norm": 0.31682249903678894, |
|
"learning_rate": 0.00019910255220094634, |
|
"loss": 0.0477, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.45774647887323944, |
|
"grad_norm": 0.5968728065490723, |
|
"learning_rate": 0.0001990575322222067, |
|
"loss": 0.0469, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.46277665995975853, |
|
"grad_norm": 0.4193131923675537, |
|
"learning_rate": 0.00019901141584891262, |
|
"loss": 0.0584, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.4678068410462777, |
|
"grad_norm": 0.5087130069732666, |
|
"learning_rate": 0.00019896420359149207, |
|
"loss": 0.0595, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.47283702213279677, |
|
"grad_norm": 0.3542875349521637, |
|
"learning_rate": 0.00019891589597250265, |
|
"loss": 0.0384, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.4778672032193159, |
|
"grad_norm": 0.3090812563896179, |
|
"learning_rate": 0.00019886649352662567, |
|
"loss": 0.0427, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.482897384305835, |
|
"grad_norm": 0.6682177782058716, |
|
"learning_rate": 0.00019881599680066024, |
|
"loss": 0.0565, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.4879275653923541, |
|
"grad_norm": 0.4167076349258423, |
|
"learning_rate": 0.0001987644063535173, |
|
"loss": 0.0392, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.49295774647887325, |
|
"grad_norm": 0.4293626546859741, |
|
"learning_rate": 0.00019871172275621332, |
|
"loss": 0.0409, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.49798792756539234, |
|
"grad_norm": 0.6032306551933289, |
|
"learning_rate": 0.00019865794659186406, |
|
"loss": 0.0453, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.5030181086519114, |
|
"grad_norm": 0.5772558450698853, |
|
"learning_rate": 0.00019860307845567815, |
|
"loss": 0.0413, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.5080482897384306, |
|
"grad_norm": 0.5984110236167908, |
|
"learning_rate": 0.00019854711895495036, |
|
"loss": 0.0551, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.5130784708249497, |
|
"grad_norm": 0.46663838624954224, |
|
"learning_rate": 0.00019849006870905503, |
|
"loss": 0.0369, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.5181086519114688, |
|
"grad_norm": 0.5539775490760803, |
|
"learning_rate": 0.00019843192834943912, |
|
"loss": 0.0378, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.5231388329979879, |
|
"grad_norm": 0.5635536313056946, |
|
"learning_rate": 0.0001983726985196153, |
|
"loss": 0.0394, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.528169014084507, |
|
"grad_norm": 0.6095893979072571, |
|
"learning_rate": 0.00019831237987515474, |
|
"loss": 0.0354, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.5331991951710262, |
|
"grad_norm": 0.4370132386684418, |
|
"learning_rate": 0.00019825097308367987, |
|
"loss": 0.0477, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.5382293762575453, |
|
"grad_norm": 0.7766854763031006, |
|
"learning_rate": 0.00019818847882485704, |
|
"loss": 0.0368, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.5432595573440644, |
|
"grad_norm": 0.8621564507484436, |
|
"learning_rate": 0.00019812489779038903, |
|
"loss": 0.0493, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.5482897384305835, |
|
"grad_norm": 0.41698649525642395, |
|
"learning_rate": 0.00019806023068400723, |
|
"loss": 0.047, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.5533199195171026, |
|
"grad_norm": 0.334310382604599, |
|
"learning_rate": 0.00019799447822146403, |
|
"loss": 0.0352, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.5583501006036218, |
|
"grad_norm": 0.36509206891059875, |
|
"learning_rate": 0.0001979276411305248, |
|
"loss": 0.0387, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.5633802816901409, |
|
"grad_norm": 0.49226775765419006, |
|
"learning_rate": 0.00019785972015095988, |
|
"loss": 0.0412, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.56841046277666, |
|
"grad_norm": 0.48167258501052856, |
|
"learning_rate": 0.0001977907160345363, |
|
"loss": 0.0454, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.5734406438631791, |
|
"grad_norm": 0.43359094858169556, |
|
"learning_rate": 0.00019772062954500965, |
|
"loss": 0.0355, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.5784708249496981, |
|
"grad_norm": 0.4004541039466858, |
|
"learning_rate": 0.00019764946145811542, |
|
"loss": 0.0401, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.5835010060362174, |
|
"grad_norm": 0.4662073254585266, |
|
"learning_rate": 0.00019757721256156047, |
|
"loss": 0.0382, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.5885311871227364, |
|
"grad_norm": 0.3257390260696411, |
|
"learning_rate": 0.00019750388365501447, |
|
"loss": 0.042, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.5935613682092555, |
|
"grad_norm": 0.5180749297142029, |
|
"learning_rate": 0.0001974294755501008, |
|
"loss": 0.0428, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.5985915492957746, |
|
"grad_norm": 0.4941740334033966, |
|
"learning_rate": 0.00019735398907038779, |
|
"loss": 0.04, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.6036217303822937, |
|
"grad_norm": 0.3722670078277588, |
|
"learning_rate": 0.00019727742505137936, |
|
"loss": 0.0493, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.6086519114688129, |
|
"grad_norm": 0.32677406072616577, |
|
"learning_rate": 0.0001971997843405061, |
|
"loss": 0.0462, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.613682092555332, |
|
"grad_norm": 0.48656272888183594, |
|
"learning_rate": 0.00019712106779711555, |
|
"loss": 0.0451, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.6187122736418511, |
|
"grad_norm": 0.4287594258785248, |
|
"learning_rate": 0.00019704127629246293, |
|
"loss": 0.0294, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.6237424547283702, |
|
"grad_norm": 0.46294787526130676, |
|
"learning_rate": 0.0001969604107097014, |
|
"loss": 0.0462, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.6287726358148893, |
|
"grad_norm": 0.32905468344688416, |
|
"learning_rate": 0.00019687847194387221, |
|
"loss": 0.0324, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.6338028169014085, |
|
"grad_norm": 0.4024835228919983, |
|
"learning_rate": 0.00019679546090189503, |
|
"loss": 0.0407, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.6388329979879276, |
|
"grad_norm": 0.539588451385498, |
|
"learning_rate": 0.00019671137850255766, |
|
"loss": 0.0368, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.6438631790744467, |
|
"grad_norm": 0.32176288962364197, |
|
"learning_rate": 0.00019662622567650595, |
|
"loss": 0.0344, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.6488933601609658, |
|
"grad_norm": 0.45465490221977234, |
|
"learning_rate": 0.0001965400033662336, |
|
"loss": 0.0361, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.6539235412474849, |
|
"grad_norm": 0.47942492365837097, |
|
"learning_rate": 0.00019645271252607155, |
|
"loss": 0.0408, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.6589537223340041, |
|
"grad_norm": 0.2223718762397766, |
|
"learning_rate": 0.00019636435412217758, |
|
"loss": 0.0347, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.6639839034205232, |
|
"grad_norm": 0.550037682056427, |
|
"learning_rate": 0.00019627492913252547, |
|
"loss": 0.0381, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.6690140845070423, |
|
"grad_norm": 0.39280569553375244, |
|
"learning_rate": 0.0001961844385468943, |
|
"loss": 0.0455, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.6740442655935613, |
|
"grad_norm": 0.64002925157547, |
|
"learning_rate": 0.00019609288336685742, |
|
"loss": 0.0495, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.6790744466800804, |
|
"grad_norm": 0.34422725439071655, |
|
"learning_rate": 0.00019600026460577142, |
|
"loss": 0.0309, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.6841046277665996, |
|
"grad_norm": 0.42025962471961975, |
|
"learning_rate": 0.00019590658328876484, |
|
"loss": 0.0479, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.6891348088531187, |
|
"grad_norm": 0.48690280318260193, |
|
"learning_rate": 0.0001958118404527269, |
|
"loss": 0.0352, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.6941649899396378, |
|
"grad_norm": 0.4300714433193207, |
|
"learning_rate": 0.0001957160371462959, |
|
"loss": 0.0367, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.6991951710261569, |
|
"grad_norm": 0.3296925723552704, |
|
"learning_rate": 0.00019561917442984788, |
|
"loss": 0.0399, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.704225352112676, |
|
"grad_norm": 0.5653759837150574, |
|
"learning_rate": 0.00019552125337548462, |
|
"loss": 0.0354, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.7092555331991952, |
|
"grad_norm": 0.39718741178512573, |
|
"learning_rate": 0.00019542227506702173, |
|
"loss": 0.0514, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.7142857142857143, |
|
"grad_norm": 0.5332531332969666, |
|
"learning_rate": 0.00019532224059997692, |
|
"loss": 0.0395, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.7193158953722334, |
|
"grad_norm": 0.41001367568969727, |
|
"learning_rate": 0.0001952211510815578, |
|
"loss": 0.0465, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.7243460764587525, |
|
"grad_norm": 0.4375529885292053, |
|
"learning_rate": 0.0001951190076306494, |
|
"loss": 0.0332, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.7293762575452716, |
|
"grad_norm": 0.3857330083847046, |
|
"learning_rate": 0.00019501581137780204, |
|
"loss": 0.0372, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.7344064386317908, |
|
"grad_norm": 0.2521601915359497, |
|
"learning_rate": 0.0001949115634652187, |
|
"loss": 0.0342, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.7394366197183099, |
|
"grad_norm": 0.32540401816368103, |
|
"learning_rate": 0.00019480626504674245, |
|
"loss": 0.0301, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.744466800804829, |
|
"grad_norm": 0.27026352286338806, |
|
"learning_rate": 0.00019469991728784356, |
|
"loss": 0.0354, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.7494969818913481, |
|
"grad_norm": 0.5376250147819519, |
|
"learning_rate": 0.00019459252136560674, |
|
"loss": 0.0429, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.7545271629778671, |
|
"grad_norm": 0.36513984203338623, |
|
"learning_rate": 0.00019448407846871804, |
|
"loss": 0.0396, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.7595573440643864, |
|
"grad_norm": 0.16466562449932098, |
|
"learning_rate": 0.0001943745897974516, |
|
"loss": 0.0252, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.7645875251509054, |
|
"grad_norm": 0.2507416307926178, |
|
"learning_rate": 0.0001942640565636566, |
|
"loss": 0.0338, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.7696177062374245, |
|
"grad_norm": 0.27079200744628906, |
|
"learning_rate": 0.0001941524799907436, |
|
"loss": 0.0331, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.7746478873239436, |
|
"grad_norm": 0.5537086129188538, |
|
"learning_rate": 0.00019403986131367123, |
|
"loss": 0.0312, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.7796780684104627, |
|
"grad_norm": 0.37343883514404297, |
|
"learning_rate": 0.00019392620177893224, |
|
"loss": 0.0447, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.7847082494969819, |
|
"grad_norm": 0.32680392265319824, |
|
"learning_rate": 0.00019381150264454, |
|
"loss": 0.0275, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.789738430583501, |
|
"grad_norm": 0.64634770154953, |
|
"learning_rate": 0.00019369576518001437, |
|
"loss": 0.034, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.7947686116700201, |
|
"grad_norm": 0.2528303861618042, |
|
"learning_rate": 0.00019357899066636773, |
|
"loss": 0.0298, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.7997987927565392, |
|
"grad_norm": 0.4246571660041809, |
|
"learning_rate": 0.00019346118039609086, |
|
"loss": 0.0446, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.8048289738430584, |
|
"grad_norm": 0.3372986316680908, |
|
"learning_rate": 0.0001933423356731384, |
|
"loss": 0.0363, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.8098591549295775, |
|
"grad_norm": 0.49657967686653137, |
|
"learning_rate": 0.00019322245781291475, |
|
"loss": 0.039, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.8148893360160966, |
|
"grad_norm": 0.24667495489120483, |
|
"learning_rate": 0.00019310154814225925, |
|
"loss": 0.0326, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.8199195171026157, |
|
"grad_norm": 0.43383368849754333, |
|
"learning_rate": 0.00019297960799943161, |
|
"loss": 0.0402, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.8249496981891348, |
|
"grad_norm": 0.513130247592926, |
|
"learning_rate": 0.00019285663873409715, |
|
"loss": 0.0357, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.829979879275654, |
|
"grad_norm": 0.4496906101703644, |
|
"learning_rate": 0.00019273264170731157, |
|
"loss": 0.0287, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.8350100603621731, |
|
"grad_norm": 0.467529296875, |
|
"learning_rate": 0.00019260761829150637, |
|
"loss": 0.0437, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.8400402414486922, |
|
"grad_norm": 0.402126282453537, |
|
"learning_rate": 0.0001924815698704732, |
|
"loss": 0.0336, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.8450704225352113, |
|
"grad_norm": 0.41219964623451233, |
|
"learning_rate": 0.00019235449783934881, |
|
"loss": 0.0353, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.8501006036217303, |
|
"grad_norm": 0.6443662047386169, |
|
"learning_rate": 0.00019222640360459954, |
|
"loss": 0.0515, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.8551307847082495, |
|
"grad_norm": 0.3046891391277313, |
|
"learning_rate": 0.0001920972885840057, |
|
"loss": 0.0339, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.8601609657947686, |
|
"grad_norm": 0.5683744549751282, |
|
"learning_rate": 0.00019196715420664596, |
|
"loss": 0.0377, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.8651911468812877, |
|
"grad_norm": 0.46794387698173523, |
|
"learning_rate": 0.0001918360019128815, |
|
"loss": 0.0414, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.8702213279678068, |
|
"grad_norm": 0.4453373849391937, |
|
"learning_rate": 0.00019170383315434002, |
|
"loss": 0.0289, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.8752515090543259, |
|
"grad_norm": 0.44001007080078125, |
|
"learning_rate": 0.00019157064939389978, |
|
"loss": 0.0433, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.8802816901408451, |
|
"grad_norm": 0.28705525398254395, |
|
"learning_rate": 0.00019143645210567328, |
|
"loss": 0.0327, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.8853118712273642, |
|
"grad_norm": 0.49537310004234314, |
|
"learning_rate": 0.00019130124277499109, |
|
"loss": 0.0375, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.8903420523138833, |
|
"grad_norm": 0.5446850657463074, |
|
"learning_rate": 0.00019116502289838523, |
|
"loss": 0.0357, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.8953722334004024, |
|
"grad_norm": 0.49629637598991394, |
|
"learning_rate": 0.0001910277939835728, |
|
"loss": 0.0291, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.9004024144869215, |
|
"grad_norm": 0.3879849910736084, |
|
"learning_rate": 0.00019088955754943912, |
|
"loss": 0.0397, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.9054325955734407, |
|
"grad_norm": 0.4239087402820587, |
|
"learning_rate": 0.00019075031512602104, |
|
"loss": 0.0417, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.9104627766599598, |
|
"grad_norm": 0.5208977460861206, |
|
"learning_rate": 0.00019061006825448997, |
|
"loss": 0.0345, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.9154929577464789, |
|
"grad_norm": 0.28360894322395325, |
|
"learning_rate": 0.0001904688184871348, |
|
"loss": 0.0302, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.920523138832998, |
|
"grad_norm": 0.41871729493141174, |
|
"learning_rate": 0.00019032656738734467, |
|
"loss": 0.0381, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.9255533199195171, |
|
"grad_norm": 0.3086782693862915, |
|
"learning_rate": 0.00019018331652959184, |
|
"loss": 0.0397, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.9305835010060363, |
|
"grad_norm": 0.45297491550445557, |
|
"learning_rate": 0.00019003906749941405, |
|
"loss": 0.0347, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.9356136820925554, |
|
"grad_norm": 0.576064944267273, |
|
"learning_rate": 0.00018989382189339718, |
|
"loss": 0.033, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.9406438631790744, |
|
"grad_norm": 0.4694220721721649, |
|
"learning_rate": 0.00018974758131915732, |
|
"loss": 0.0279, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.9456740442655935, |
|
"grad_norm": 0.3088204264640808, |
|
"learning_rate": 0.00018960034739532336, |
|
"loss": 0.0344, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.9507042253521126, |
|
"grad_norm": 0.3804841637611389, |
|
"learning_rate": 0.00018945212175151856, |
|
"loss": 0.0421, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.9557344064386318, |
|
"grad_norm": 0.4735386073589325, |
|
"learning_rate": 0.00018930290602834298, |
|
"loss": 0.0434, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.9607645875251509, |
|
"grad_norm": 0.6512228846549988, |
|
"learning_rate": 0.0001891527018773551, |
|
"loss": 0.0332, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 0.96579476861167, |
|
"grad_norm": 0.3166397213935852, |
|
"learning_rate": 0.00018900151096105358, |
|
"loss": 0.0271, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.9708249496981891, |
|
"grad_norm": 0.36923766136169434, |
|
"learning_rate": 0.00018884933495285882, |
|
"loss": 0.0339, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 0.9758551307847082, |
|
"grad_norm": 0.43903985619544983, |
|
"learning_rate": 0.0001886961755370945, |
|
"loss": 0.0289, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 0.9808853118712274, |
|
"grad_norm": 0.4627651274204254, |
|
"learning_rate": 0.00018854203440896888, |
|
"loss": 0.0356, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.9859154929577465, |
|
"grad_norm": 0.42779436707496643, |
|
"learning_rate": 0.0001883869132745561, |
|
"loss": 0.0346, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.9909456740442656, |
|
"grad_norm": 0.4050188660621643, |
|
"learning_rate": 0.00018823081385077733, |
|
"loss": 0.0365, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 0.9959758551307847, |
|
"grad_norm": 0.28512129187583923, |
|
"learning_rate": 0.00018807373786538153, |
|
"loss": 0.0399, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 1.0010060362173039, |
|
"grad_norm": 0.3914794623851776, |
|
"learning_rate": 0.00018791568705692668, |
|
"loss": 0.034, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 1.0060362173038229, |
|
"grad_norm": 0.4772788882255554, |
|
"learning_rate": 0.00018775666317476022, |
|
"loss": 0.0308, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.011066398390342, |
|
"grad_norm": 0.2963551878929138, |
|
"learning_rate": 0.0001875966679789999, |
|
"loss": 0.0309, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 1.0160965794768613, |
|
"grad_norm": 0.17126326262950897, |
|
"learning_rate": 0.0001874357032405142, |
|
"loss": 0.0264, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 1.0211267605633803, |
|
"grad_norm": 0.3092329800128937, |
|
"learning_rate": 0.00018727377074090272, |
|
"loss": 0.0325, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 1.0261569416498995, |
|
"grad_norm": 0.23635587096214294, |
|
"learning_rate": 0.00018711087227247657, |
|
"loss": 0.0318, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 1.0311871227364184, |
|
"grad_norm": 0.4015599489212036, |
|
"learning_rate": 0.00018694700963823837, |
|
"loss": 0.0276, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 1.0362173038229376, |
|
"grad_norm": 0.3345971405506134, |
|
"learning_rate": 0.00018678218465186243, |
|
"loss": 0.0327, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 1.0412474849094568, |
|
"grad_norm": 0.5647363066673279, |
|
"learning_rate": 0.0001866163991376746, |
|
"loss": 0.0331, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 1.0462776659959758, |
|
"grad_norm": 0.3128892481327057, |
|
"learning_rate": 0.0001864496549306321, |
|
"loss": 0.0377, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 1.051307847082495, |
|
"grad_norm": 0.45699113607406616, |
|
"learning_rate": 0.00018628195387630325, |
|
"loss": 0.0285, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 1.056338028169014, |
|
"grad_norm": 0.2944326400756836, |
|
"learning_rate": 0.00018611329783084697, |
|
"loss": 0.0337, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 1.0613682092555332, |
|
"grad_norm": 0.5147525668144226, |
|
"learning_rate": 0.00018594368866099226, |
|
"loss": 0.0403, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 1.0663983903420524, |
|
"grad_norm": 0.3949408233165741, |
|
"learning_rate": 0.0001857731282440176, |
|
"loss": 0.0398, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 1.0714285714285714, |
|
"grad_norm": 0.6203738451004028, |
|
"learning_rate": 0.00018560161846773002, |
|
"loss": 0.0374, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 1.0764587525150906, |
|
"grad_norm": 0.5248531699180603, |
|
"learning_rate": 0.00018542916123044444, |
|
"loss": 0.035, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 1.0814889336016096, |
|
"grad_norm": 0.25072234869003296, |
|
"learning_rate": 0.00018525575844096243, |
|
"loss": 0.0315, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 1.0865191146881288, |
|
"grad_norm": 0.46970999240875244, |
|
"learning_rate": 0.00018508141201855125, |
|
"loss": 0.0473, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 1.091549295774648, |
|
"grad_norm": 0.4322168231010437, |
|
"learning_rate": 0.00018490612389292243, |
|
"loss": 0.0322, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 1.096579476861167, |
|
"grad_norm": 0.31416237354278564, |
|
"learning_rate": 0.0001847298960042106, |
|
"loss": 0.026, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 1.1016096579476862, |
|
"grad_norm": 0.4525179862976074, |
|
"learning_rate": 0.0001845527303029519, |
|
"loss": 0.0341, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 1.1066398390342052, |
|
"grad_norm": 0.3020547926425934, |
|
"learning_rate": 0.00018437462875006247, |
|
"loss": 0.0362, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 1.1116700201207244, |
|
"grad_norm": 0.24597640335559845, |
|
"learning_rate": 0.0001841955933168166, |
|
"loss": 0.0319, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 1.1167002012072436, |
|
"grad_norm": 0.5903005003929138, |
|
"learning_rate": 0.00018401562598482517, |
|
"loss": 0.0302, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 1.1217303822937625, |
|
"grad_norm": 0.3023013174533844, |
|
"learning_rate": 0.00018383472874601334, |
|
"loss": 0.0372, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 1.1267605633802817, |
|
"grad_norm": 0.39982765913009644, |
|
"learning_rate": 0.00018365290360259894, |
|
"loss": 0.0285, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 1.1317907444668007, |
|
"grad_norm": 0.32488811016082764, |
|
"learning_rate": 0.00018347015256706998, |
|
"loss": 0.0301, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 1.13682092555332, |
|
"grad_norm": 0.3721481263637543, |
|
"learning_rate": 0.00018328647766216246, |
|
"loss": 0.0466, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 1.1418511066398391, |
|
"grad_norm": 0.37539154291152954, |
|
"learning_rate": 0.00018310188092083803, |
|
"loss": 0.0305, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 1.1468812877263581, |
|
"grad_norm": 0.4233405590057373, |
|
"learning_rate": 0.00018291636438626152, |
|
"loss": 0.0383, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 1.1519114688128773, |
|
"grad_norm": 0.3756147623062134, |
|
"learning_rate": 0.00018272993011177822, |
|
"loss": 0.0339, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 1.1569416498993963, |
|
"grad_norm": 0.2825983762741089, |
|
"learning_rate": 0.00018254258016089123, |
|
"loss": 0.0323, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 1.1619718309859155, |
|
"grad_norm": 0.3585676848888397, |
|
"learning_rate": 0.00018235431660723855, |
|
"loss": 0.0257, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 1.1670020120724347, |
|
"grad_norm": 0.34615543484687805, |
|
"learning_rate": 0.00018216514153457025, |
|
"loss": 0.031, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 1.1720321931589537, |
|
"grad_norm": 0.3647988438606262, |
|
"learning_rate": 0.00018197505703672522, |
|
"loss": 0.0298, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 1.1770623742454729, |
|
"grad_norm": 0.2951864004135132, |
|
"learning_rate": 0.0001817840652176082, |
|
"loss": 0.0295, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 1.1820925553319919, |
|
"grad_norm": 0.24966521561145782, |
|
"learning_rate": 0.00018159216819116635, |
|
"loss": 0.0331, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 1.187122736418511, |
|
"grad_norm": 0.31395167112350464, |
|
"learning_rate": 0.00018139936808136593, |
|
"loss": 0.0299, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 1.1921529175050303, |
|
"grad_norm": 0.2532835006713867, |
|
"learning_rate": 0.00018120566702216877, |
|
"loss": 0.0327, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 1.1971830985915493, |
|
"grad_norm": 0.30898261070251465, |
|
"learning_rate": 0.00018101106715750855, |
|
"loss": 0.026, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 1.2022132796780685, |
|
"grad_norm": 0.34393829107284546, |
|
"learning_rate": 0.0001808155706412673, |
|
"loss": 0.0413, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 1.2072434607645874, |
|
"grad_norm": 0.32124215364456177, |
|
"learning_rate": 0.00018061917963725134, |
|
"loss": 0.0282, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 1.2122736418511066, |
|
"grad_norm": 0.4290277063846588, |
|
"learning_rate": 0.0001804218963191674, |
|
"loss": 0.0316, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 1.2173038229376258, |
|
"grad_norm": 0.43430641293525696, |
|
"learning_rate": 0.00018022372287059866, |
|
"loss": 0.031, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 1.2223340040241448, |
|
"grad_norm": 0.2853562831878662, |
|
"learning_rate": 0.00018002466148498035, |
|
"loss": 0.028, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 1.227364185110664, |
|
"grad_norm": 0.40154245495796204, |
|
"learning_rate": 0.00017982471436557579, |
|
"loss": 0.0309, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 1.232394366197183, |
|
"grad_norm": 0.45144811272621155, |
|
"learning_rate": 0.00017962388372545177, |
|
"loss": 0.0313, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 1.2374245472837022, |
|
"grad_norm": 0.310092031955719, |
|
"learning_rate": 0.00017942217178745396, |
|
"loss": 0.0257, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 1.2424547283702214, |
|
"grad_norm": 0.31381756067276, |
|
"learning_rate": 0.00017921958078418278, |
|
"loss": 0.0289, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 1.2474849094567404, |
|
"grad_norm": 0.2533479630947113, |
|
"learning_rate": 0.00017901611295796806, |
|
"loss": 0.0307, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 1.2525150905432596, |
|
"grad_norm": 0.2752733826637268, |
|
"learning_rate": 0.0001788117705608447, |
|
"loss": 0.0358, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 1.2575452716297786, |
|
"grad_norm": 0.40528246760368347, |
|
"learning_rate": 0.00017860655585452763, |
|
"loss": 0.0358, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.2625754527162978, |
|
"grad_norm": 0.37113526463508606, |
|
"learning_rate": 0.00017840047111038651, |
|
"loss": 0.0319, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 1.267605633802817, |
|
"grad_norm": 0.37338030338287354, |
|
"learning_rate": 0.00017819351860942105, |
|
"loss": 0.0292, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 1.272635814889336, |
|
"grad_norm": 0.4197677671909332, |
|
"learning_rate": 0.00017798570064223533, |
|
"loss": 0.0335, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 1.2776659959758552, |
|
"grad_norm": 0.3801611363887787, |
|
"learning_rate": 0.0001777770195090128, |
|
"loss": 0.0364, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 1.2826961770623742, |
|
"grad_norm": 0.49140316247940063, |
|
"learning_rate": 0.00017756747751949045, |
|
"loss": 0.0244, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 1.2877263581488934, |
|
"grad_norm": 0.37291839718818665, |
|
"learning_rate": 0.00017735707699293368, |
|
"loss": 0.0375, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 1.2927565392354126, |
|
"grad_norm": 0.27540504932403564, |
|
"learning_rate": 0.0001771458202581102, |
|
"loss": 0.036, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 1.2977867203219315, |
|
"grad_norm": 0.3908439576625824, |
|
"learning_rate": 0.00017693370965326467, |
|
"loss": 0.0343, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 1.3028169014084507, |
|
"grad_norm": 0.46150150895118713, |
|
"learning_rate": 0.00017672074752609235, |
|
"loss": 0.0277, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 1.3078470824949697, |
|
"grad_norm": 0.3451060950756073, |
|
"learning_rate": 0.0001765069362337136, |
|
"loss": 0.0366, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 1.312877263581489, |
|
"grad_norm": 0.5258057117462158, |
|
"learning_rate": 0.00017629227814264738, |
|
"loss": 0.0305, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 1.3179074446680081, |
|
"grad_norm": 0.3369615972042084, |
|
"learning_rate": 0.00017607677562878543, |
|
"loss": 0.0303, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 1.3229376257545271, |
|
"grad_norm": 0.48052117228507996, |
|
"learning_rate": 0.00017586043107736558, |
|
"loss": 0.0353, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 1.3279678068410463, |
|
"grad_norm": 0.44058990478515625, |
|
"learning_rate": 0.0001756432468829457, |
|
"loss": 0.0357, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 1.3329979879275653, |
|
"grad_norm": 0.3025529980659485, |
|
"learning_rate": 0.000175425225449377, |
|
"loss": 0.03, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 1.3380281690140845, |
|
"grad_norm": 0.3708013892173767, |
|
"learning_rate": 0.00017520636918977743, |
|
"loss": 0.0263, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 1.3430583501006037, |
|
"grad_norm": 0.41252729296684265, |
|
"learning_rate": 0.00017498668052650502, |
|
"loss": 0.0376, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 1.3480885311871227, |
|
"grad_norm": 0.4583453834056854, |
|
"learning_rate": 0.0001747661618911312, |
|
"loss": 0.035, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 1.3531187122736419, |
|
"grad_norm": 0.26764222979545593, |
|
"learning_rate": 0.00017454481572441353, |
|
"loss": 0.0258, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 1.3581488933601609, |
|
"grad_norm": 0.36722269654273987, |
|
"learning_rate": 0.000174322644476269, |
|
"loss": 0.025, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 1.36317907444668, |
|
"grad_norm": 0.2594809830188751, |
|
"learning_rate": 0.0001740996506057469, |
|
"loss": 0.0271, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 1.3682092555331993, |
|
"grad_norm": 0.36106789112091064, |
|
"learning_rate": 0.00017387583658100142, |
|
"loss": 0.0293, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 1.3732394366197183, |
|
"grad_norm": 0.3363661468029022, |
|
"learning_rate": 0.00017365120487926441, |
|
"loss": 0.0249, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 1.3782696177062375, |
|
"grad_norm": 0.275418758392334, |
|
"learning_rate": 0.000173425757986818, |
|
"loss": 0.028, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 1.3832997987927564, |
|
"grad_norm": 0.26659509539604187, |
|
"learning_rate": 0.00017319949839896722, |
|
"loss": 0.023, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 1.3883299798792756, |
|
"grad_norm": 0.461508184671402, |
|
"learning_rate": 0.00017297242862001198, |
|
"loss": 0.0302, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 1.3933601609657948, |
|
"grad_norm": 0.30697330832481384, |
|
"learning_rate": 0.0001727445511632197, |
|
"loss": 0.0252, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 1.3983903420523138, |
|
"grad_norm": 0.35531529784202576, |
|
"learning_rate": 0.00017251586855079746, |
|
"loss": 0.0251, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 1.403420523138833, |
|
"grad_norm": 0.4393905997276306, |
|
"learning_rate": 0.0001722863833138639, |
|
"loss": 0.0339, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 1.408450704225352, |
|
"grad_norm": 0.59965980052948, |
|
"learning_rate": 0.00017205609799242138, |
|
"loss": 0.0344, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 1.4134808853118712, |
|
"grad_norm": 0.2698121964931488, |
|
"learning_rate": 0.0001718250151353278, |
|
"loss": 0.0309, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 1.4185110663983904, |
|
"grad_norm": 0.5065796375274658, |
|
"learning_rate": 0.00017159313730026837, |
|
"loss": 0.0271, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 1.4235412474849094, |
|
"grad_norm": 0.39271390438079834, |
|
"learning_rate": 0.0001713604670537273, |
|
"loss": 0.0362, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 1.4285714285714286, |
|
"grad_norm": 0.2914319932460785, |
|
"learning_rate": 0.00017112700697095954, |
|
"loss": 0.036, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 1.4336016096579476, |
|
"grad_norm": 0.33919307589530945, |
|
"learning_rate": 0.00017089275963596195, |
|
"loss": 0.0303, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 1.4386317907444668, |
|
"grad_norm": 0.38898414373397827, |
|
"learning_rate": 0.0001706577276414451, |
|
"loss": 0.0336, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 1.443661971830986, |
|
"grad_norm": 0.3567386567592621, |
|
"learning_rate": 0.00017042191358880424, |
|
"loss": 0.0315, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 1.448692152917505, |
|
"grad_norm": 0.5339713096618652, |
|
"learning_rate": 0.00017018532008809074, |
|
"loss": 0.0299, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 1.4537223340040242, |
|
"grad_norm": 0.4281061887741089, |
|
"learning_rate": 0.00016994794975798305, |
|
"loss": 0.0327, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 1.4587525150905432, |
|
"grad_norm": 0.3700096905231476, |
|
"learning_rate": 0.00016970980522575775, |
|
"loss": 0.0299, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 1.4637826961770624, |
|
"grad_norm": 0.41299402713775635, |
|
"learning_rate": 0.00016947088912726052, |
|
"loss": 0.0347, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 1.4688128772635816, |
|
"grad_norm": 0.2901936173439026, |
|
"learning_rate": 0.00016923120410687695, |
|
"loss": 0.0297, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 1.4738430583501005, |
|
"grad_norm": 0.4472912549972534, |
|
"learning_rate": 0.00016899075281750326, |
|
"loss": 0.0274, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 1.4788732394366197, |
|
"grad_norm": 0.3437786102294922, |
|
"learning_rate": 0.00016874953792051693, |
|
"loss": 0.0353, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 1.4839034205231387, |
|
"grad_norm": 0.31530702114105225, |
|
"learning_rate": 0.00016850756208574717, |
|
"loss": 0.0369, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 1.488933601609658, |
|
"grad_norm": 0.37885403633117676, |
|
"learning_rate": 0.00016826482799144556, |
|
"loss": 0.0326, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 1.4939637826961771, |
|
"grad_norm": 0.3617626428604126, |
|
"learning_rate": 0.00016802133832425625, |
|
"loss": 0.0294, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 1.4989939637826961, |
|
"grad_norm": 0.34277963638305664, |
|
"learning_rate": 0.0001677770957791862, |
|
"loss": 0.0269, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 1.5040241448692153, |
|
"grad_norm": 0.46021127700805664, |
|
"learning_rate": 0.00016753210305957557, |
|
"loss": 0.0307, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 1.5090543259557343, |
|
"grad_norm": 0.5008649230003357, |
|
"learning_rate": 0.0001672863628770675, |
|
"loss": 0.0434, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.5140845070422535, |
|
"grad_norm": 0.2918837368488312, |
|
"learning_rate": 0.0001670398779515784, |
|
"loss": 0.0297, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 1.5191146881287727, |
|
"grad_norm": 0.30590662360191345, |
|
"learning_rate": 0.00016679265101126743, |
|
"loss": 0.03, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 1.524144869215292, |
|
"grad_norm": 0.29896557331085205, |
|
"learning_rate": 0.00016654468479250688, |
|
"loss": 0.0332, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 1.529175050301811, |
|
"grad_norm": 0.5691266059875488, |
|
"learning_rate": 0.00016629598203985135, |
|
"loss": 0.0312, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 1.5342052313883299, |
|
"grad_norm": 0.5821362733840942, |
|
"learning_rate": 0.00016604654550600762, |
|
"loss": 0.0329, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 1.539235412474849, |
|
"grad_norm": 0.3751659691333771, |
|
"learning_rate": 0.00016579637795180425, |
|
"loss": 0.029, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 1.5442655935613683, |
|
"grad_norm": 0.30066096782684326, |
|
"learning_rate": 0.0001655454821461608, |
|
"loss": 0.0207, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 1.5492957746478875, |
|
"grad_norm": 0.5700194239616394, |
|
"learning_rate": 0.00016529386086605737, |
|
"loss": 0.0464, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 1.5543259557344065, |
|
"grad_norm": 0.3803764581680298, |
|
"learning_rate": 0.00016504151689650386, |
|
"loss": 0.0295, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 1.5593561368209254, |
|
"grad_norm": 0.28992944955825806, |
|
"learning_rate": 0.0001647884530305089, |
|
"loss": 0.0339, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 1.5643863179074446, |
|
"grad_norm": 0.1968836933374405, |
|
"learning_rate": 0.00016453467206904935, |
|
"loss": 0.027, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 1.5694164989939638, |
|
"grad_norm": 0.4015430510044098, |
|
"learning_rate": 0.00016428017682103892, |
|
"loss": 0.0308, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 1.574446680080483, |
|
"grad_norm": 0.2642582356929779, |
|
"learning_rate": 0.00016402497010329725, |
|
"loss": 0.0293, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 1.579476861167002, |
|
"grad_norm": 0.446898877620697, |
|
"learning_rate": 0.00016376905474051873, |
|
"loss": 0.0274, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 1.584507042253521, |
|
"grad_norm": 0.4257184565067291, |
|
"learning_rate": 0.00016351243356524123, |
|
"loss": 0.032, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 1.5895372233400402, |
|
"grad_norm": 0.3191612660884857, |
|
"learning_rate": 0.00016325510941781474, |
|
"loss": 0.0286, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 1.5945674044265594, |
|
"grad_norm": 0.2913196086883545, |
|
"learning_rate": 0.00016299708514636992, |
|
"loss": 0.0331, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 1.5995975855130786, |
|
"grad_norm": 0.35634562373161316, |
|
"learning_rate": 0.00016273836360678652, |
|
"loss": 0.0289, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 1.6046277665995976, |
|
"grad_norm": 0.4977535009384155, |
|
"learning_rate": 0.00016247894766266196, |
|
"loss": 0.0308, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 1.6096579476861166, |
|
"grad_norm": 0.32202795147895813, |
|
"learning_rate": 0.0001622188401852794, |
|
"loss": 0.0297, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 1.6146881287726358, |
|
"grad_norm": 0.18511469662189484, |
|
"learning_rate": 0.00016195804405357613, |
|
"loss": 0.0243, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 1.619718309859155, |
|
"grad_norm": 0.2580728530883789, |
|
"learning_rate": 0.00016169656215411164, |
|
"loss": 0.0247, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 1.6247484909456742, |
|
"grad_norm": 0.46906596422195435, |
|
"learning_rate": 0.00016143439738103564, |
|
"loss": 0.0245, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 1.6297786720321932, |
|
"grad_norm": 0.39322683215141296, |
|
"learning_rate": 0.00016117155263605608, |
|
"loss": 0.0282, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 1.6348088531187122, |
|
"grad_norm": 0.2526596188545227, |
|
"learning_rate": 0.00016090803082840707, |
|
"loss": 0.025, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 1.6398390342052314, |
|
"grad_norm": 0.3361895978450775, |
|
"learning_rate": 0.00016064383487481655, |
|
"loss": 0.0208, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 1.6448692152917506, |
|
"grad_norm": 0.30806657671928406, |
|
"learning_rate": 0.0001603789676994741, |
|
"loss": 0.0289, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 1.6498993963782698, |
|
"grad_norm": 0.47636348009109497, |
|
"learning_rate": 0.00016011343223399865, |
|
"loss": 0.0232, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 1.6549295774647887, |
|
"grad_norm": 0.3957202732563019, |
|
"learning_rate": 0.00015984723141740576, |
|
"loss": 0.0338, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 1.6599597585513077, |
|
"grad_norm": 0.3279256820678711, |
|
"learning_rate": 0.0001595803681960754, |
|
"loss": 0.032, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 1.664989939637827, |
|
"grad_norm": 0.1826469749212265, |
|
"learning_rate": 0.00015931284552371918, |
|
"loss": 0.0202, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 1.6700201207243461, |
|
"grad_norm": 0.37201932072639465, |
|
"learning_rate": 0.00015904466636134772, |
|
"loss": 0.0272, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 1.6750503018108653, |
|
"grad_norm": 0.3043290078639984, |
|
"learning_rate": 0.00015877583367723773, |
|
"loss": 0.0313, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 1.6800804828973843, |
|
"grad_norm": 0.31204384565353394, |
|
"learning_rate": 0.00015850635044689938, |
|
"loss": 0.0275, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 1.6851106639839033, |
|
"grad_norm": 0.22221866250038147, |
|
"learning_rate": 0.00015823621965304325, |
|
"loss": 0.0254, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 1.6901408450704225, |
|
"grad_norm": 0.22048987448215485, |
|
"learning_rate": 0.00015796544428554725, |
|
"loss": 0.026, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 1.6951710261569417, |
|
"grad_norm": 0.34224534034729004, |
|
"learning_rate": 0.00015769402734142367, |
|
"loss": 0.0329, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 1.700201207243461, |
|
"grad_norm": 0.24496717751026154, |
|
"learning_rate": 0.00015742197182478586, |
|
"loss": 0.0233, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 1.70523138832998, |
|
"grad_norm": 0.3360805809497833, |
|
"learning_rate": 0.00015714928074681515, |
|
"loss": 0.0272, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 1.7102615694164989, |
|
"grad_norm": 0.21322770416736603, |
|
"learning_rate": 0.00015687595712572737, |
|
"loss": 0.0245, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 1.715291750503018, |
|
"grad_norm": 0.31249675154685974, |
|
"learning_rate": 0.00015660200398673948, |
|
"loss": 0.0254, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 1.7203219315895373, |
|
"grad_norm": 0.37619125843048096, |
|
"learning_rate": 0.00015632742436203615, |
|
"loss": 0.0292, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 1.7253521126760565, |
|
"grad_norm": 0.29316648840904236, |
|
"learning_rate": 0.00015605222129073617, |
|
"loss": 0.0264, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 1.7303822937625755, |
|
"grad_norm": 0.31073886156082153, |
|
"learning_rate": 0.0001557763978188588, |
|
"loss": 0.0322, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 1.7354124748490944, |
|
"grad_norm": 0.3250234127044678, |
|
"learning_rate": 0.00015549995699929, |
|
"loss": 0.0338, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 1.7404426559356136, |
|
"grad_norm": 0.3571024537086487, |
|
"learning_rate": 0.00015522290189174867, |
|
"loss": 0.028, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 1.7454728370221329, |
|
"grad_norm": 0.3457786440849304, |
|
"learning_rate": 0.000154945235562753, |
|
"loss": 0.0289, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 1.750503018108652, |
|
"grad_norm": 0.3116254508495331, |
|
"learning_rate": 0.00015466696108558611, |
|
"loss": 0.024, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 1.755533199195171, |
|
"grad_norm": 0.43166661262512207, |
|
"learning_rate": 0.00015438808154026238, |
|
"loss": 0.0305, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 1.76056338028169, |
|
"grad_norm": 0.3314637541770935, |
|
"learning_rate": 0.0001541086000134932, |
|
"loss": 0.023, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 1.7655935613682092, |
|
"grad_norm": 0.407803475856781, |
|
"learning_rate": 0.0001538285195986529, |
|
"loss": 0.029, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 1.7706237424547284, |
|
"grad_norm": 0.28635403513908386, |
|
"learning_rate": 0.0001535478433957444, |
|
"loss": 0.0302, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 1.7756539235412476, |
|
"grad_norm": 0.43160927295684814, |
|
"learning_rate": 0.000153266574511365, |
|
"loss": 0.0271, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 1.7806841046277666, |
|
"grad_norm": 0.4747959077358246, |
|
"learning_rate": 0.0001529847160586719, |
|
"loss": 0.034, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 1.7857142857142856, |
|
"grad_norm": 0.2701944410800934, |
|
"learning_rate": 0.0001527022711573479, |
|
"loss": 0.028, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 1.7907444668008048, |
|
"grad_norm": 0.20618535578250885, |
|
"learning_rate": 0.00015241924293356665, |
|
"loss": 0.0311, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 1.795774647887324, |
|
"grad_norm": 0.3267618417739868, |
|
"learning_rate": 0.0001521356345199582, |
|
"loss": 0.0358, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 1.8008048289738432, |
|
"grad_norm": 0.24747027456760406, |
|
"learning_rate": 0.0001518514490555743, |
|
"loss": 0.0296, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 1.8058350100603622, |
|
"grad_norm": 0.23700428009033203, |
|
"learning_rate": 0.00015156668968585368, |
|
"loss": 0.0313, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 1.8108651911468812, |
|
"grad_norm": 0.29978063702583313, |
|
"learning_rate": 0.0001512813595625872, |
|
"loss": 0.0321, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 1.8158953722334004, |
|
"grad_norm": 0.33228403329849243, |
|
"learning_rate": 0.00015099546184388287, |
|
"loss": 0.0305, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 1.8209255533199196, |
|
"grad_norm": 0.3012440502643585, |
|
"learning_rate": 0.00015070899969413115, |
|
"loss": 0.0264, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 1.8259557344064388, |
|
"grad_norm": 0.31137219071388245, |
|
"learning_rate": 0.00015042197628396966, |
|
"loss": 0.0276, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 1.8309859154929577, |
|
"grad_norm": 0.2960556745529175, |
|
"learning_rate": 0.0001501343947902482, |
|
"loss": 0.0244, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 1.8360160965794767, |
|
"grad_norm": 0.41761451959609985, |
|
"learning_rate": 0.0001498462583959936, |
|
"loss": 0.0271, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 1.841046277665996, |
|
"grad_norm": 0.2327389270067215, |
|
"learning_rate": 0.00014955757029037455, |
|
"loss": 0.0209, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 1.8460764587525151, |
|
"grad_norm": 0.4204856753349304, |
|
"learning_rate": 0.0001492683336686661, |
|
"loss": 0.0301, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 1.8511066398390343, |
|
"grad_norm": 0.31292521953582764, |
|
"learning_rate": 0.00014897855173221452, |
|
"loss": 0.0248, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 1.8561368209255533, |
|
"grad_norm": 0.304541677236557, |
|
"learning_rate": 0.00014868822768840162, |
|
"loss": 0.0237, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 1.8611670020120723, |
|
"grad_norm": 0.41588783264160156, |
|
"learning_rate": 0.00014839736475060966, |
|
"loss": 0.0268, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 1.8661971830985915, |
|
"grad_norm": 0.376002699136734, |
|
"learning_rate": 0.0001481059661381852, |
|
"loss": 0.0287, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 1.8712273641851107, |
|
"grad_norm": 0.3036446273326874, |
|
"learning_rate": 0.00014781403507640404, |
|
"loss": 0.0313, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 1.87625754527163, |
|
"grad_norm": 0.15947189927101135, |
|
"learning_rate": 0.00014752157479643515, |
|
"loss": 0.0269, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 1.881287726358149, |
|
"grad_norm": 0.3008073568344116, |
|
"learning_rate": 0.00014722858853530502, |
|
"loss": 0.031, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 1.8863179074446679, |
|
"grad_norm": 0.34803229570388794, |
|
"learning_rate": 0.00014693507953586192, |
|
"loss": 0.0289, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 1.891348088531187, |
|
"grad_norm": 0.47900477051734924, |
|
"learning_rate": 0.0001466410510467398, |
|
"loss": 0.0239, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 1.8963782696177063, |
|
"grad_norm": 0.3730434775352478, |
|
"learning_rate": 0.00014634650632232255, |
|
"loss": 0.0247, |
|
"step": 3770 |
|
}, |
|
{ |
|
"epoch": 1.9014084507042255, |
|
"grad_norm": 0.2846371829509735, |
|
"learning_rate": 0.00014605144862270782, |
|
"loss": 0.0274, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 1.9064386317907445, |
|
"grad_norm": 0.27134642004966736, |
|
"learning_rate": 0.00014575588121367111, |
|
"loss": 0.0295, |
|
"step": 3790 |
|
}, |
|
{ |
|
"epoch": 1.9114688128772634, |
|
"grad_norm": 0.2962862253189087, |
|
"learning_rate": 0.00014545980736662943, |
|
"loss": 0.029, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 1.9164989939637826, |
|
"grad_norm": 0.4870172441005707, |
|
"learning_rate": 0.00014516323035860515, |
|
"loss": 0.0334, |
|
"step": 3810 |
|
}, |
|
{ |
|
"epoch": 1.9215291750503019, |
|
"grad_norm": 0.4422418475151062, |
|
"learning_rate": 0.00014486615347218985, |
|
"loss": 0.0236, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 1.926559356136821, |
|
"grad_norm": 0.15602879226207733, |
|
"learning_rate": 0.00014456857999550787, |
|
"loss": 0.025, |
|
"step": 3830 |
|
}, |
|
{ |
|
"epoch": 1.93158953722334, |
|
"grad_norm": 0.3513808846473694, |
|
"learning_rate": 0.00014427051322217987, |
|
"loss": 0.0294, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 1.936619718309859, |
|
"grad_norm": 0.40983107686042786, |
|
"learning_rate": 0.00014397195645128645, |
|
"loss": 0.0249, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 1.9416498993963782, |
|
"grad_norm": 0.22102007269859314, |
|
"learning_rate": 0.00014367291298733178, |
|
"loss": 0.0296, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 1.9466800804828974, |
|
"grad_norm": 0.2670123279094696, |
|
"learning_rate": 0.00014337338614020672, |
|
"loss": 0.028, |
|
"step": 3870 |
|
}, |
|
{ |
|
"epoch": 1.9517102615694166, |
|
"grad_norm": 0.3241349160671234, |
|
"learning_rate": 0.00014307337922515238, |
|
"loss": 0.0295, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 1.9567404426559356, |
|
"grad_norm": 0.2833192050457001, |
|
"learning_rate": 0.00014277289556272342, |
|
"loss": 0.0354, |
|
"step": 3890 |
|
}, |
|
{ |
|
"epoch": 1.9617706237424546, |
|
"grad_norm": 0.31631726026535034, |
|
"learning_rate": 0.0001424719384787512, |
|
"loss": 0.0279, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 1.9668008048289738, |
|
"grad_norm": 0.38899293541908264, |
|
"learning_rate": 0.0001421705113043071, |
|
"loss": 0.0223, |
|
"step": 3910 |
|
}, |
|
{ |
|
"epoch": 1.971830985915493, |
|
"grad_norm": 0.33592867851257324, |
|
"learning_rate": 0.00014186861737566558, |
|
"loss": 0.0256, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 1.9768611670020122, |
|
"grad_norm": 0.29194048047065735, |
|
"learning_rate": 0.00014156626003426724, |
|
"loss": 0.0285, |
|
"step": 3930 |
|
}, |
|
{ |
|
"epoch": 1.9818913480885312, |
|
"grad_norm": 0.5011776089668274, |
|
"learning_rate": 0.00014126344262668176, |
|
"loss": 0.0367, |
|
"step": 3940 |
|
}, |
|
{ |
|
"epoch": 1.9869215291750502, |
|
"grad_norm": 0.29997947812080383, |
|
"learning_rate": 0.00014096016850457117, |
|
"loss": 0.0283, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 1.9919517102615694, |
|
"grad_norm": 0.26518872380256653, |
|
"learning_rate": 0.0001406564410246523, |
|
"loss": 0.0367, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 1.9969818913480886, |
|
"grad_norm": 0.23170867562294006, |
|
"learning_rate": 0.00014035226354866, |
|
"loss": 0.0278, |
|
"step": 3970 |
|
}, |
|
{ |
|
"epoch": 2.0020120724346078, |
|
"grad_norm": 0.25075894594192505, |
|
"learning_rate": 0.0001400476394433098, |
|
"loss": 0.0248, |
|
"step": 3980 |
|
}, |
|
{ |
|
"epoch": 2.007042253521127, |
|
"grad_norm": 0.31776684522628784, |
|
"learning_rate": 0.00013974257208026054, |
|
"loss": 0.0239, |
|
"step": 3990 |
|
}, |
|
{ |
|
"epoch": 2.0120724346076457, |
|
"grad_norm": 0.33745163679122925, |
|
"learning_rate": 0.00013943706483607725, |
|
"loss": 0.0221, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 2.017102615694165, |
|
"grad_norm": 0.21726344525814056, |
|
"learning_rate": 0.00013913112109219364, |
|
"loss": 0.0262, |
|
"step": 4010 |
|
}, |
|
{ |
|
"epoch": 2.022132796780684, |
|
"grad_norm": 0.3767722249031067, |
|
"learning_rate": 0.0001388247442348747, |
|
"loss": 0.023, |
|
"step": 4020 |
|
}, |
|
{ |
|
"epoch": 2.0271629778672033, |
|
"grad_norm": 0.4051326513290405, |
|
"learning_rate": 0.00013851793765517925, |
|
"loss": 0.0248, |
|
"step": 4030 |
|
}, |
|
{ |
|
"epoch": 2.0321931589537225, |
|
"grad_norm": 0.2341754138469696, |
|
"learning_rate": 0.00013821070474892238, |
|
"loss": 0.0229, |
|
"step": 4040 |
|
}, |
|
{ |
|
"epoch": 2.0372233400402413, |
|
"grad_norm": 0.30645281076431274, |
|
"learning_rate": 0.00013790304891663792, |
|
"loss": 0.0336, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 2.0422535211267605, |
|
"grad_norm": 0.37985026836395264, |
|
"learning_rate": 0.00013759497356354068, |
|
"loss": 0.025, |
|
"step": 4060 |
|
}, |
|
{ |
|
"epoch": 2.0472837022132797, |
|
"grad_norm": 0.35980600118637085, |
|
"learning_rate": 0.00013728648209948886, |
|
"loss": 0.0344, |
|
"step": 4070 |
|
}, |
|
{ |
|
"epoch": 2.052313883299799, |
|
"grad_norm": 0.23638148605823517, |
|
"learning_rate": 0.0001369775779389463, |
|
"loss": 0.029, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 2.057344064386318, |
|
"grad_norm": 0.21310564875602722, |
|
"learning_rate": 0.00013666826450094467, |
|
"loss": 0.0236, |
|
"step": 4090 |
|
}, |
|
{ |
|
"epoch": 2.062374245472837, |
|
"grad_norm": 0.256998747587204, |
|
"learning_rate": 0.00013635854520904563, |
|
"loss": 0.023, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 2.067404426559356, |
|
"grad_norm": 0.23511482775211334, |
|
"learning_rate": 0.00013604842349130295, |
|
"loss": 0.0202, |
|
"step": 4110 |
|
}, |
|
{ |
|
"epoch": 2.0724346076458753, |
|
"grad_norm": 0.3412628769874573, |
|
"learning_rate": 0.00013573790278022452, |
|
"loss": 0.023, |
|
"step": 4120 |
|
}, |
|
{ |
|
"epoch": 2.0774647887323945, |
|
"grad_norm": 0.3615216314792633, |
|
"learning_rate": 0.0001354269865127344, |
|
"loss": 0.0217, |
|
"step": 4130 |
|
}, |
|
{ |
|
"epoch": 2.0824949698189137, |
|
"grad_norm": 0.17151637375354767, |
|
"learning_rate": 0.0001351156781301348, |
|
"loss": 0.019, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 2.0875251509054324, |
|
"grad_norm": 0.29277974367141724, |
|
"learning_rate": 0.0001348039810780679, |
|
"loss": 0.0294, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 2.0925553319919517, |
|
"grad_norm": 0.2807733714580536, |
|
"learning_rate": 0.00013449189880647782, |
|
"loss": 0.0195, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 2.097585513078471, |
|
"grad_norm": 0.31441447138786316, |
|
"learning_rate": 0.00013417943476957248, |
|
"loss": 0.0203, |
|
"step": 4170 |
|
}, |
|
{ |
|
"epoch": 2.10261569416499, |
|
"grad_norm": 0.3053465783596039, |
|
"learning_rate": 0.0001338665924257851, |
|
"loss": 0.0243, |
|
"step": 4180 |
|
}, |
|
{ |
|
"epoch": 2.1076458752515093, |
|
"grad_norm": 0.22241270542144775, |
|
"learning_rate": 0.0001335533752377362, |
|
"loss": 0.0233, |
|
"step": 4190 |
|
}, |
|
{ |
|
"epoch": 2.112676056338028, |
|
"grad_norm": 0.2186601608991623, |
|
"learning_rate": 0.00013323978667219513, |
|
"loss": 0.0215, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 2.1177062374245472, |
|
"grad_norm": 0.2129117101430893, |
|
"learning_rate": 0.00013292583020004184, |
|
"loss": 0.0236, |
|
"step": 4210 |
|
}, |
|
{ |
|
"epoch": 2.1227364185110664, |
|
"grad_norm": 0.40424126386642456, |
|
"learning_rate": 0.00013261150929622822, |
|
"loss": 0.0286, |
|
"step": 4220 |
|
}, |
|
{ |
|
"epoch": 2.1277665995975856, |
|
"grad_norm": 0.27489861845970154, |
|
"learning_rate": 0.00013229682743973992, |
|
"loss": 0.0241, |
|
"step": 4230 |
|
}, |
|
{ |
|
"epoch": 2.132796780684105, |
|
"grad_norm": 0.20953650772571564, |
|
"learning_rate": 0.0001319817881135576, |
|
"loss": 0.0214, |
|
"step": 4240 |
|
}, |
|
{ |
|
"epoch": 2.1378269617706236, |
|
"grad_norm": 0.3081333041191101, |
|
"learning_rate": 0.0001316663948046186, |
|
"loss": 0.0269, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 2.142857142857143, |
|
"grad_norm": 0.28377407789230347, |
|
"learning_rate": 0.00013135065100377814, |
|
"loss": 0.0207, |
|
"step": 4260 |
|
}, |
|
{ |
|
"epoch": 2.147887323943662, |
|
"grad_norm": 0.28675928711891174, |
|
"learning_rate": 0.00013103456020577078, |
|
"loss": 0.0251, |
|
"step": 4270 |
|
}, |
|
{ |
|
"epoch": 2.152917505030181, |
|
"grad_norm": 0.21341602504253387, |
|
"learning_rate": 0.00013071812590917175, |
|
"loss": 0.0244, |
|
"step": 4280 |
|
}, |
|
{ |
|
"epoch": 2.1579476861167004, |
|
"grad_norm": 0.27167797088623047, |
|
"learning_rate": 0.0001304013516163583, |
|
"loss": 0.0348, |
|
"step": 4290 |
|
}, |
|
{ |
|
"epoch": 2.162977867203219, |
|
"grad_norm": 0.2633381187915802, |
|
"learning_rate": 0.00013008424083347072, |
|
"loss": 0.0174, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 2.1680080482897384, |
|
"grad_norm": 0.30004456639289856, |
|
"learning_rate": 0.00012976679707037367, |
|
"loss": 0.0212, |
|
"step": 4310 |
|
}, |
|
{ |
|
"epoch": 2.1730382293762576, |
|
"grad_norm": 0.2568664252758026, |
|
"learning_rate": 0.00012944902384061746, |
|
"loss": 0.0222, |
|
"step": 4320 |
|
}, |
|
{ |
|
"epoch": 2.1780684104627768, |
|
"grad_norm": 0.4086577594280243, |
|
"learning_rate": 0.00012913092466139883, |
|
"loss": 0.0205, |
|
"step": 4330 |
|
}, |
|
{ |
|
"epoch": 2.183098591549296, |
|
"grad_norm": 0.34458601474761963, |
|
"learning_rate": 0.00012881250305352233, |
|
"loss": 0.0166, |
|
"step": 4340 |
|
}, |
|
{ |
|
"epoch": 2.1881287726358147, |
|
"grad_norm": 0.20635934174060822, |
|
"learning_rate": 0.00012849376254136125, |
|
"loss": 0.0232, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 2.193158953722334, |
|
"grad_norm": 0.39725396037101746, |
|
"learning_rate": 0.00012817470665281853, |
|
"loss": 0.0259, |
|
"step": 4360 |
|
}, |
|
{ |
|
"epoch": 2.198189134808853, |
|
"grad_norm": 0.23941123485565186, |
|
"learning_rate": 0.0001278553389192878, |
|
"loss": 0.0243, |
|
"step": 4370 |
|
}, |
|
{ |
|
"epoch": 2.2032193158953723, |
|
"grad_norm": 0.25489136576652527, |
|
"learning_rate": 0.00012753566287561428, |
|
"loss": 0.0234, |
|
"step": 4380 |
|
}, |
|
{ |
|
"epoch": 2.2082494969818915, |
|
"grad_norm": 0.24230477213859558, |
|
"learning_rate": 0.00012721568206005562, |
|
"loss": 0.0283, |
|
"step": 4390 |
|
}, |
|
{ |
|
"epoch": 2.2132796780684103, |
|
"grad_norm": 0.1920616328716278, |
|
"learning_rate": 0.00012689540001424284, |
|
"loss": 0.0176, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 2.2183098591549295, |
|
"grad_norm": 0.34041905403137207, |
|
"learning_rate": 0.00012657482028314096, |
|
"loss": 0.019, |
|
"step": 4410 |
|
}, |
|
{ |
|
"epoch": 2.2233400402414487, |
|
"grad_norm": 0.16888068616390228, |
|
"learning_rate": 0.00012625394641500995, |
|
"loss": 0.0212, |
|
"step": 4420 |
|
}, |
|
{ |
|
"epoch": 2.228370221327968, |
|
"grad_norm": 0.30867740511894226, |
|
"learning_rate": 0.00012593278196136525, |
|
"loss": 0.0209, |
|
"step": 4430 |
|
}, |
|
{ |
|
"epoch": 2.233400402414487, |
|
"grad_norm": 0.2323102056980133, |
|
"learning_rate": 0.0001256113304769387, |
|
"loss": 0.0269, |
|
"step": 4440 |
|
}, |
|
{ |
|
"epoch": 2.238430583501006, |
|
"grad_norm": 0.2643173038959503, |
|
"learning_rate": 0.00012528959551963897, |
|
"loss": 0.0204, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 2.243460764587525, |
|
"grad_norm": 0.26139459013938904, |
|
"learning_rate": 0.00012496758065051234, |
|
"loss": 0.0277, |
|
"step": 4460 |
|
}, |
|
{ |
|
"epoch": 2.2484909456740443, |
|
"grad_norm": 0.5168742537498474, |
|
"learning_rate": 0.0001246452894337032, |
|
"loss": 0.0254, |
|
"step": 4470 |
|
}, |
|
{ |
|
"epoch": 2.2535211267605635, |
|
"grad_norm": 0.4557676911354065, |
|
"learning_rate": 0.0001243227254364147, |
|
"loss": 0.0226, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 2.2585513078470827, |
|
"grad_norm": 0.3516198396682739, |
|
"learning_rate": 0.00012399989222886906, |
|
"loss": 0.0243, |
|
"step": 4490 |
|
}, |
|
{ |
|
"epoch": 2.2635814889336014, |
|
"grad_norm": 0.20463688671588898, |
|
"learning_rate": 0.00012367679338426833, |
|
"loss": 0.0363, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 2.2686116700201207, |
|
"grad_norm": 0.384490042924881, |
|
"learning_rate": 0.00012335343247875456, |
|
"loss": 0.0248, |
|
"step": 4510 |
|
}, |
|
{ |
|
"epoch": 2.27364185110664, |
|
"grad_norm": 0.24953685700893402, |
|
"learning_rate": 0.00012302981309137052, |
|
"loss": 0.0327, |
|
"step": 4520 |
|
}, |
|
{ |
|
"epoch": 2.278672032193159, |
|
"grad_norm": 0.25503483414649963, |
|
"learning_rate": 0.00012270593880401974, |
|
"loss": 0.0289, |
|
"step": 4530 |
|
}, |
|
{ |
|
"epoch": 2.2837022132796783, |
|
"grad_norm": 0.2341834008693695, |
|
"learning_rate": 0.00012238181320142722, |
|
"loss": 0.0222, |
|
"step": 4540 |
|
}, |
|
{ |
|
"epoch": 2.288732394366197, |
|
"grad_norm": 0.27594631910324097, |
|
"learning_rate": 0.0001220574398710995, |
|
"loss": 0.0298, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 2.2937625754527162, |
|
"grad_norm": 0.26579129695892334, |
|
"learning_rate": 0.00012173282240328505, |
|
"loss": 0.0225, |
|
"step": 4560 |
|
}, |
|
{ |
|
"epoch": 2.2987927565392354, |
|
"grad_norm": 0.2124788463115692, |
|
"learning_rate": 0.00012140796439093447, |
|
"loss": 0.0237, |
|
"step": 4570 |
|
}, |
|
{ |
|
"epoch": 2.3038229376257546, |
|
"grad_norm": 0.2432093769311905, |
|
"learning_rate": 0.00012108286942966085, |
|
"loss": 0.0264, |
|
"step": 4580 |
|
}, |
|
{ |
|
"epoch": 2.308853118712274, |
|
"grad_norm": 0.2657240629196167, |
|
"learning_rate": 0.00012075754111769984, |
|
"loss": 0.0193, |
|
"step": 4590 |
|
}, |
|
{ |
|
"epoch": 2.3138832997987926, |
|
"grad_norm": 0.21074149012565613, |
|
"learning_rate": 0.00012043198305586986, |
|
"loss": 0.0176, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 2.318913480885312, |
|
"grad_norm": 0.29499852657318115, |
|
"learning_rate": 0.00012010619884753238, |
|
"loss": 0.0268, |
|
"step": 4610 |
|
}, |
|
{ |
|
"epoch": 2.323943661971831, |
|
"grad_norm": 0.12746977806091309, |
|
"learning_rate": 0.00011978019209855174, |
|
"loss": 0.0241, |
|
"step": 4620 |
|
}, |
|
{ |
|
"epoch": 2.32897384305835, |
|
"grad_norm": 0.18850328028202057, |
|
"learning_rate": 0.00011945396641725554, |
|
"loss": 0.0165, |
|
"step": 4630 |
|
}, |
|
{ |
|
"epoch": 2.3340040241448694, |
|
"grad_norm": 0.26257720589637756, |
|
"learning_rate": 0.00011912752541439455, |
|
"loss": 0.0239, |
|
"step": 4640 |
|
}, |
|
{ |
|
"epoch": 2.339034205231388, |
|
"grad_norm": 0.399151086807251, |
|
"learning_rate": 0.00011880087270310278, |
|
"loss": 0.0345, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 2.3440643863179074, |
|
"grad_norm": 0.308682382106781, |
|
"learning_rate": 0.00011847401189885745, |
|
"loss": 0.0268, |
|
"step": 4660 |
|
}, |
|
{ |
|
"epoch": 2.3490945674044266, |
|
"grad_norm": 0.3351981043815613, |
|
"learning_rate": 0.00011814694661943906, |
|
"loss": 0.0262, |
|
"step": 4670 |
|
}, |
|
{ |
|
"epoch": 2.3541247484909458, |
|
"grad_norm": 0.2986966371536255, |
|
"learning_rate": 0.00011781968048489127, |
|
"loss": 0.0199, |
|
"step": 4680 |
|
}, |
|
{ |
|
"epoch": 2.359154929577465, |
|
"grad_norm": 0.18461820483207703, |
|
"learning_rate": 0.00011749221711748086, |
|
"loss": 0.0295, |
|
"step": 4690 |
|
}, |
|
{ |
|
"epoch": 2.3641851106639837, |
|
"grad_norm": 0.33392488956451416, |
|
"learning_rate": 0.0001171645601416576, |
|
"loss": 0.0222, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 2.369215291750503, |
|
"grad_norm": 0.3629428446292877, |
|
"learning_rate": 0.0001168367131840142, |
|
"loss": 0.0228, |
|
"step": 4710 |
|
}, |
|
{ |
|
"epoch": 2.374245472837022, |
|
"grad_norm": 0.3002311587333679, |
|
"learning_rate": 0.00011650867987324614, |
|
"loss": 0.0209, |
|
"step": 4720 |
|
}, |
|
{ |
|
"epoch": 2.3792756539235413, |
|
"grad_norm": 0.24123568832874298, |
|
"learning_rate": 0.00011618046384011152, |
|
"loss": 0.0189, |
|
"step": 4730 |
|
}, |
|
{ |
|
"epoch": 2.3843058350100605, |
|
"grad_norm": 0.3153541386127472, |
|
"learning_rate": 0.0001158520687173908, |
|
"loss": 0.0306, |
|
"step": 4740 |
|
}, |
|
{ |
|
"epoch": 2.3893360160965793, |
|
"grad_norm": 0.36185353994369507, |
|
"learning_rate": 0.00011552349813984678, |
|
"loss": 0.0181, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 2.3943661971830985, |
|
"grad_norm": 0.21552976965904236, |
|
"learning_rate": 0.00011519475574418405, |
|
"loss": 0.0199, |
|
"step": 4760 |
|
}, |
|
{ |
|
"epoch": 2.3993963782696177, |
|
"grad_norm": 0.3644528388977051, |
|
"learning_rate": 0.00011486584516900907, |
|
"loss": 0.0251, |
|
"step": 4770 |
|
}, |
|
{ |
|
"epoch": 2.404426559356137, |
|
"grad_norm": 0.3115015923976898, |
|
"learning_rate": 0.00011453677005478968, |
|
"loss": 0.0343, |
|
"step": 4780 |
|
}, |
|
{ |
|
"epoch": 2.409456740442656, |
|
"grad_norm": 0.41604316234588623, |
|
"learning_rate": 0.00011420753404381499, |
|
"loss": 0.03, |
|
"step": 4790 |
|
}, |
|
{ |
|
"epoch": 2.414486921529175, |
|
"grad_norm": 0.3559543192386627, |
|
"learning_rate": 0.00011387814078015482, |
|
"loss": 0.0246, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 2.419517102615694, |
|
"grad_norm": 0.1890721470117569, |
|
"learning_rate": 0.00011354859390961958, |
|
"loss": 0.0209, |
|
"step": 4810 |
|
}, |
|
{ |
|
"epoch": 2.4245472837022133, |
|
"grad_norm": 0.30657851696014404, |
|
"learning_rate": 0.00011321889707971979, |
|
"loss": 0.02, |
|
"step": 4820 |
|
}, |
|
{ |
|
"epoch": 2.4295774647887325, |
|
"grad_norm": 0.2877327501773834, |
|
"learning_rate": 0.00011288905393962586, |
|
"loss": 0.0237, |
|
"step": 4830 |
|
}, |
|
{ |
|
"epoch": 2.4346076458752517, |
|
"grad_norm": 0.3301834166049957, |
|
"learning_rate": 0.00011255906814012744, |
|
"loss": 0.0219, |
|
"step": 4840 |
|
}, |
|
{ |
|
"epoch": 2.4396378269617705, |
|
"grad_norm": 0.21552981436252594, |
|
"learning_rate": 0.00011222894333359338, |
|
"loss": 0.0221, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 2.4446680080482897, |
|
"grad_norm": 0.33082151412963867, |
|
"learning_rate": 0.00011189868317393086, |
|
"loss": 0.021, |
|
"step": 4860 |
|
}, |
|
{ |
|
"epoch": 2.449698189134809, |
|
"grad_norm": 0.30314934253692627, |
|
"learning_rate": 0.00011156829131654543, |
|
"loss": 0.0311, |
|
"step": 4870 |
|
}, |
|
{ |
|
"epoch": 2.454728370221328, |
|
"grad_norm": 0.31226858496665955, |
|
"learning_rate": 0.00011123777141830008, |
|
"loss": 0.0272, |
|
"step": 4880 |
|
}, |
|
{ |
|
"epoch": 2.4597585513078473, |
|
"grad_norm": 0.32737886905670166, |
|
"learning_rate": 0.00011090712713747514, |
|
"loss": 0.0248, |
|
"step": 4890 |
|
}, |
|
{ |
|
"epoch": 2.464788732394366, |
|
"grad_norm": 0.2482103705406189, |
|
"learning_rate": 0.00011057636213372755, |
|
"loss": 0.0195, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 2.4698189134808852, |
|
"grad_norm": 0.4342176616191864, |
|
"learning_rate": 0.00011024548006805051, |
|
"loss": 0.0265, |
|
"step": 4910 |
|
}, |
|
{ |
|
"epoch": 2.4748490945674044, |
|
"grad_norm": 0.3512166440486908, |
|
"learning_rate": 0.00010991448460273287, |
|
"loss": 0.0296, |
|
"step": 4920 |
|
}, |
|
{ |
|
"epoch": 2.4798792756539236, |
|
"grad_norm": 0.4724549949169159, |
|
"learning_rate": 0.00010958337940131857, |
|
"loss": 0.0265, |
|
"step": 4930 |
|
}, |
|
{ |
|
"epoch": 2.484909456740443, |
|
"grad_norm": 0.29837068915367126, |
|
"learning_rate": 0.00010925216812856621, |
|
"loss": 0.0249, |
|
"step": 4940 |
|
}, |
|
{ |
|
"epoch": 2.4899396378269616, |
|
"grad_norm": 0.14961381256580353, |
|
"learning_rate": 0.00010892085445040836, |
|
"loss": 0.0198, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 2.494969818913481, |
|
"grad_norm": 0.2834800183773041, |
|
"learning_rate": 0.00010858944203391106, |
|
"loss": 0.0262, |
|
"step": 4960 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"grad_norm": 0.3048977255821228, |
|
"learning_rate": 0.00010825793454723325, |
|
"loss": 0.0193, |
|
"step": 4970 |
|
}, |
|
{ |
|
"epoch": 2.505030181086519, |
|
"grad_norm": 0.42306941747665405, |
|
"learning_rate": 0.00010792633565958603, |
|
"loss": 0.0164, |
|
"step": 4980 |
|
}, |
|
{ |
|
"epoch": 2.5100603621730384, |
|
"grad_norm": 0.29460790753364563, |
|
"learning_rate": 0.00010759464904119229, |
|
"loss": 0.0219, |
|
"step": 4990 |
|
}, |
|
{ |
|
"epoch": 2.515090543259557, |
|
"grad_norm": 0.2361808717250824, |
|
"learning_rate": 0.00010726287836324582, |
|
"loss": 0.0229, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 2.5201207243460764, |
|
"grad_norm": 0.2822340130805969, |
|
"learning_rate": 0.00010693102729787088, |
|
"loss": 0.03, |
|
"step": 5010 |
|
}, |
|
{ |
|
"epoch": 2.5251509054325956, |
|
"grad_norm": 0.3465881049633026, |
|
"learning_rate": 0.00010659909951808145, |
|
"loss": 0.0208, |
|
"step": 5020 |
|
}, |
|
{ |
|
"epoch": 2.5301810865191148, |
|
"grad_norm": 0.27967023849487305, |
|
"learning_rate": 0.00010626709869774056, |
|
"loss": 0.0257, |
|
"step": 5030 |
|
}, |
|
{ |
|
"epoch": 2.535211267605634, |
|
"grad_norm": 0.4234648644924164, |
|
"learning_rate": 0.00010593502851151977, |
|
"loss": 0.0245, |
|
"step": 5040 |
|
}, |
|
{ |
|
"epoch": 2.5402414486921527, |
|
"grad_norm": 0.2795238792896271, |
|
"learning_rate": 0.00010560289263485836, |
|
"loss": 0.0201, |
|
"step": 5050 |
|
}, |
|
{ |
|
"epoch": 2.545271629778672, |
|
"grad_norm": 0.27361616492271423, |
|
"learning_rate": 0.00010527069474392266, |
|
"loss": 0.0152, |
|
"step": 5060 |
|
}, |
|
{ |
|
"epoch": 2.550301810865191, |
|
"grad_norm": 0.21000875532627106, |
|
"learning_rate": 0.00010493843851556539, |
|
"loss": 0.0203, |
|
"step": 5070 |
|
}, |
|
{ |
|
"epoch": 2.5553319919517103, |
|
"grad_norm": 0.265424519777298, |
|
"learning_rate": 0.00010460612762728498, |
|
"loss": 0.0227, |
|
"step": 5080 |
|
}, |
|
{ |
|
"epoch": 2.5603621730382295, |
|
"grad_norm": 0.2341059446334839, |
|
"learning_rate": 0.00010427376575718488, |
|
"loss": 0.0257, |
|
"step": 5090 |
|
}, |
|
{ |
|
"epoch": 2.5653923541247483, |
|
"grad_norm": 0.3186612129211426, |
|
"learning_rate": 0.00010394135658393278, |
|
"loss": 0.0249, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 2.5704225352112675, |
|
"grad_norm": 0.22894106805324554, |
|
"learning_rate": 0.00010360890378671997, |
|
"loss": 0.0222, |
|
"step": 5110 |
|
}, |
|
{ |
|
"epoch": 2.5754527162977867, |
|
"grad_norm": 0.32082125544548035, |
|
"learning_rate": 0.00010327641104522052, |
|
"loss": 0.0258, |
|
"step": 5120 |
|
}, |
|
{ |
|
"epoch": 2.580482897384306, |
|
"grad_norm": 0.21953271329402924, |
|
"learning_rate": 0.00010294388203955067, |
|
"loss": 0.0199, |
|
"step": 5130 |
|
}, |
|
{ |
|
"epoch": 2.585513078470825, |
|
"grad_norm": 0.14508609473705292, |
|
"learning_rate": 0.00010261132045022804, |
|
"loss": 0.0219, |
|
"step": 5140 |
|
}, |
|
{ |
|
"epoch": 2.590543259557344, |
|
"grad_norm": 0.23425619304180145, |
|
"learning_rate": 0.00010227872995813083, |
|
"loss": 0.0238, |
|
"step": 5150 |
|
}, |
|
{ |
|
"epoch": 2.595573440643863, |
|
"grad_norm": 0.18086619675159454, |
|
"learning_rate": 0.00010194611424445721, |
|
"loss": 0.0225, |
|
"step": 5160 |
|
}, |
|
{ |
|
"epoch": 2.6006036217303823, |
|
"grad_norm": 0.2463824599981308, |
|
"learning_rate": 0.0001016134769906845, |
|
"loss": 0.0199, |
|
"step": 5170 |
|
}, |
|
{ |
|
"epoch": 2.6056338028169015, |
|
"grad_norm": 0.23146170377731323, |
|
"learning_rate": 0.00010128082187852846, |
|
"loss": 0.0215, |
|
"step": 5180 |
|
}, |
|
{ |
|
"epoch": 2.6106639839034207, |
|
"grad_norm": 0.24659287929534912, |
|
"learning_rate": 0.00010094815258990241, |
|
"loss": 0.0192, |
|
"step": 5190 |
|
}, |
|
{ |
|
"epoch": 2.6156941649899395, |
|
"grad_norm": 0.18079569935798645, |
|
"learning_rate": 0.00010061547280687664, |
|
"loss": 0.0201, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 2.6207243460764587, |
|
"grad_norm": 0.21298180520534515, |
|
"learning_rate": 0.00010028278621163762, |
|
"loss": 0.0208, |
|
"step": 5210 |
|
}, |
|
{ |
|
"epoch": 2.625754527162978, |
|
"grad_norm": 0.21694862842559814, |
|
"learning_rate": 9.99500964864472e-05, |
|
"loss": 0.0151, |
|
"step": 5220 |
|
}, |
|
{ |
|
"epoch": 2.630784708249497, |
|
"grad_norm": 0.2606973648071289, |
|
"learning_rate": 9.961740731360184e-05, |
|
"loss": 0.0219, |
|
"step": 5230 |
|
}, |
|
{ |
|
"epoch": 2.6358148893360163, |
|
"grad_norm": 0.34560972452163696, |
|
"learning_rate": 9.928472237539196e-05, |
|
"loss": 0.0188, |
|
"step": 5240 |
|
}, |
|
{ |
|
"epoch": 2.640845070422535, |
|
"grad_norm": 0.26653993129730225, |
|
"learning_rate": 9.895204535406104e-05, |
|
"loss": 0.0161, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 2.6458752515090542, |
|
"grad_norm": 0.16782517731189728, |
|
"learning_rate": 9.861937993176495e-05, |
|
"loss": 0.0167, |
|
"step": 5260 |
|
}, |
|
{ |
|
"epoch": 2.6509054325955734, |
|
"grad_norm": 0.2072647660970688, |
|
"learning_rate": 9.828672979053119e-05, |
|
"loss": 0.0185, |
|
"step": 5270 |
|
}, |
|
{ |
|
"epoch": 2.6559356136820926, |
|
"grad_norm": 0.34195080399513245, |
|
"learning_rate": 9.795409861221813e-05, |
|
"loss": 0.0156, |
|
"step": 5280 |
|
}, |
|
{ |
|
"epoch": 2.660965794768612, |
|
"grad_norm": 0.3683469891548157, |
|
"learning_rate": 9.762149007847424e-05, |
|
"loss": 0.0166, |
|
"step": 5290 |
|
}, |
|
{ |
|
"epoch": 2.6659959758551306, |
|
"grad_norm": 0.2584269940853119, |
|
"learning_rate": 9.728890787069737e-05, |
|
"loss": 0.0241, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 2.67102615694165, |
|
"grad_norm": 0.1505228877067566, |
|
"learning_rate": 9.695635566999397e-05, |
|
"loss": 0.0286, |
|
"step": 5310 |
|
}, |
|
{ |
|
"epoch": 2.676056338028169, |
|
"grad_norm": 0.27530187368392944, |
|
"learning_rate": 9.662383715713837e-05, |
|
"loss": 0.0243, |
|
"step": 5320 |
|
}, |
|
{ |
|
"epoch": 2.681086519114688, |
|
"grad_norm": 0.3398008942604065, |
|
"learning_rate": 9.629135601253204e-05, |
|
"loss": 0.0286, |
|
"step": 5330 |
|
}, |
|
{ |
|
"epoch": 2.6861167002012074, |
|
"grad_norm": 0.20086489617824554, |
|
"learning_rate": 9.595891591616282e-05, |
|
"loss": 0.0192, |
|
"step": 5340 |
|
}, |
|
{ |
|
"epoch": 2.691146881287726, |
|
"grad_norm": 0.25084254145622253, |
|
"learning_rate": 9.562652054756429e-05, |
|
"loss": 0.0277, |
|
"step": 5350 |
|
}, |
|
{ |
|
"epoch": 2.6961770623742454, |
|
"grad_norm": 0.25037065148353577, |
|
"learning_rate": 9.529417358577489e-05, |
|
"loss": 0.0214, |
|
"step": 5360 |
|
}, |
|
{ |
|
"epoch": 2.7012072434607646, |
|
"grad_norm": 0.18306289613246918, |
|
"learning_rate": 9.496187870929732e-05, |
|
"loss": 0.0157, |
|
"step": 5370 |
|
}, |
|
{ |
|
"epoch": 2.7062374245472838, |
|
"grad_norm": 0.17593130469322205, |
|
"learning_rate": 9.462963959605778e-05, |
|
"loss": 0.0206, |
|
"step": 5380 |
|
}, |
|
{ |
|
"epoch": 2.711267605633803, |
|
"grad_norm": 0.3547375500202179, |
|
"learning_rate": 9.429745992336522e-05, |
|
"loss": 0.0217, |
|
"step": 5390 |
|
}, |
|
{ |
|
"epoch": 2.7162977867203217, |
|
"grad_norm": 0.26571425795555115, |
|
"learning_rate": 9.396534336787081e-05, |
|
"loss": 0.0198, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 2.721327967806841, |
|
"grad_norm": 0.3025442957878113, |
|
"learning_rate": 9.363329360552703e-05, |
|
"loss": 0.021, |
|
"step": 5410 |
|
}, |
|
{ |
|
"epoch": 2.72635814889336, |
|
"grad_norm": 0.30082857608795166, |
|
"learning_rate": 9.330131431154708e-05, |
|
"loss": 0.0214, |
|
"step": 5420 |
|
}, |
|
{ |
|
"epoch": 2.7313883299798793, |
|
"grad_norm": 0.18924900889396667, |
|
"learning_rate": 9.29694091603642e-05, |
|
"loss": 0.0141, |
|
"step": 5430 |
|
}, |
|
{ |
|
"epoch": 2.7364185110663986, |
|
"grad_norm": 0.28385961055755615, |
|
"learning_rate": 9.263758182559103e-05, |
|
"loss": 0.0235, |
|
"step": 5440 |
|
}, |
|
{ |
|
"epoch": 2.7414486921529173, |
|
"grad_norm": 0.3015013635158539, |
|
"learning_rate": 9.230583597997888e-05, |
|
"loss": 0.0234, |
|
"step": 5450 |
|
}, |
|
{ |
|
"epoch": 2.7464788732394365, |
|
"grad_norm": 0.3755188584327698, |
|
"learning_rate": 9.197417529537716e-05, |
|
"loss": 0.0206, |
|
"step": 5460 |
|
}, |
|
{ |
|
"epoch": 2.7515090543259557, |
|
"grad_norm": 0.2895854413509369, |
|
"learning_rate": 9.16426034426926e-05, |
|
"loss": 0.0266, |
|
"step": 5470 |
|
}, |
|
{ |
|
"epoch": 2.756539235412475, |
|
"grad_norm": 0.35291990637779236, |
|
"learning_rate": 9.131112409184886e-05, |
|
"loss": 0.0173, |
|
"step": 5480 |
|
}, |
|
{ |
|
"epoch": 2.761569416498994, |
|
"grad_norm": 0.2543307840824127, |
|
"learning_rate": 9.097974091174568e-05, |
|
"loss": 0.0173, |
|
"step": 5490 |
|
}, |
|
{ |
|
"epoch": 2.766599597585513, |
|
"grad_norm": 0.14231809973716736, |
|
"learning_rate": 9.064845757021833e-05, |
|
"loss": 0.0162, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 2.771629778672032, |
|
"grad_norm": 0.23540771007537842, |
|
"learning_rate": 9.031727773399709e-05, |
|
"loss": 0.0187, |
|
"step": 5510 |
|
}, |
|
{ |
|
"epoch": 2.7766599597585513, |
|
"grad_norm": 0.2745067775249481, |
|
"learning_rate": 8.998620506866663e-05, |
|
"loss": 0.0195, |
|
"step": 5520 |
|
}, |
|
{ |
|
"epoch": 2.7816901408450705, |
|
"grad_norm": 0.22975705564022064, |
|
"learning_rate": 8.965524323862535e-05, |
|
"loss": 0.0201, |
|
"step": 5530 |
|
}, |
|
{ |
|
"epoch": 2.7867203219315897, |
|
"grad_norm": 0.2695607841014862, |
|
"learning_rate": 8.9324395907045e-05, |
|
"loss": 0.0152, |
|
"step": 5540 |
|
}, |
|
{ |
|
"epoch": 2.7917505030181085, |
|
"grad_norm": 0.3059997260570526, |
|
"learning_rate": 8.899366673582994e-05, |
|
"loss": 0.0205, |
|
"step": 5550 |
|
}, |
|
{ |
|
"epoch": 2.7967806841046277, |
|
"grad_norm": 0.2964552938938141, |
|
"learning_rate": 8.866305938557675e-05, |
|
"loss": 0.0216, |
|
"step": 5560 |
|
}, |
|
{ |
|
"epoch": 2.801810865191147, |
|
"grad_norm": 0.23506313562393188, |
|
"learning_rate": 8.833257751553365e-05, |
|
"loss": 0.0223, |
|
"step": 5570 |
|
}, |
|
{ |
|
"epoch": 2.806841046277666, |
|
"grad_norm": 0.2811063230037689, |
|
"learning_rate": 8.800222478356e-05, |
|
"loss": 0.0153, |
|
"step": 5580 |
|
}, |
|
{ |
|
"epoch": 2.8118712273641853, |
|
"grad_norm": 0.1559114307165146, |
|
"learning_rate": 8.767200484608584e-05, |
|
"loss": 0.0235, |
|
"step": 5590 |
|
}, |
|
{ |
|
"epoch": 2.816901408450704, |
|
"grad_norm": 0.3868955075740814, |
|
"learning_rate": 8.734192135807142e-05, |
|
"loss": 0.0238, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 2.8219315895372232, |
|
"grad_norm": 0.3004298210144043, |
|
"learning_rate": 8.701197797296671e-05, |
|
"loss": 0.0205, |
|
"step": 5610 |
|
}, |
|
{ |
|
"epoch": 2.8269617706237424, |
|
"grad_norm": 0.30548134446144104, |
|
"learning_rate": 8.668217834267096e-05, |
|
"loss": 0.0213, |
|
"step": 5620 |
|
}, |
|
{ |
|
"epoch": 2.8319919517102616, |
|
"grad_norm": 0.4484052360057831, |
|
"learning_rate": 8.635252611749239e-05, |
|
"loss": 0.015, |
|
"step": 5630 |
|
}, |
|
{ |
|
"epoch": 2.837022132796781, |
|
"grad_norm": 0.2639616131782532, |
|
"learning_rate": 8.602302494610764e-05, |
|
"loss": 0.0241, |
|
"step": 5640 |
|
}, |
|
{ |
|
"epoch": 2.8420523138832996, |
|
"grad_norm": 0.290503591299057, |
|
"learning_rate": 8.569367847552143e-05, |
|
"loss": 0.0215, |
|
"step": 5650 |
|
}, |
|
{ |
|
"epoch": 2.847082494969819, |
|
"grad_norm": 0.2626757025718689, |
|
"learning_rate": 8.536449035102624e-05, |
|
"loss": 0.0249, |
|
"step": 5660 |
|
}, |
|
{ |
|
"epoch": 2.852112676056338, |
|
"grad_norm": 0.18214431405067444, |
|
"learning_rate": 8.50354642161619e-05, |
|
"loss": 0.0155, |
|
"step": 5670 |
|
}, |
|
{ |
|
"epoch": 2.857142857142857, |
|
"grad_norm": 0.23727771639823914, |
|
"learning_rate": 8.47066037126754e-05, |
|
"loss": 0.027, |
|
"step": 5680 |
|
}, |
|
{ |
|
"epoch": 2.8621730382293764, |
|
"grad_norm": 0.2826462984085083, |
|
"learning_rate": 8.437791248048037e-05, |
|
"loss": 0.0189, |
|
"step": 5690 |
|
}, |
|
{ |
|
"epoch": 2.867203219315895, |
|
"grad_norm": 0.2718562185764313, |
|
"learning_rate": 8.404939415761693e-05, |
|
"loss": 0.0235, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 2.8722334004024144, |
|
"grad_norm": 0.27361899614334106, |
|
"learning_rate": 8.372105238021138e-05, |
|
"loss": 0.0235, |
|
"step": 5710 |
|
}, |
|
{ |
|
"epoch": 2.8772635814889336, |
|
"grad_norm": 0.18473175168037415, |
|
"learning_rate": 8.339289078243602e-05, |
|
"loss": 0.0158, |
|
"step": 5720 |
|
}, |
|
{ |
|
"epoch": 2.8822937625754528, |
|
"grad_norm": 0.24013389647006989, |
|
"learning_rate": 8.306491299646884e-05, |
|
"loss": 0.0215, |
|
"step": 5730 |
|
}, |
|
{ |
|
"epoch": 2.887323943661972, |
|
"grad_norm": 0.22806933522224426, |
|
"learning_rate": 8.273712265245336e-05, |
|
"loss": 0.0148, |
|
"step": 5740 |
|
}, |
|
{ |
|
"epoch": 2.8923541247484907, |
|
"grad_norm": 0.23373086750507355, |
|
"learning_rate": 8.240952337845844e-05, |
|
"loss": 0.0224, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 2.89738430583501, |
|
"grad_norm": 0.19605842232704163, |
|
"learning_rate": 8.208211880043812e-05, |
|
"loss": 0.0146, |
|
"step": 5760 |
|
}, |
|
{ |
|
"epoch": 2.902414486921529, |
|
"grad_norm": 0.2649703621864319, |
|
"learning_rate": 8.175491254219151e-05, |
|
"loss": 0.0168, |
|
"step": 5770 |
|
}, |
|
{ |
|
"epoch": 2.9074446680080483, |
|
"grad_norm": 0.1927386075258255, |
|
"learning_rate": 8.142790822532266e-05, |
|
"loss": 0.017, |
|
"step": 5780 |
|
}, |
|
{ |
|
"epoch": 2.9124748490945676, |
|
"grad_norm": 0.18656544387340546, |
|
"learning_rate": 8.110110946920049e-05, |
|
"loss": 0.0198, |
|
"step": 5790 |
|
}, |
|
{ |
|
"epoch": 2.9175050301810863, |
|
"grad_norm": 0.1952710896730423, |
|
"learning_rate": 8.077451989091868e-05, |
|
"loss": 0.0172, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 2.9225352112676055, |
|
"grad_norm": 0.31542059779167175, |
|
"learning_rate": 8.044814310525574e-05, |
|
"loss": 0.024, |
|
"step": 5810 |
|
}, |
|
{ |
|
"epoch": 2.9275653923541247, |
|
"grad_norm": 0.22629360854625702, |
|
"learning_rate": 8.012198272463486e-05, |
|
"loss": 0.0234, |
|
"step": 5820 |
|
}, |
|
{ |
|
"epoch": 2.932595573440644, |
|
"grad_norm": 0.2527533769607544, |
|
"learning_rate": 7.979604235908406e-05, |
|
"loss": 0.0192, |
|
"step": 5830 |
|
}, |
|
{ |
|
"epoch": 2.937625754527163, |
|
"grad_norm": 0.27879974246025085, |
|
"learning_rate": 7.947032561619617e-05, |
|
"loss": 0.0162, |
|
"step": 5840 |
|
}, |
|
{ |
|
"epoch": 2.942655935613682, |
|
"grad_norm": 0.33129119873046875, |
|
"learning_rate": 7.914483610108888e-05, |
|
"loss": 0.0196, |
|
"step": 5850 |
|
}, |
|
{ |
|
"epoch": 2.947686116700201, |
|
"grad_norm": 0.29584264755249023, |
|
"learning_rate": 7.881957741636486e-05, |
|
"loss": 0.0226, |
|
"step": 5860 |
|
}, |
|
{ |
|
"epoch": 2.9527162977867203, |
|
"grad_norm": 0.3168732821941376, |
|
"learning_rate": 7.849455316207197e-05, |
|
"loss": 0.0178, |
|
"step": 5870 |
|
}, |
|
{ |
|
"epoch": 2.9577464788732395, |
|
"grad_norm": 0.1575564593076706, |
|
"learning_rate": 7.816976693566324e-05, |
|
"loss": 0.0221, |
|
"step": 5880 |
|
}, |
|
{ |
|
"epoch": 2.9627766599597587, |
|
"grad_norm": 0.2546273469924927, |
|
"learning_rate": 7.784522233195716e-05, |
|
"loss": 0.0167, |
|
"step": 5890 |
|
}, |
|
{ |
|
"epoch": 2.9678068410462775, |
|
"grad_norm": 0.19285175204277039, |
|
"learning_rate": 7.75209229430979e-05, |
|
"loss": 0.0196, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 2.9728370221327967, |
|
"grad_norm": 0.25114384293556213, |
|
"learning_rate": 7.719687235851554e-05, |
|
"loss": 0.0224, |
|
"step": 5910 |
|
}, |
|
{ |
|
"epoch": 2.977867203219316, |
|
"grad_norm": 0.2990531623363495, |
|
"learning_rate": 7.687307416488625e-05, |
|
"loss": 0.0185, |
|
"step": 5920 |
|
}, |
|
{ |
|
"epoch": 2.982897384305835, |
|
"grad_norm": 0.19075430929660797, |
|
"learning_rate": 7.654953194609281e-05, |
|
"loss": 0.0238, |
|
"step": 5930 |
|
}, |
|
{ |
|
"epoch": 2.9879275653923543, |
|
"grad_norm": 0.28794053196907043, |
|
"learning_rate": 7.62262492831847e-05, |
|
"loss": 0.0198, |
|
"step": 5940 |
|
}, |
|
{ |
|
"epoch": 2.992957746478873, |
|
"grad_norm": 0.26047641038894653, |
|
"learning_rate": 7.590322975433857e-05, |
|
"loss": 0.014, |
|
"step": 5950 |
|
}, |
|
{ |
|
"epoch": 2.9979879275653922, |
|
"grad_norm": 0.2440265715122223, |
|
"learning_rate": 7.558047693481868e-05, |
|
"loss": 0.017, |
|
"step": 5960 |
|
}, |
|
{ |
|
"epoch": 3.0030181086519114, |
|
"grad_norm": 0.21606865525245667, |
|
"learning_rate": 7.525799439693723e-05, |
|
"loss": 0.0173, |
|
"step": 5970 |
|
}, |
|
{ |
|
"epoch": 3.0080482897384306, |
|
"grad_norm": 0.24635550379753113, |
|
"learning_rate": 7.493578571001497e-05, |
|
"loss": 0.0117, |
|
"step": 5980 |
|
}, |
|
{ |
|
"epoch": 3.01307847082495, |
|
"grad_norm": 0.17602556943893433, |
|
"learning_rate": 7.461385444034145e-05, |
|
"loss": 0.0149, |
|
"step": 5990 |
|
}, |
|
{ |
|
"epoch": 3.0181086519114686, |
|
"grad_norm": 0.2443585842847824, |
|
"learning_rate": 7.429220415113582e-05, |
|
"loss": 0.02, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 3.023138832997988, |
|
"grad_norm": 0.25802722573280334, |
|
"learning_rate": 7.39708384025072e-05, |
|
"loss": 0.0185, |
|
"step": 6010 |
|
}, |
|
{ |
|
"epoch": 3.028169014084507, |
|
"grad_norm": 0.1731959879398346, |
|
"learning_rate": 7.364976075141534e-05, |
|
"loss": 0.0159, |
|
"step": 6020 |
|
}, |
|
{ |
|
"epoch": 3.033199195171026, |
|
"grad_norm": 0.2295977920293808, |
|
"learning_rate": 7.33289747516313e-05, |
|
"loss": 0.0223, |
|
"step": 6030 |
|
}, |
|
{ |
|
"epoch": 3.0382293762575454, |
|
"grad_norm": 0.27841848134994507, |
|
"learning_rate": 7.300848395369801e-05, |
|
"loss": 0.0246, |
|
"step": 6040 |
|
}, |
|
{ |
|
"epoch": 3.043259557344064, |
|
"grad_norm": 0.2819521725177765, |
|
"learning_rate": 7.268829190489111e-05, |
|
"loss": 0.0175, |
|
"step": 6050 |
|
}, |
|
{ |
|
"epoch": 3.0482897384305834, |
|
"grad_norm": 0.32469579577445984, |
|
"learning_rate": 7.236840214917949e-05, |
|
"loss": 0.0179, |
|
"step": 6060 |
|
}, |
|
{ |
|
"epoch": 3.0533199195171026, |
|
"grad_norm": 0.28703153133392334, |
|
"learning_rate": 7.204881822718625e-05, |
|
"loss": 0.0191, |
|
"step": 6070 |
|
}, |
|
{ |
|
"epoch": 3.058350100603622, |
|
"grad_norm": 0.27047568559646606, |
|
"learning_rate": 7.172954367614945e-05, |
|
"loss": 0.0162, |
|
"step": 6080 |
|
}, |
|
{ |
|
"epoch": 3.063380281690141, |
|
"grad_norm": 0.2722102403640747, |
|
"learning_rate": 7.141058202988293e-05, |
|
"loss": 0.0189, |
|
"step": 6090 |
|
}, |
|
{ |
|
"epoch": 3.0684104627766597, |
|
"grad_norm": 0.2391163408756256, |
|
"learning_rate": 7.109193681873726e-05, |
|
"loss": 0.0175, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 3.073440643863179, |
|
"grad_norm": 0.4041613042354584, |
|
"learning_rate": 7.077361156956055e-05, |
|
"loss": 0.021, |
|
"step": 6110 |
|
}, |
|
{ |
|
"epoch": 3.078470824949698, |
|
"grad_norm": 0.2219713032245636, |
|
"learning_rate": 7.045560980565957e-05, |
|
"loss": 0.0169, |
|
"step": 6120 |
|
}, |
|
{ |
|
"epoch": 3.0835010060362174, |
|
"grad_norm": 0.21718035638332367, |
|
"learning_rate": 7.013793504676061e-05, |
|
"loss": 0.0194, |
|
"step": 6130 |
|
}, |
|
{ |
|
"epoch": 3.0885311871227366, |
|
"grad_norm": 0.19979757070541382, |
|
"learning_rate": 6.982059080897059e-05, |
|
"loss": 0.0165, |
|
"step": 6140 |
|
}, |
|
{ |
|
"epoch": 3.0935613682092553, |
|
"grad_norm": 0.2233651578426361, |
|
"learning_rate": 6.950358060473814e-05, |
|
"loss": 0.0151, |
|
"step": 6150 |
|
}, |
|
{ |
|
"epoch": 3.0985915492957745, |
|
"grad_norm": 0.24543243646621704, |
|
"learning_rate": 6.918690794281475e-05, |
|
"loss": 0.019, |
|
"step": 6160 |
|
}, |
|
{ |
|
"epoch": 3.1036217303822937, |
|
"grad_norm": 0.3978748917579651, |
|
"learning_rate": 6.887057632821587e-05, |
|
"loss": 0.0216, |
|
"step": 6170 |
|
}, |
|
{ |
|
"epoch": 3.108651911468813, |
|
"grad_norm": 0.22909055650234222, |
|
"learning_rate": 6.855458926218219e-05, |
|
"loss": 0.0206, |
|
"step": 6180 |
|
}, |
|
{ |
|
"epoch": 3.113682092555332, |
|
"grad_norm": 0.32850995659828186, |
|
"learning_rate": 6.823895024214077e-05, |
|
"loss": 0.0145, |
|
"step": 6190 |
|
}, |
|
{ |
|
"epoch": 3.118712273641851, |
|
"grad_norm": 0.22510278224945068, |
|
"learning_rate": 6.79236627616665e-05, |
|
"loss": 0.0215, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 3.12374245472837, |
|
"grad_norm": 0.2886231541633606, |
|
"learning_rate": 6.760873031044328e-05, |
|
"loss": 0.0152, |
|
"step": 6210 |
|
}, |
|
{ |
|
"epoch": 3.1287726358148893, |
|
"grad_norm": 0.17671062052249908, |
|
"learning_rate": 6.729415637422543e-05, |
|
"loss": 0.0242, |
|
"step": 6220 |
|
}, |
|
{ |
|
"epoch": 3.1338028169014085, |
|
"grad_norm": 0.3519156873226166, |
|
"learning_rate": 6.69799444347992e-05, |
|
"loss": 0.0158, |
|
"step": 6230 |
|
}, |
|
{ |
|
"epoch": 3.1388329979879277, |
|
"grad_norm": 0.2459878772497177, |
|
"learning_rate": 6.666609796994416e-05, |
|
"loss": 0.0175, |
|
"step": 6240 |
|
}, |
|
{ |
|
"epoch": 3.1438631790744465, |
|
"grad_norm": 0.20523589849472046, |
|
"learning_rate": 6.635262045339465e-05, |
|
"loss": 0.0174, |
|
"step": 6250 |
|
}, |
|
{ |
|
"epoch": 3.1488933601609657, |
|
"grad_norm": 0.23958848416805267, |
|
"learning_rate": 6.603951535480146e-05, |
|
"loss": 0.0215, |
|
"step": 6260 |
|
}, |
|
{ |
|
"epoch": 3.153923541247485, |
|
"grad_norm": 0.21609632670879364, |
|
"learning_rate": 6.572678613969331e-05, |
|
"loss": 0.0205, |
|
"step": 6270 |
|
}, |
|
{ |
|
"epoch": 3.158953722334004, |
|
"grad_norm": 0.20294173061847687, |
|
"learning_rate": 6.541443626943855e-05, |
|
"loss": 0.0177, |
|
"step": 6280 |
|
}, |
|
{ |
|
"epoch": 3.1639839034205233, |
|
"grad_norm": 0.21324007213115692, |
|
"learning_rate": 6.51024692012069e-05, |
|
"loss": 0.0187, |
|
"step": 6290 |
|
}, |
|
{ |
|
"epoch": 3.169014084507042, |
|
"grad_norm": 0.42404302954673767, |
|
"learning_rate": 6.479088838793106e-05, |
|
"loss": 0.0205, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 3.1740442655935612, |
|
"grad_norm": 0.2945919334888458, |
|
"learning_rate": 6.447969727826859e-05, |
|
"loss": 0.0148, |
|
"step": 6310 |
|
}, |
|
{ |
|
"epoch": 3.1790744466800804, |
|
"grad_norm": 0.3685127794742584, |
|
"learning_rate": 6.41688993165637e-05, |
|
"loss": 0.0194, |
|
"step": 6320 |
|
}, |
|
{ |
|
"epoch": 3.1841046277665996, |
|
"grad_norm": 0.295194536447525, |
|
"learning_rate": 6.385849794280915e-05, |
|
"loss": 0.0202, |
|
"step": 6330 |
|
}, |
|
{ |
|
"epoch": 3.189134808853119, |
|
"grad_norm": 0.2186952531337738, |
|
"learning_rate": 6.354849659260815e-05, |
|
"loss": 0.0172, |
|
"step": 6340 |
|
}, |
|
{ |
|
"epoch": 3.1941649899396376, |
|
"grad_norm": 0.22984078526496887, |
|
"learning_rate": 6.323889869713637e-05, |
|
"loss": 0.015, |
|
"step": 6350 |
|
}, |
|
{ |
|
"epoch": 3.199195171026157, |
|
"grad_norm": 0.2033555507659912, |
|
"learning_rate": 6.292970768310387e-05, |
|
"loss": 0.0145, |
|
"step": 6360 |
|
}, |
|
{ |
|
"epoch": 3.204225352112676, |
|
"grad_norm": 0.3300851583480835, |
|
"learning_rate": 6.262092697271732e-05, |
|
"loss": 0.0202, |
|
"step": 6370 |
|
}, |
|
{ |
|
"epoch": 3.209255533199195, |
|
"grad_norm": 0.2983047366142273, |
|
"learning_rate": 6.2312559983642e-05, |
|
"loss": 0.0174, |
|
"step": 6380 |
|
}, |
|
{ |
|
"epoch": 3.2142857142857144, |
|
"grad_norm": 0.1934136003255844, |
|
"learning_rate": 6.200461012896402e-05, |
|
"loss": 0.0233, |
|
"step": 6390 |
|
}, |
|
{ |
|
"epoch": 3.219315895372233, |
|
"grad_norm": 0.23245446383953094, |
|
"learning_rate": 6.169708081715253e-05, |
|
"loss": 0.018, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 3.2243460764587524, |
|
"grad_norm": 0.24971647560596466, |
|
"learning_rate": 6.1389975452022e-05, |
|
"loss": 0.0167, |
|
"step": 6410 |
|
}, |
|
{ |
|
"epoch": 3.2293762575452716, |
|
"grad_norm": 0.23975920677185059, |
|
"learning_rate": 6.108329743269461e-05, |
|
"loss": 0.014, |
|
"step": 6420 |
|
}, |
|
{ |
|
"epoch": 3.234406438631791, |
|
"grad_norm": 0.14767713844776154, |
|
"learning_rate": 6.07770501535625e-05, |
|
"loss": 0.019, |
|
"step": 6430 |
|
}, |
|
{ |
|
"epoch": 3.23943661971831, |
|
"grad_norm": 0.22770993411540985, |
|
"learning_rate": 6.047123700425026e-05, |
|
"loss": 0.0128, |
|
"step": 6440 |
|
}, |
|
{ |
|
"epoch": 3.2444668008048287, |
|
"grad_norm": 0.2288396656513214, |
|
"learning_rate": 6.016586136957745e-05, |
|
"loss": 0.0173, |
|
"step": 6450 |
|
}, |
|
{ |
|
"epoch": 3.249496981891348, |
|
"grad_norm": 0.1313876062631607, |
|
"learning_rate": 5.98609266295211e-05, |
|
"loss": 0.0131, |
|
"step": 6460 |
|
}, |
|
{ |
|
"epoch": 3.254527162977867, |
|
"grad_norm": 0.18414969742298126, |
|
"learning_rate": 5.955643615917825e-05, |
|
"loss": 0.016, |
|
"step": 6470 |
|
}, |
|
{ |
|
"epoch": 3.2595573440643864, |
|
"grad_norm": 0.19654971361160278, |
|
"learning_rate": 5.925239332872878e-05, |
|
"loss": 0.0128, |
|
"step": 6480 |
|
}, |
|
{ |
|
"epoch": 3.2645875251509056, |
|
"grad_norm": 0.1665862649679184, |
|
"learning_rate": 5.894880150339783e-05, |
|
"loss": 0.0148, |
|
"step": 6490 |
|
}, |
|
{ |
|
"epoch": 3.2696177062374243, |
|
"grad_norm": 0.1726180911064148, |
|
"learning_rate": 5.864566404341878e-05, |
|
"loss": 0.0147, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 3.2746478873239435, |
|
"grad_norm": 0.2508947253227234, |
|
"learning_rate": 5.834298430399594e-05, |
|
"loss": 0.0212, |
|
"step": 6510 |
|
}, |
|
{ |
|
"epoch": 3.2796780684104627, |
|
"grad_norm": 0.18195697665214539, |
|
"learning_rate": 5.804076563526744e-05, |
|
"loss": 0.0163, |
|
"step": 6520 |
|
}, |
|
{ |
|
"epoch": 3.284708249496982, |
|
"grad_norm": 0.38305532932281494, |
|
"learning_rate": 5.773901138226826e-05, |
|
"loss": 0.0133, |
|
"step": 6530 |
|
}, |
|
{ |
|
"epoch": 3.289738430583501, |
|
"grad_norm": 0.19784590601921082, |
|
"learning_rate": 5.743772488489294e-05, |
|
"loss": 0.0191, |
|
"step": 6540 |
|
}, |
|
{ |
|
"epoch": 3.29476861167002, |
|
"grad_norm": 0.11469227820634842, |
|
"learning_rate": 5.71369094778589e-05, |
|
"loss": 0.0188, |
|
"step": 6550 |
|
}, |
|
{ |
|
"epoch": 3.299798792756539, |
|
"grad_norm": 0.2073812335729599, |
|
"learning_rate": 5.6836568490669384e-05, |
|
"loss": 0.0117, |
|
"step": 6560 |
|
}, |
|
{ |
|
"epoch": 3.3048289738430583, |
|
"grad_norm": 0.12024883180856705, |
|
"learning_rate": 5.653670524757667e-05, |
|
"loss": 0.0167, |
|
"step": 6570 |
|
}, |
|
{ |
|
"epoch": 3.3098591549295775, |
|
"grad_norm": 0.1882588267326355, |
|
"learning_rate": 5.623732306754511e-05, |
|
"loss": 0.0123, |
|
"step": 6580 |
|
}, |
|
{ |
|
"epoch": 3.3148893360160967, |
|
"grad_norm": 0.13178786635398865, |
|
"learning_rate": 5.5938425264214657e-05, |
|
"loss": 0.0156, |
|
"step": 6590 |
|
}, |
|
{ |
|
"epoch": 3.3199195171026155, |
|
"grad_norm": 0.1943099945783615, |
|
"learning_rate": 5.564001514586403e-05, |
|
"loss": 0.0117, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 3.3249496981891347, |
|
"grad_norm": 0.2532840073108673, |
|
"learning_rate": 5.534209601537407e-05, |
|
"loss": 0.0157, |
|
"step": 6610 |
|
}, |
|
{ |
|
"epoch": 3.329979879275654, |
|
"grad_norm": 0.216691792011261, |
|
"learning_rate": 5.50446711701913e-05, |
|
"loss": 0.0195, |
|
"step": 6620 |
|
}, |
|
{ |
|
"epoch": 3.335010060362173, |
|
"grad_norm": 0.1876860409975052, |
|
"learning_rate": 5.474774390229129e-05, |
|
"loss": 0.0152, |
|
"step": 6630 |
|
}, |
|
{ |
|
"epoch": 3.3400402414486923, |
|
"grad_norm": 0.11754748225212097, |
|
"learning_rate": 5.4451317498142365e-05, |
|
"loss": 0.0159, |
|
"step": 6640 |
|
}, |
|
{ |
|
"epoch": 3.345070422535211, |
|
"grad_norm": 0.5310174822807312, |
|
"learning_rate": 5.4155395238669185e-05, |
|
"loss": 0.0155, |
|
"step": 6650 |
|
}, |
|
{ |
|
"epoch": 3.3501006036217302, |
|
"grad_norm": 0.3709560036659241, |
|
"learning_rate": 5.385998039921627e-05, |
|
"loss": 0.0194, |
|
"step": 6660 |
|
}, |
|
{ |
|
"epoch": 3.3551307847082494, |
|
"grad_norm": 0.25374695658683777, |
|
"learning_rate": 5.3565076249512034e-05, |
|
"loss": 0.0184, |
|
"step": 6670 |
|
}, |
|
{ |
|
"epoch": 3.3601609657947686, |
|
"grad_norm": 0.2403491884469986, |
|
"learning_rate": 5.3270686053632323e-05, |
|
"loss": 0.0153, |
|
"step": 6680 |
|
}, |
|
{ |
|
"epoch": 3.365191146881288, |
|
"grad_norm": 0.23633185029029846, |
|
"learning_rate": 5.29768130699645e-05, |
|
"loss": 0.0161, |
|
"step": 6690 |
|
}, |
|
{ |
|
"epoch": 3.3702213279678066, |
|
"grad_norm": 0.3900575637817383, |
|
"learning_rate": 5.268346055117129e-05, |
|
"loss": 0.0258, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 3.375251509054326, |
|
"grad_norm": 0.141703262925148, |
|
"learning_rate": 5.239063174415466e-05, |
|
"loss": 0.0121, |
|
"step": 6710 |
|
}, |
|
{ |
|
"epoch": 3.380281690140845, |
|
"grad_norm": 0.17671038210391998, |
|
"learning_rate": 5.209832989002015e-05, |
|
"loss": 0.0149, |
|
"step": 6720 |
|
}, |
|
{ |
|
"epoch": 3.385311871227364, |
|
"grad_norm": 0.24591059982776642, |
|
"learning_rate": 5.18065582240407e-05, |
|
"loss": 0.0234, |
|
"step": 6730 |
|
}, |
|
{ |
|
"epoch": 3.3903420523138834, |
|
"grad_norm": 0.26220864057540894, |
|
"learning_rate": 5.151531997562116e-05, |
|
"loss": 0.0255, |
|
"step": 6740 |
|
}, |
|
{ |
|
"epoch": 3.395372233400402, |
|
"grad_norm": 0.29695531725883484, |
|
"learning_rate": 5.122461836826218e-05, |
|
"loss": 0.019, |
|
"step": 6750 |
|
}, |
|
{ |
|
"epoch": 3.4004024144869214, |
|
"grad_norm": 0.12897849082946777, |
|
"learning_rate": 5.0934456619524896e-05, |
|
"loss": 0.0191, |
|
"step": 6760 |
|
}, |
|
{ |
|
"epoch": 3.4054325955734406, |
|
"grad_norm": 0.16902518272399902, |
|
"learning_rate": 5.064483794099508e-05, |
|
"loss": 0.0207, |
|
"step": 6770 |
|
}, |
|
{ |
|
"epoch": 3.41046277665996, |
|
"grad_norm": 0.20852209627628326, |
|
"learning_rate": 5.0355765538247636e-05, |
|
"loss": 0.0146, |
|
"step": 6780 |
|
}, |
|
{ |
|
"epoch": 3.415492957746479, |
|
"grad_norm": 0.16347025334835052, |
|
"learning_rate": 5.006724261081118e-05, |
|
"loss": 0.0186, |
|
"step": 6790 |
|
}, |
|
{ |
|
"epoch": 3.4205231388329977, |
|
"grad_norm": 0.18849849700927734, |
|
"learning_rate": 4.977927235213259e-05, |
|
"loss": 0.0217, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 3.425553319919517, |
|
"grad_norm": 0.30719512701034546, |
|
"learning_rate": 4.9491857949541696e-05, |
|
"loss": 0.0176, |
|
"step": 6810 |
|
}, |
|
{ |
|
"epoch": 3.430583501006036, |
|
"grad_norm": 0.21589145064353943, |
|
"learning_rate": 4.9205002584215855e-05, |
|
"loss": 0.015, |
|
"step": 6820 |
|
}, |
|
{ |
|
"epoch": 3.4356136820925554, |
|
"grad_norm": 0.23759104311466217, |
|
"learning_rate": 4.891870943114496e-05, |
|
"loss": 0.0198, |
|
"step": 6830 |
|
}, |
|
{ |
|
"epoch": 3.4406438631790746, |
|
"grad_norm": 0.26880696415901184, |
|
"learning_rate": 4.86329816590962e-05, |
|
"loss": 0.0142, |
|
"step": 6840 |
|
}, |
|
{ |
|
"epoch": 3.4456740442655933, |
|
"grad_norm": 0.192754328250885, |
|
"learning_rate": 4.8347822430578856e-05, |
|
"loss": 0.0166, |
|
"step": 6850 |
|
}, |
|
{ |
|
"epoch": 3.4507042253521125, |
|
"grad_norm": 0.12232445925474167, |
|
"learning_rate": 4.80632349018096e-05, |
|
"loss": 0.0175, |
|
"step": 6860 |
|
}, |
|
{ |
|
"epoch": 3.4557344064386317, |
|
"grad_norm": 0.1528378129005432, |
|
"learning_rate": 4.777922222267721e-05, |
|
"loss": 0.0134, |
|
"step": 6870 |
|
}, |
|
{ |
|
"epoch": 3.460764587525151, |
|
"grad_norm": 0.37869587540626526, |
|
"learning_rate": 4.749578753670799e-05, |
|
"loss": 0.0209, |
|
"step": 6880 |
|
}, |
|
{ |
|
"epoch": 3.46579476861167, |
|
"grad_norm": 0.24779389798641205, |
|
"learning_rate": 4.721293398103086e-05, |
|
"loss": 0.0167, |
|
"step": 6890 |
|
}, |
|
{ |
|
"epoch": 3.470824949698189, |
|
"grad_norm": 0.1252845823764801, |
|
"learning_rate": 4.6930664686342526e-05, |
|
"loss": 0.0118, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 3.475855130784708, |
|
"grad_norm": 0.28004470467567444, |
|
"learning_rate": 4.664898277687313e-05, |
|
"loss": 0.0229, |
|
"step": 6910 |
|
}, |
|
{ |
|
"epoch": 3.4808853118712273, |
|
"grad_norm": 0.1652926206588745, |
|
"learning_rate": 4.636789137035129e-05, |
|
"loss": 0.0201, |
|
"step": 6920 |
|
}, |
|
{ |
|
"epoch": 3.4859154929577465, |
|
"grad_norm": 0.12315156310796738, |
|
"learning_rate": 4.6087393577969926e-05, |
|
"loss": 0.0133, |
|
"step": 6930 |
|
}, |
|
{ |
|
"epoch": 3.4909456740442657, |
|
"grad_norm": 0.2795407772064209, |
|
"learning_rate": 4.580749250435158e-05, |
|
"loss": 0.0148, |
|
"step": 6940 |
|
}, |
|
{ |
|
"epoch": 3.4959758551307845, |
|
"grad_norm": 0.25727009773254395, |
|
"learning_rate": 4.5528191247514226e-05, |
|
"loss": 0.0161, |
|
"step": 6950 |
|
}, |
|
{ |
|
"epoch": 3.5010060362173037, |
|
"grad_norm": 0.2040313482284546, |
|
"learning_rate": 4.524949289883692e-05, |
|
"loss": 0.0169, |
|
"step": 6960 |
|
}, |
|
{ |
|
"epoch": 3.506036217303823, |
|
"grad_norm": 0.21510617434978485, |
|
"learning_rate": 4.497140054302548e-05, |
|
"loss": 0.019, |
|
"step": 6970 |
|
}, |
|
{ |
|
"epoch": 3.511066398390342, |
|
"grad_norm": 0.22986271977424622, |
|
"learning_rate": 4.469391725807854e-05, |
|
"loss": 0.0177, |
|
"step": 6980 |
|
}, |
|
{ |
|
"epoch": 3.5160965794768613, |
|
"grad_norm": 0.1664620190858841, |
|
"learning_rate": 4.4417046115253304e-05, |
|
"loss": 0.0207, |
|
"step": 6990 |
|
}, |
|
{ |
|
"epoch": 3.52112676056338, |
|
"grad_norm": 0.24035824835300446, |
|
"learning_rate": 4.414079017903166e-05, |
|
"loss": 0.021, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 3.5261569416498992, |
|
"grad_norm": 0.2778620719909668, |
|
"learning_rate": 4.386515250708627e-05, |
|
"loss": 0.0179, |
|
"step": 7010 |
|
}, |
|
{ |
|
"epoch": 3.5311871227364184, |
|
"grad_norm": 0.2031363546848297, |
|
"learning_rate": 4.3590136150246555e-05, |
|
"loss": 0.0151, |
|
"step": 7020 |
|
}, |
|
{ |
|
"epoch": 3.5362173038229376, |
|
"grad_norm": 0.29513317346572876, |
|
"learning_rate": 4.3315744152465276e-05, |
|
"loss": 0.0166, |
|
"step": 7030 |
|
}, |
|
{ |
|
"epoch": 3.541247484909457, |
|
"grad_norm": 0.2097848355770111, |
|
"learning_rate": 4.3041979550784395e-05, |
|
"loss": 0.0142, |
|
"step": 7040 |
|
}, |
|
{ |
|
"epoch": 3.5462776659959756, |
|
"grad_norm": 0.21217374503612518, |
|
"learning_rate": 4.276884537530187e-05, |
|
"loss": 0.0131, |
|
"step": 7050 |
|
}, |
|
{ |
|
"epoch": 3.551307847082495, |
|
"grad_norm": 0.18939641118049622, |
|
"learning_rate": 4.2496344649137776e-05, |
|
"loss": 0.0166, |
|
"step": 7060 |
|
}, |
|
{ |
|
"epoch": 3.556338028169014, |
|
"grad_norm": 0.17117512226104736, |
|
"learning_rate": 4.222448038840113e-05, |
|
"loss": 0.0187, |
|
"step": 7070 |
|
}, |
|
{ |
|
"epoch": 3.561368209255533, |
|
"grad_norm": 0.15176957845687866, |
|
"learning_rate": 4.1953255602156394e-05, |
|
"loss": 0.0193, |
|
"step": 7080 |
|
}, |
|
{ |
|
"epoch": 3.5663983903420524, |
|
"grad_norm": 0.3019493818283081, |
|
"learning_rate": 4.168267329239002e-05, |
|
"loss": 0.0171, |
|
"step": 7090 |
|
}, |
|
{ |
|
"epoch": 3.571428571428571, |
|
"grad_norm": 0.18399718403816223, |
|
"learning_rate": 4.141273645397754e-05, |
|
"loss": 0.018, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 3.5764587525150904, |
|
"grad_norm": 0.16979122161865234, |
|
"learning_rate": 4.114344807465007e-05, |
|
"loss": 0.0209, |
|
"step": 7110 |
|
}, |
|
{ |
|
"epoch": 3.5814889336016096, |
|
"grad_norm": 0.2553984820842743, |
|
"learning_rate": 4.087481113496159e-05, |
|
"loss": 0.0123, |
|
"step": 7120 |
|
}, |
|
{ |
|
"epoch": 3.586519114688129, |
|
"grad_norm": 0.20643579959869385, |
|
"learning_rate": 4.060682860825559e-05, |
|
"loss": 0.0159, |
|
"step": 7130 |
|
}, |
|
{ |
|
"epoch": 3.591549295774648, |
|
"grad_norm": 0.20078648626804352, |
|
"learning_rate": 4.033950346063248e-05, |
|
"loss": 0.0142, |
|
"step": 7140 |
|
}, |
|
{ |
|
"epoch": 3.5965794768611667, |
|
"grad_norm": 0.13958579301834106, |
|
"learning_rate": 4.007283865091662e-05, |
|
"loss": 0.0153, |
|
"step": 7150 |
|
}, |
|
{ |
|
"epoch": 3.6016096579476864, |
|
"grad_norm": 0.21849416196346283, |
|
"learning_rate": 3.98068371306235e-05, |
|
"loss": 0.022, |
|
"step": 7160 |
|
}, |
|
{ |
|
"epoch": 3.606639839034205, |
|
"grad_norm": 0.2273043692111969, |
|
"learning_rate": 3.954150184392723e-05, |
|
"loss": 0.0141, |
|
"step": 7170 |
|
}, |
|
{ |
|
"epoch": 3.6116700201207244, |
|
"grad_norm": 0.18715119361877441, |
|
"learning_rate": 3.927683572762778e-05, |
|
"loss": 0.0149, |
|
"step": 7180 |
|
}, |
|
{ |
|
"epoch": 3.6167002012072436, |
|
"grad_norm": 0.27733615040779114, |
|
"learning_rate": 3.9012841711118677e-05, |
|
"loss": 0.0177, |
|
"step": 7190 |
|
}, |
|
{ |
|
"epoch": 3.6217303822937623, |
|
"grad_norm": 0.22101663053035736, |
|
"learning_rate": 3.874952271635444e-05, |
|
"loss": 0.0177, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 3.626760563380282, |
|
"grad_norm": 0.21932683885097504, |
|
"learning_rate": 3.848688165781819e-05, |
|
"loss": 0.0126, |
|
"step": 7210 |
|
}, |
|
{ |
|
"epoch": 3.6317907444668007, |
|
"grad_norm": 0.18129754066467285, |
|
"learning_rate": 3.82249214424896e-05, |
|
"loss": 0.0133, |
|
"step": 7220 |
|
}, |
|
{ |
|
"epoch": 3.63682092555332, |
|
"grad_norm": 0.2246370166540146, |
|
"learning_rate": 3.796364496981247e-05, |
|
"loss": 0.0148, |
|
"step": 7230 |
|
}, |
|
{ |
|
"epoch": 3.641851106639839, |
|
"grad_norm": 0.22842873632907867, |
|
"learning_rate": 3.7703055131662854e-05, |
|
"loss": 0.0129, |
|
"step": 7240 |
|
}, |
|
{ |
|
"epoch": 3.646881287726358, |
|
"grad_norm": 0.2930648922920227, |
|
"learning_rate": 3.744315481231694e-05, |
|
"loss": 0.0167, |
|
"step": 7250 |
|
}, |
|
{ |
|
"epoch": 3.6519114688128775, |
|
"grad_norm": 0.17970743775367737, |
|
"learning_rate": 3.7183946888419066e-05, |
|
"loss": 0.0135, |
|
"step": 7260 |
|
}, |
|
{ |
|
"epoch": 3.6569416498993963, |
|
"grad_norm": 0.342587411403656, |
|
"learning_rate": 3.692543422895004e-05, |
|
"loss": 0.0183, |
|
"step": 7270 |
|
}, |
|
{ |
|
"epoch": 3.6619718309859155, |
|
"grad_norm": 0.17181113362312317, |
|
"learning_rate": 3.6667619695195285e-05, |
|
"loss": 0.0112, |
|
"step": 7280 |
|
}, |
|
{ |
|
"epoch": 3.6670020120724347, |
|
"grad_norm": 0.14279602468013763, |
|
"learning_rate": 3.6410506140713216e-05, |
|
"loss": 0.0139, |
|
"step": 7290 |
|
}, |
|
{ |
|
"epoch": 3.6720321931589535, |
|
"grad_norm": 0.15404970943927765, |
|
"learning_rate": 3.615409641130351e-05, |
|
"loss": 0.0197, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 3.677062374245473, |
|
"grad_norm": 0.21196570992469788, |
|
"learning_rate": 3.589839334497587e-05, |
|
"loss": 0.0114, |
|
"step": 7310 |
|
}, |
|
{ |
|
"epoch": 3.682092555331992, |
|
"grad_norm": 0.14234349131584167, |
|
"learning_rate": 3.564339977191834e-05, |
|
"loss": 0.0122, |
|
"step": 7320 |
|
}, |
|
{ |
|
"epoch": 3.687122736418511, |
|
"grad_norm": 0.3660185933113098, |
|
"learning_rate": 3.538911851446619e-05, |
|
"loss": 0.0153, |
|
"step": 7330 |
|
}, |
|
{ |
|
"epoch": 3.6921529175050303, |
|
"grad_norm": 0.20586389303207397, |
|
"learning_rate": 3.5135552387070636e-05, |
|
"loss": 0.0167, |
|
"step": 7340 |
|
}, |
|
{ |
|
"epoch": 3.697183098591549, |
|
"grad_norm": 0.19840209186077118, |
|
"learning_rate": 3.48827041962675e-05, |
|
"loss": 0.0131, |
|
"step": 7350 |
|
}, |
|
{ |
|
"epoch": 3.7022132796780687, |
|
"grad_norm": 0.3007209897041321, |
|
"learning_rate": 3.463057674064646e-05, |
|
"loss": 0.0223, |
|
"step": 7360 |
|
}, |
|
{ |
|
"epoch": 3.7072434607645874, |
|
"grad_norm": 0.20882894098758698, |
|
"learning_rate": 3.437917281081975e-05, |
|
"loss": 0.0204, |
|
"step": 7370 |
|
}, |
|
{ |
|
"epoch": 3.7122736418511066, |
|
"grad_norm": 0.19728295505046844, |
|
"learning_rate": 3.412849518939155e-05, |
|
"loss": 0.018, |
|
"step": 7380 |
|
}, |
|
{ |
|
"epoch": 3.717303822937626, |
|
"grad_norm": 0.2532462179660797, |
|
"learning_rate": 3.387854665092709e-05, |
|
"loss": 0.0161, |
|
"step": 7390 |
|
}, |
|
{ |
|
"epoch": 3.7223340040241446, |
|
"grad_norm": 0.1997775435447693, |
|
"learning_rate": 3.3629329961921765e-05, |
|
"loss": 0.0112, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 3.7273641851106643, |
|
"grad_norm": 0.17711062729358673, |
|
"learning_rate": 3.338084788077085e-05, |
|
"loss": 0.0156, |
|
"step": 7410 |
|
}, |
|
{ |
|
"epoch": 3.732394366197183, |
|
"grad_norm": 0.1401996910572052, |
|
"learning_rate": 3.313310315773864e-05, |
|
"loss": 0.0281, |
|
"step": 7420 |
|
}, |
|
{ |
|
"epoch": 3.737424547283702, |
|
"grad_norm": 0.1834188550710678, |
|
"learning_rate": 3.288609853492827e-05, |
|
"loss": 0.0162, |
|
"step": 7430 |
|
}, |
|
{ |
|
"epoch": 3.7424547283702214, |
|
"grad_norm": 0.19655165076255798, |
|
"learning_rate": 3.2639836746251216e-05, |
|
"loss": 0.0124, |
|
"step": 7440 |
|
}, |
|
{ |
|
"epoch": 3.74748490945674, |
|
"grad_norm": 0.17778749763965607, |
|
"learning_rate": 3.2394320517397015e-05, |
|
"loss": 0.0153, |
|
"step": 7450 |
|
}, |
|
{ |
|
"epoch": 3.75251509054326, |
|
"grad_norm": 0.16552262008190155, |
|
"learning_rate": 3.214955256580327e-05, |
|
"loss": 0.017, |
|
"step": 7460 |
|
}, |
|
{ |
|
"epoch": 3.7575452716297786, |
|
"grad_norm": 0.19796250760555267, |
|
"learning_rate": 3.1905535600625314e-05, |
|
"loss": 0.0182, |
|
"step": 7470 |
|
}, |
|
{ |
|
"epoch": 3.762575452716298, |
|
"grad_norm": 0.19010251760482788, |
|
"learning_rate": 3.166227232270651e-05, |
|
"loss": 0.0182, |
|
"step": 7480 |
|
}, |
|
{ |
|
"epoch": 3.767605633802817, |
|
"grad_norm": 0.21966631710529327, |
|
"learning_rate": 3.141976542454806e-05, |
|
"loss": 0.018, |
|
"step": 7490 |
|
}, |
|
{ |
|
"epoch": 3.7726358148893357, |
|
"grad_norm": 0.14321380853652954, |
|
"learning_rate": 3.117801759027959e-05, |
|
"loss": 0.0139, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 3.7776659959758554, |
|
"grad_norm": 0.22963641583919525, |
|
"learning_rate": 3.093703149562892e-05, |
|
"loss": 0.0112, |
|
"step": 7510 |
|
}, |
|
{ |
|
"epoch": 3.782696177062374, |
|
"grad_norm": 0.15116992592811584, |
|
"learning_rate": 3.069680980789294e-05, |
|
"loss": 0.0193, |
|
"step": 7520 |
|
}, |
|
{ |
|
"epoch": 3.7877263581488934, |
|
"grad_norm": 0.14807961881160736, |
|
"learning_rate": 3.0457355185907877e-05, |
|
"loss": 0.0222, |
|
"step": 7530 |
|
}, |
|
{ |
|
"epoch": 3.7927565392354126, |
|
"grad_norm": 0.19666004180908203, |
|
"learning_rate": 3.0218670280019745e-05, |
|
"loss": 0.0138, |
|
"step": 7540 |
|
}, |
|
{ |
|
"epoch": 3.7977867203219313, |
|
"grad_norm": 0.22170531749725342, |
|
"learning_rate": 2.9980757732055277e-05, |
|
"loss": 0.0185, |
|
"step": 7550 |
|
}, |
|
{ |
|
"epoch": 3.802816901408451, |
|
"grad_norm": 0.1371658444404602, |
|
"learning_rate": 2.974362017529242e-05, |
|
"loss": 0.0135, |
|
"step": 7560 |
|
}, |
|
{ |
|
"epoch": 3.8078470824949697, |
|
"grad_norm": 0.1677546203136444, |
|
"learning_rate": 2.9507260234431444e-05, |
|
"loss": 0.0096, |
|
"step": 7570 |
|
}, |
|
{ |
|
"epoch": 3.812877263581489, |
|
"grad_norm": 0.2982484996318817, |
|
"learning_rate": 2.9271680525565724e-05, |
|
"loss": 0.0155, |
|
"step": 7580 |
|
}, |
|
{ |
|
"epoch": 3.817907444668008, |
|
"grad_norm": 0.23418201506137848, |
|
"learning_rate": 2.9036883656152734e-05, |
|
"loss": 0.0151, |
|
"step": 7590 |
|
}, |
|
{ |
|
"epoch": 3.822937625754527, |
|
"grad_norm": 0.30079197883605957, |
|
"learning_rate": 2.8802872224985434e-05, |
|
"loss": 0.0167, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 3.8279678068410465, |
|
"grad_norm": 0.16451403498649597, |
|
"learning_rate": 2.8569648822163185e-05, |
|
"loss": 0.0113, |
|
"step": 7610 |
|
}, |
|
{ |
|
"epoch": 3.8329979879275653, |
|
"grad_norm": 0.28625065088272095, |
|
"learning_rate": 2.8337216029063395e-05, |
|
"loss": 0.0146, |
|
"step": 7620 |
|
}, |
|
{ |
|
"epoch": 3.8380281690140845, |
|
"grad_norm": 0.22233416140079498, |
|
"learning_rate": 2.810557641831266e-05, |
|
"loss": 0.0201, |
|
"step": 7630 |
|
}, |
|
{ |
|
"epoch": 3.8430583501006037, |
|
"grad_norm": 0.10642603039741516, |
|
"learning_rate": 2.787473255375853e-05, |
|
"loss": 0.0125, |
|
"step": 7640 |
|
}, |
|
{ |
|
"epoch": 3.8480885311871225, |
|
"grad_norm": 0.2190830409526825, |
|
"learning_rate": 2.7644686990441027e-05, |
|
"loss": 0.0169, |
|
"step": 7650 |
|
}, |
|
{ |
|
"epoch": 3.853118712273642, |
|
"grad_norm": 0.1361585557460785, |
|
"learning_rate": 2.7415442274564273e-05, |
|
"loss": 0.0144, |
|
"step": 7660 |
|
}, |
|
{ |
|
"epoch": 3.858148893360161, |
|
"grad_norm": 0.27642127871513367, |
|
"learning_rate": 2.718700094346851e-05, |
|
"loss": 0.0131, |
|
"step": 7670 |
|
}, |
|
{ |
|
"epoch": 3.86317907444668, |
|
"grad_norm": 0.3688669800758362, |
|
"learning_rate": 2.6959365525601822e-05, |
|
"loss": 0.0142, |
|
"step": 7680 |
|
}, |
|
{ |
|
"epoch": 3.8682092555331993, |
|
"grad_norm": 0.06550594419240952, |
|
"learning_rate": 2.6732538540492292e-05, |
|
"loss": 0.0151, |
|
"step": 7690 |
|
}, |
|
{ |
|
"epoch": 3.873239436619718, |
|
"grad_norm": 0.2921280264854431, |
|
"learning_rate": 2.6506522498720065e-05, |
|
"loss": 0.0107, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 3.8782696177062377, |
|
"grad_norm": 0.2402195930480957, |
|
"learning_rate": 2.6281319901889488e-05, |
|
"loss": 0.0162, |
|
"step": 7710 |
|
}, |
|
{ |
|
"epoch": 3.8832997987927564, |
|
"grad_norm": 0.15326948463916779, |
|
"learning_rate": 2.6056933242601544e-05, |
|
"loss": 0.0171, |
|
"step": 7720 |
|
}, |
|
{ |
|
"epoch": 3.8883299798792756, |
|
"grad_norm": 0.15069030225276947, |
|
"learning_rate": 2.5833365004426215e-05, |
|
"loss": 0.0137, |
|
"step": 7730 |
|
}, |
|
{ |
|
"epoch": 3.893360160965795, |
|
"grad_norm": 0.2580581307411194, |
|
"learning_rate": 2.561061766187496e-05, |
|
"loss": 0.0159, |
|
"step": 7740 |
|
}, |
|
{ |
|
"epoch": 3.8983903420523136, |
|
"grad_norm": 0.20609134435653687, |
|
"learning_rate": 2.538869368037332e-05, |
|
"loss": 0.0165, |
|
"step": 7750 |
|
}, |
|
{ |
|
"epoch": 3.9034205231388333, |
|
"grad_norm": 0.12185626477003098, |
|
"learning_rate": 2.5167595516233722e-05, |
|
"loss": 0.0148, |
|
"step": 7760 |
|
}, |
|
{ |
|
"epoch": 3.908450704225352, |
|
"grad_norm": 0.14162567257881165, |
|
"learning_rate": 2.4947325616628225e-05, |
|
"loss": 0.0139, |
|
"step": 7770 |
|
}, |
|
{ |
|
"epoch": 3.913480885311871, |
|
"grad_norm": 0.14534728229045868, |
|
"learning_rate": 2.4727886419561374e-05, |
|
"loss": 0.0129, |
|
"step": 7780 |
|
}, |
|
{ |
|
"epoch": 3.9185110663983904, |
|
"grad_norm": 0.1677662879228592, |
|
"learning_rate": 2.450928035384339e-05, |
|
"loss": 0.0177, |
|
"step": 7790 |
|
}, |
|
{ |
|
"epoch": 3.9235412474849096, |
|
"grad_norm": 0.35635554790496826, |
|
"learning_rate": 2.4291509839063042e-05, |
|
"loss": 0.0138, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 3.928571428571429, |
|
"grad_norm": 0.1618734896183014, |
|
"learning_rate": 2.407457728556115e-05, |
|
"loss": 0.0133, |
|
"step": 7810 |
|
}, |
|
{ |
|
"epoch": 3.9336016096579476, |
|
"grad_norm": 0.1893433928489685, |
|
"learning_rate": 2.385848509440364e-05, |
|
"loss": 0.0152, |
|
"step": 7820 |
|
}, |
|
{ |
|
"epoch": 3.938631790744467, |
|
"grad_norm": 0.2308843731880188, |
|
"learning_rate": 2.3643235657355145e-05, |
|
"loss": 0.0128, |
|
"step": 7830 |
|
}, |
|
{ |
|
"epoch": 3.943661971830986, |
|
"grad_norm": 0.21923568844795227, |
|
"learning_rate": 2.342883135685253e-05, |
|
"loss": 0.0147, |
|
"step": 7840 |
|
}, |
|
{ |
|
"epoch": 3.948692152917505, |
|
"grad_norm": 0.24298080801963806, |
|
"learning_rate": 2.321527456597833e-05, |
|
"loss": 0.0177, |
|
"step": 7850 |
|
}, |
|
{ |
|
"epoch": 3.9537223340040244, |
|
"grad_norm": 0.14858295023441315, |
|
"learning_rate": 2.300256764843477e-05, |
|
"loss": 0.0154, |
|
"step": 7860 |
|
}, |
|
{ |
|
"epoch": 3.958752515090543, |
|
"grad_norm": 0.21552342176437378, |
|
"learning_rate": 2.2790712958517324e-05, |
|
"loss": 0.013, |
|
"step": 7870 |
|
}, |
|
{ |
|
"epoch": 3.9637826961770624, |
|
"grad_norm": 0.2514561712741852, |
|
"learning_rate": 2.2579712841088873e-05, |
|
"loss": 0.0153, |
|
"step": 7880 |
|
}, |
|
{ |
|
"epoch": 3.9688128772635816, |
|
"grad_norm": 0.1734912097454071, |
|
"learning_rate": 2.23695696315537e-05, |
|
"loss": 0.0198, |
|
"step": 7890 |
|
}, |
|
{ |
|
"epoch": 3.9738430583501008, |
|
"grad_norm": 0.2863246500492096, |
|
"learning_rate": 2.216028565583148e-05, |
|
"loss": 0.016, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 3.97887323943662, |
|
"grad_norm": 0.2181919515132904, |
|
"learning_rate": 2.1951863230331793e-05, |
|
"loss": 0.012, |
|
"step": 7910 |
|
}, |
|
{ |
|
"epoch": 3.9839034205231387, |
|
"grad_norm": 0.2490726262331009, |
|
"learning_rate": 2.174430466192826e-05, |
|
"loss": 0.0121, |
|
"step": 7920 |
|
}, |
|
{ |
|
"epoch": 3.988933601609658, |
|
"grad_norm": 0.10584773123264313, |
|
"learning_rate": 2.153761224793317e-05, |
|
"loss": 0.0144, |
|
"step": 7930 |
|
}, |
|
{ |
|
"epoch": 3.993963782696177, |
|
"grad_norm": 0.16053903102874756, |
|
"learning_rate": 2.1331788276072007e-05, |
|
"loss": 0.0169, |
|
"step": 7940 |
|
}, |
|
{ |
|
"epoch": 3.9989939637826963, |
|
"grad_norm": 0.11051679402589798, |
|
"learning_rate": 2.1126835024458003e-05, |
|
"loss": 0.0094, |
|
"step": 7950 |
|
}, |
|
{ |
|
"epoch": 4.0040241448692155, |
|
"grad_norm": 0.2628016173839569, |
|
"learning_rate": 2.0922754761567143e-05, |
|
"loss": 0.0132, |
|
"step": 7960 |
|
}, |
|
{ |
|
"epoch": 4.009054325955734, |
|
"grad_norm": 0.14394591748714447, |
|
"learning_rate": 2.0719549746212897e-05, |
|
"loss": 0.0148, |
|
"step": 7970 |
|
}, |
|
{ |
|
"epoch": 4.014084507042254, |
|
"grad_norm": 0.11262981593608856, |
|
"learning_rate": 2.0517222227521304e-05, |
|
"loss": 0.0096, |
|
"step": 7980 |
|
}, |
|
{ |
|
"epoch": 4.019114688128773, |
|
"grad_norm": 0.09093613922595978, |
|
"learning_rate": 2.0315774444905965e-05, |
|
"loss": 0.0128, |
|
"step": 7990 |
|
}, |
|
{ |
|
"epoch": 4.0241448692152915, |
|
"grad_norm": 0.1167258694767952, |
|
"learning_rate": 2.0115208628043436e-05, |
|
"loss": 0.0167, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 4.029175050301811, |
|
"grad_norm": 0.1440437287092209, |
|
"learning_rate": 1.9915526996848333e-05, |
|
"loss": 0.0136, |
|
"step": 8010 |
|
}, |
|
{ |
|
"epoch": 4.03420523138833, |
|
"grad_norm": 0.23001736402511597, |
|
"learning_rate": 1.971673176144896e-05, |
|
"loss": 0.0133, |
|
"step": 8020 |
|
}, |
|
{ |
|
"epoch": 4.0392354124748495, |
|
"grad_norm": 0.18372376263141632, |
|
"learning_rate": 1.9518825122162766e-05, |
|
"loss": 0.0121, |
|
"step": 8030 |
|
}, |
|
{ |
|
"epoch": 4.044265593561368, |
|
"grad_norm": 0.16671916842460632, |
|
"learning_rate": 1.932180926947189e-05, |
|
"loss": 0.0124, |
|
"step": 8040 |
|
}, |
|
{ |
|
"epoch": 4.049295774647887, |
|
"grad_norm": 0.22989486157894135, |
|
"learning_rate": 1.912568638399915e-05, |
|
"loss": 0.0142, |
|
"step": 8050 |
|
}, |
|
{ |
|
"epoch": 4.054325955734407, |
|
"grad_norm": 0.28540411591529846, |
|
"learning_rate": 1.893045863648364e-05, |
|
"loss": 0.0162, |
|
"step": 8060 |
|
}, |
|
{ |
|
"epoch": 4.059356136820925, |
|
"grad_norm": 0.21366238594055176, |
|
"learning_rate": 1.873612818775692e-05, |
|
"loss": 0.0139, |
|
"step": 8070 |
|
}, |
|
{ |
|
"epoch": 4.064386317907445, |
|
"grad_norm": 0.08002448827028275, |
|
"learning_rate": 1.8542697188719005e-05, |
|
"loss": 0.0168, |
|
"step": 8080 |
|
}, |
|
{ |
|
"epoch": 4.069416498993964, |
|
"grad_norm": 0.17863331735134125, |
|
"learning_rate": 1.83501677803145e-05, |
|
"loss": 0.0098, |
|
"step": 8090 |
|
}, |
|
{ |
|
"epoch": 4.074446680080483, |
|
"grad_norm": 0.2530530095100403, |
|
"learning_rate": 1.815854209350908e-05, |
|
"loss": 0.013, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 4.079476861167002, |
|
"grad_norm": 0.18760378658771515, |
|
"learning_rate": 1.7967822249265677e-05, |
|
"loss": 0.0166, |
|
"step": 8110 |
|
}, |
|
{ |
|
"epoch": 4.084507042253521, |
|
"grad_norm": 0.09926089644432068, |
|
"learning_rate": 1.777801035852119e-05, |
|
"loss": 0.018, |
|
"step": 8120 |
|
}, |
|
{ |
|
"epoch": 4.089537223340041, |
|
"grad_norm": 0.24970552325248718, |
|
"learning_rate": 1.758910852216309e-05, |
|
"loss": 0.0157, |
|
"step": 8130 |
|
}, |
|
{ |
|
"epoch": 4.094567404426559, |
|
"grad_norm": 0.12321308255195618, |
|
"learning_rate": 1.7401118831006004e-05, |
|
"loss": 0.0115, |
|
"step": 8140 |
|
}, |
|
{ |
|
"epoch": 4.099597585513078, |
|
"grad_norm": 0.1537485420703888, |
|
"learning_rate": 1.721404336576884e-05, |
|
"loss": 0.0092, |
|
"step": 8150 |
|
}, |
|
{ |
|
"epoch": 4.104627766599598, |
|
"grad_norm": 0.13459086418151855, |
|
"learning_rate": 1.702788419705148e-05, |
|
"loss": 0.0116, |
|
"step": 8160 |
|
}, |
|
{ |
|
"epoch": 4.109657947686117, |
|
"grad_norm": 0.3569525480270386, |
|
"learning_rate": 1.684264338531214e-05, |
|
"loss": 0.0155, |
|
"step": 8170 |
|
}, |
|
{ |
|
"epoch": 4.114688128772636, |
|
"grad_norm": 0.11241878569126129, |
|
"learning_rate": 1.6658322980844298e-05, |
|
"loss": 0.0089, |
|
"step": 8180 |
|
}, |
|
{ |
|
"epoch": 4.119718309859155, |
|
"grad_norm": 0.31290173530578613, |
|
"learning_rate": 1.6474925023754174e-05, |
|
"loss": 0.0148, |
|
"step": 8190 |
|
}, |
|
{ |
|
"epoch": 4.124748490945674, |
|
"grad_norm": 0.14965899288654327, |
|
"learning_rate": 1.6292451543938124e-05, |
|
"loss": 0.0131, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 4.129778672032193, |
|
"grad_norm": 0.29743674397468567, |
|
"learning_rate": 1.6110904561060126e-05, |
|
"loss": 0.016, |
|
"step": 8210 |
|
}, |
|
{ |
|
"epoch": 4.134808853118712, |
|
"grad_norm": 0.26381608843803406, |
|
"learning_rate": 1.5930286084529457e-05, |
|
"loss": 0.0127, |
|
"step": 8220 |
|
}, |
|
{ |
|
"epoch": 4.139839034205232, |
|
"grad_norm": 0.3434705436229706, |
|
"learning_rate": 1.57505981134784e-05, |
|
"loss": 0.0138, |
|
"step": 8230 |
|
}, |
|
{ |
|
"epoch": 4.144869215291751, |
|
"grad_norm": 0.3142394423484802, |
|
"learning_rate": 1.557184263674024e-05, |
|
"loss": 0.0176, |
|
"step": 8240 |
|
}, |
|
{ |
|
"epoch": 4.149899396378269, |
|
"grad_norm": 0.17862974107265472, |
|
"learning_rate": 1.5394021632827093e-05, |
|
"loss": 0.0123, |
|
"step": 8250 |
|
}, |
|
{ |
|
"epoch": 4.154929577464789, |
|
"grad_norm": 0.2902679145336151, |
|
"learning_rate": 1.5217137069908128e-05, |
|
"loss": 0.0153, |
|
"step": 8260 |
|
}, |
|
{ |
|
"epoch": 4.159959758551308, |
|
"grad_norm": 0.28409862518310547, |
|
"learning_rate": 1.5041190905787772e-05, |
|
"loss": 0.0117, |
|
"step": 8270 |
|
}, |
|
{ |
|
"epoch": 4.164989939637827, |
|
"grad_norm": 0.32609495520591736, |
|
"learning_rate": 1.4866185087883933e-05, |
|
"loss": 0.0165, |
|
"step": 8280 |
|
}, |
|
{ |
|
"epoch": 4.170020120724346, |
|
"grad_norm": 0.3060837686061859, |
|
"learning_rate": 1.4692121553206595e-05, |
|
"loss": 0.0153, |
|
"step": 8290 |
|
}, |
|
{ |
|
"epoch": 4.175050301810865, |
|
"grad_norm": 0.25559473037719727, |
|
"learning_rate": 1.4519002228336232e-05, |
|
"loss": 0.0104, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 4.1800804828973845, |
|
"grad_norm": 0.12555906176567078, |
|
"learning_rate": 1.4346829029402654e-05, |
|
"loss": 0.0086, |
|
"step": 8310 |
|
}, |
|
{ |
|
"epoch": 4.185110663983903, |
|
"grad_norm": 0.12174857407808304, |
|
"learning_rate": 1.4175603862063591e-05, |
|
"loss": 0.0088, |
|
"step": 8320 |
|
}, |
|
{ |
|
"epoch": 4.190140845070423, |
|
"grad_norm": 0.11725517362356186, |
|
"learning_rate": 1.4005328621483794e-05, |
|
"loss": 0.0152, |
|
"step": 8330 |
|
}, |
|
{ |
|
"epoch": 4.195171026156942, |
|
"grad_norm": 0.20686453580856323, |
|
"learning_rate": 1.3836005192313994e-05, |
|
"loss": 0.0159, |
|
"step": 8340 |
|
}, |
|
{ |
|
"epoch": 4.2002012072434605, |
|
"grad_norm": 0.2010798454284668, |
|
"learning_rate": 1.3667635448669913e-05, |
|
"loss": 0.0136, |
|
"step": 8350 |
|
}, |
|
{ |
|
"epoch": 4.20523138832998, |
|
"grad_norm": 0.18625499308109283, |
|
"learning_rate": 1.3500221254111777e-05, |
|
"loss": 0.0222, |
|
"step": 8360 |
|
}, |
|
{ |
|
"epoch": 4.210261569416499, |
|
"grad_norm": 0.2409621626138687, |
|
"learning_rate": 1.3333764461623421e-05, |
|
"loss": 0.0234, |
|
"step": 8370 |
|
}, |
|
{ |
|
"epoch": 4.2152917505030185, |
|
"grad_norm": 0.12151456624269485, |
|
"learning_rate": 1.3168266913591976e-05, |
|
"loss": 0.0152, |
|
"step": 8380 |
|
}, |
|
{ |
|
"epoch": 4.220321931589537, |
|
"grad_norm": 0.2698768675327301, |
|
"learning_rate": 1.3003730441787399e-05, |
|
"loss": 0.0104, |
|
"step": 8390 |
|
}, |
|
{ |
|
"epoch": 4.225352112676056, |
|
"grad_norm": 0.21013912558555603, |
|
"learning_rate": 1.2840156867342179e-05, |
|
"loss": 0.0095, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 4.230382293762576, |
|
"grad_norm": 0.10292834788560867, |
|
"learning_rate": 1.2677548000731243e-05, |
|
"loss": 0.0113, |
|
"step": 8410 |
|
}, |
|
{ |
|
"epoch": 4.2354124748490944, |
|
"grad_norm": 0.05743186175823212, |
|
"learning_rate": 1.2515905641751824e-05, |
|
"loss": 0.0105, |
|
"step": 8420 |
|
}, |
|
{ |
|
"epoch": 4.240442655935614, |
|
"grad_norm": 0.18373721837997437, |
|
"learning_rate": 1.2355231579503645e-05, |
|
"loss": 0.0156, |
|
"step": 8430 |
|
}, |
|
{ |
|
"epoch": 4.245472837022133, |
|
"grad_norm": 0.18463782966136932, |
|
"learning_rate": 1.219552759236906e-05, |
|
"loss": 0.0101, |
|
"step": 8440 |
|
}, |
|
{ |
|
"epoch": 4.250503018108652, |
|
"grad_norm": 0.13232889771461487, |
|
"learning_rate": 1.2036795447993387e-05, |
|
"loss": 0.0099, |
|
"step": 8450 |
|
}, |
|
{ |
|
"epoch": 4.255533199195171, |
|
"grad_norm": 0.1151013895869255, |
|
"learning_rate": 1.1879036903265328e-05, |
|
"loss": 0.0096, |
|
"step": 8460 |
|
}, |
|
{ |
|
"epoch": 4.26056338028169, |
|
"grad_norm": 0.1502464860677719, |
|
"learning_rate": 1.1722253704297492e-05, |
|
"loss": 0.0114, |
|
"step": 8470 |
|
}, |
|
{ |
|
"epoch": 4.26559356136821, |
|
"grad_norm": 0.2458263784646988, |
|
"learning_rate": 1.1566447586407169e-05, |
|
"loss": 0.017, |
|
"step": 8480 |
|
}, |
|
{ |
|
"epoch": 4.270623742454728, |
|
"grad_norm": 0.2860092222690582, |
|
"learning_rate": 1.1411620274097013e-05, |
|
"loss": 0.017, |
|
"step": 8490 |
|
}, |
|
{ |
|
"epoch": 4.275653923541247, |
|
"grad_norm": 0.14720211923122406, |
|
"learning_rate": 1.1257773481036049e-05, |
|
"loss": 0.0135, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 4.280684104627767, |
|
"grad_norm": 0.3655484616756439, |
|
"learning_rate": 1.110490891004059e-05, |
|
"loss": 0.0228, |
|
"step": 8510 |
|
}, |
|
{ |
|
"epoch": 4.285714285714286, |
|
"grad_norm": 0.08732811361551285, |
|
"learning_rate": 1.0953028253055542e-05, |
|
"loss": 0.0096, |
|
"step": 8520 |
|
}, |
|
{ |
|
"epoch": 4.290744466800805, |
|
"grad_norm": 0.14107802510261536, |
|
"learning_rate": 1.0802133191135566e-05, |
|
"loss": 0.0096, |
|
"step": 8530 |
|
}, |
|
{ |
|
"epoch": 4.295774647887324, |
|
"grad_norm": 0.18336671590805054, |
|
"learning_rate": 1.0652225394426441e-05, |
|
"loss": 0.0146, |
|
"step": 8540 |
|
}, |
|
{ |
|
"epoch": 4.300804828973843, |
|
"grad_norm": 0.1546245664358139, |
|
"learning_rate": 1.0503306522146738e-05, |
|
"loss": 0.0113, |
|
"step": 8550 |
|
}, |
|
{ |
|
"epoch": 4.305835010060362, |
|
"grad_norm": 0.1467217206954956, |
|
"learning_rate": 1.0355378222569256e-05, |
|
"loss": 0.0136, |
|
"step": 8560 |
|
}, |
|
{ |
|
"epoch": 4.310865191146881, |
|
"grad_norm": 0.1393778920173645, |
|
"learning_rate": 1.0208442133002948e-05, |
|
"loss": 0.0107, |
|
"step": 8570 |
|
}, |
|
{ |
|
"epoch": 4.315895372233401, |
|
"grad_norm": 0.3030548393726349, |
|
"learning_rate": 1.0062499879774734e-05, |
|
"loss": 0.0153, |
|
"step": 8580 |
|
}, |
|
{ |
|
"epoch": 4.32092555331992, |
|
"grad_norm": 0.30646151304244995, |
|
"learning_rate": 9.917553078211417e-06, |
|
"loss": 0.0117, |
|
"step": 8590 |
|
}, |
|
{ |
|
"epoch": 4.325955734406438, |
|
"grad_norm": 0.15872900187969208, |
|
"learning_rate": 9.773603332621972e-06, |
|
"loss": 0.0114, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 4.330985915492958, |
|
"grad_norm": 0.2105736881494522, |
|
"learning_rate": 9.630652236279625e-06, |
|
"loss": 0.0108, |
|
"step": 8610 |
|
}, |
|
{ |
|
"epoch": 4.336016096579477, |
|
"grad_norm": 0.13897240161895752, |
|
"learning_rate": 9.488701371404329e-06, |
|
"loss": 0.0089, |
|
"step": 8620 |
|
}, |
|
{ |
|
"epoch": 4.341046277665996, |
|
"grad_norm": 0.2540101408958435, |
|
"learning_rate": 9.347752309145241e-06, |
|
"loss": 0.0212, |
|
"step": 8630 |
|
}, |
|
{ |
|
"epoch": 4.346076458752515, |
|
"grad_norm": 0.10785099118947983, |
|
"learning_rate": 9.20780660956324e-06, |
|
"loss": 0.0082, |
|
"step": 8640 |
|
}, |
|
{ |
|
"epoch": 4.351106639839034, |
|
"grad_norm": 0.10478243976831436, |
|
"learning_rate": 9.068865821613803e-06, |
|
"loss": 0.0131, |
|
"step": 8650 |
|
}, |
|
{ |
|
"epoch": 4.3561368209255535, |
|
"grad_norm": 0.12173474580049515, |
|
"learning_rate": 8.930931483129667e-06, |
|
"loss": 0.0107, |
|
"step": 8660 |
|
}, |
|
{ |
|
"epoch": 4.361167002012072, |
|
"grad_norm": 0.19661535322666168, |
|
"learning_rate": 8.794005120804082e-06, |
|
"loss": 0.0121, |
|
"step": 8670 |
|
}, |
|
{ |
|
"epoch": 4.366197183098592, |
|
"grad_norm": 0.1414082646369934, |
|
"learning_rate": 8.658088250173624e-06, |
|
"loss": 0.0106, |
|
"step": 8680 |
|
}, |
|
{ |
|
"epoch": 4.371227364185111, |
|
"grad_norm": 0.07380446791648865, |
|
"learning_rate": 8.523182375601635e-06, |
|
"loss": 0.0116, |
|
"step": 8690 |
|
}, |
|
{ |
|
"epoch": 4.3762575452716295, |
|
"grad_norm": 0.1488698124885559, |
|
"learning_rate": 8.389288990261413e-06, |
|
"loss": 0.0093, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 4.381287726358149, |
|
"grad_norm": 0.17949314415454865, |
|
"learning_rate": 8.256409576119827e-06, |
|
"loss": 0.0099, |
|
"step": 8710 |
|
}, |
|
{ |
|
"epoch": 4.386317907444668, |
|
"grad_norm": 0.34648457169532776, |
|
"learning_rate": 8.124545603920842e-06, |
|
"loss": 0.0109, |
|
"step": 8720 |
|
}, |
|
{ |
|
"epoch": 4.3913480885311875, |
|
"grad_norm": 0.16411487758159637, |
|
"learning_rate": 7.993698533169192e-06, |
|
"loss": 0.0092, |
|
"step": 8730 |
|
}, |
|
{ |
|
"epoch": 4.396378269617706, |
|
"grad_norm": 0.20851458609104156, |
|
"learning_rate": 7.863869812114366e-06, |
|
"loss": 0.0125, |
|
"step": 8740 |
|
}, |
|
{ |
|
"epoch": 4.401408450704225, |
|
"grad_norm": 0.13391834497451782, |
|
"learning_rate": 7.73506087773439e-06, |
|
"loss": 0.0111, |
|
"step": 8750 |
|
}, |
|
{ |
|
"epoch": 4.406438631790745, |
|
"grad_norm": 0.2575322687625885, |
|
"learning_rate": 7.60727315572013e-06, |
|
"loss": 0.0137, |
|
"step": 8760 |
|
}, |
|
{ |
|
"epoch": 4.4114688128772634, |
|
"grad_norm": 0.1566155105829239, |
|
"learning_rate": 7.480508060459346e-06, |
|
"loss": 0.0121, |
|
"step": 8770 |
|
}, |
|
{ |
|
"epoch": 4.416498993963783, |
|
"grad_norm": 0.23036964237689972, |
|
"learning_rate": 7.3547669950211005e-06, |
|
"loss": 0.0189, |
|
"step": 8780 |
|
}, |
|
{ |
|
"epoch": 4.421529175050302, |
|
"grad_norm": 0.16328741610050201, |
|
"learning_rate": 7.230051351140266e-06, |
|
"loss": 0.0188, |
|
"step": 8790 |
|
}, |
|
{ |
|
"epoch": 4.426559356136821, |
|
"grad_norm": 0.45226040482521057, |
|
"learning_rate": 7.106362509202036e-06, |
|
"loss": 0.0175, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 4.43158953722334, |
|
"grad_norm": 0.4067038297653198, |
|
"learning_rate": 6.983701838226708e-06, |
|
"loss": 0.0134, |
|
"step": 8810 |
|
}, |
|
{ |
|
"epoch": 4.436619718309859, |
|
"grad_norm": 0.1728140115737915, |
|
"learning_rate": 6.86207069585455e-06, |
|
"loss": 0.0096, |
|
"step": 8820 |
|
}, |
|
{ |
|
"epoch": 4.441649899396379, |
|
"grad_norm": 0.22685836255550385, |
|
"learning_rate": 6.741470428330676e-06, |
|
"loss": 0.0135, |
|
"step": 8830 |
|
}, |
|
{ |
|
"epoch": 4.446680080482897, |
|
"grad_norm": 0.18156301975250244, |
|
"learning_rate": 6.621902370490274e-06, |
|
"loss": 0.0144, |
|
"step": 8840 |
|
}, |
|
{ |
|
"epoch": 4.451710261569416, |
|
"grad_norm": 0.20519036054611206, |
|
"learning_rate": 6.503367845743702e-06, |
|
"loss": 0.015, |
|
"step": 8850 |
|
}, |
|
{ |
|
"epoch": 4.456740442655936, |
|
"grad_norm": 0.26189279556274414, |
|
"learning_rate": 6.385868166061981e-06, |
|
"loss": 0.011, |
|
"step": 8860 |
|
}, |
|
{ |
|
"epoch": 4.461770623742455, |
|
"grad_norm": 0.1770821064710617, |
|
"learning_rate": 6.269404631962106e-06, |
|
"loss": 0.0112, |
|
"step": 8870 |
|
}, |
|
{ |
|
"epoch": 4.466800804828974, |
|
"grad_norm": 0.31295013427734375, |
|
"learning_rate": 6.153978532492821e-06, |
|
"loss": 0.0186, |
|
"step": 8880 |
|
}, |
|
{ |
|
"epoch": 4.471830985915493, |
|
"grad_norm": 0.47270047664642334, |
|
"learning_rate": 6.0395911452202355e-06, |
|
"loss": 0.0149, |
|
"step": 8890 |
|
}, |
|
{ |
|
"epoch": 4.476861167002012, |
|
"grad_norm": 0.1958407759666443, |
|
"learning_rate": 5.926243736213743e-06, |
|
"loss": 0.0076, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 4.481891348088531, |
|
"grad_norm": 0.14082856476306915, |
|
"learning_rate": 5.813937560031979e-06, |
|
"loss": 0.014, |
|
"step": 8910 |
|
}, |
|
{ |
|
"epoch": 4.48692152917505, |
|
"grad_norm": 0.2103276550769806, |
|
"learning_rate": 5.702673859708896e-06, |
|
"loss": 0.0121, |
|
"step": 8920 |
|
}, |
|
{ |
|
"epoch": 4.49195171026157, |
|
"grad_norm": 0.10313666611909866, |
|
"learning_rate": 5.592453866740155e-06, |
|
"loss": 0.0118, |
|
"step": 8930 |
|
}, |
|
{ |
|
"epoch": 4.496981891348089, |
|
"grad_norm": 0.23443832993507385, |
|
"learning_rate": 5.48327880106927e-06, |
|
"loss": 0.0123, |
|
"step": 8940 |
|
}, |
|
{ |
|
"epoch": 4.502012072434607, |
|
"grad_norm": 0.08273012936115265, |
|
"learning_rate": 5.375149871074336e-06, |
|
"loss": 0.0101, |
|
"step": 8950 |
|
}, |
|
{ |
|
"epoch": 4.507042253521127, |
|
"grad_norm": 0.13498452305793762, |
|
"learning_rate": 5.268068273554483e-06, |
|
"loss": 0.0142, |
|
"step": 8960 |
|
}, |
|
{ |
|
"epoch": 4.512072434607646, |
|
"grad_norm": 0.12951943278312683, |
|
"learning_rate": 5.1620351937167076e-06, |
|
"loss": 0.0111, |
|
"step": 8970 |
|
}, |
|
{ |
|
"epoch": 4.517102615694165, |
|
"grad_norm": 0.18177278339862823, |
|
"learning_rate": 5.057051805162749e-06, |
|
"loss": 0.0143, |
|
"step": 8980 |
|
}, |
|
{ |
|
"epoch": 4.522132796780684, |
|
"grad_norm": 0.23840029537677765, |
|
"learning_rate": 4.953119269876061e-06, |
|
"loss": 0.0121, |
|
"step": 8990 |
|
}, |
|
{ |
|
"epoch": 4.527162977867203, |
|
"grad_norm": 0.13470517098903656, |
|
"learning_rate": 4.8502387382090345e-06, |
|
"loss": 0.0136, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 4.5321931589537225, |
|
"grad_norm": 0.1814277470111847, |
|
"learning_rate": 4.748411348870141e-06, |
|
"loss": 0.0167, |
|
"step": 9010 |
|
}, |
|
{ |
|
"epoch": 4.537223340040241, |
|
"grad_norm": 0.3026091158390045, |
|
"learning_rate": 4.647638228911466e-06, |
|
"loss": 0.0157, |
|
"step": 9020 |
|
}, |
|
{ |
|
"epoch": 4.542253521126761, |
|
"grad_norm": 0.13517017662525177, |
|
"learning_rate": 4.547920493716118e-06, |
|
"loss": 0.0093, |
|
"step": 9030 |
|
}, |
|
{ |
|
"epoch": 4.54728370221328, |
|
"grad_norm": 0.06449972838163376, |
|
"learning_rate": 4.4492592469859486e-06, |
|
"loss": 0.0101, |
|
"step": 9040 |
|
}, |
|
{ |
|
"epoch": 4.5523138832997985, |
|
"grad_norm": 0.24287287890911102, |
|
"learning_rate": 4.3516555807293415e-06, |
|
"loss": 0.015, |
|
"step": 9050 |
|
}, |
|
{ |
|
"epoch": 4.557344064386318, |
|
"grad_norm": 0.13659290969371796, |
|
"learning_rate": 4.255110575249055e-06, |
|
"loss": 0.0098, |
|
"step": 9060 |
|
}, |
|
{ |
|
"epoch": 4.562374245472837, |
|
"grad_norm": 0.258847177028656, |
|
"learning_rate": 4.1596252991303655e-06, |
|
"loss": 0.0109, |
|
"step": 9070 |
|
}, |
|
{ |
|
"epoch": 4.5674044265593565, |
|
"grad_norm": 0.2787615954875946, |
|
"learning_rate": 4.065200809229163e-06, |
|
"loss": 0.0116, |
|
"step": 9080 |
|
}, |
|
{ |
|
"epoch": 4.572434607645875, |
|
"grad_norm": 0.1802290678024292, |
|
"learning_rate": 3.971838150660268e-06, |
|
"loss": 0.0101, |
|
"step": 9090 |
|
}, |
|
{ |
|
"epoch": 4.577464788732394, |
|
"grad_norm": 0.22423778474330902, |
|
"learning_rate": 3.879538356785917e-06, |
|
"loss": 0.0103, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 4.582494969818914, |
|
"grad_norm": 0.18749003112316132, |
|
"learning_rate": 3.7883024492042286e-06, |
|
"loss": 0.0084, |
|
"step": 9110 |
|
}, |
|
{ |
|
"epoch": 4.5875251509054324, |
|
"grad_norm": 0.08380598574876785, |
|
"learning_rate": 3.698131437737995e-06, |
|
"loss": 0.0092, |
|
"step": 9120 |
|
}, |
|
{ |
|
"epoch": 4.592555331991952, |
|
"grad_norm": 0.11922164261341095, |
|
"learning_rate": 3.6090263204234363e-06, |
|
"loss": 0.0129, |
|
"step": 9130 |
|
}, |
|
{ |
|
"epoch": 4.597585513078471, |
|
"grad_norm": 0.13963715732097626, |
|
"learning_rate": 3.520988083499199e-06, |
|
"loss": 0.0113, |
|
"step": 9140 |
|
}, |
|
{ |
|
"epoch": 4.60261569416499, |
|
"grad_norm": 0.15656189620494843, |
|
"learning_rate": 3.434017701395431e-06, |
|
"loss": 0.0109, |
|
"step": 9150 |
|
}, |
|
{ |
|
"epoch": 4.607645875251509, |
|
"grad_norm": 0.0764361172914505, |
|
"learning_rate": 3.348116136722912e-06, |
|
"loss": 0.0131, |
|
"step": 9160 |
|
}, |
|
{ |
|
"epoch": 4.612676056338028, |
|
"grad_norm": 0.23831793665885925, |
|
"learning_rate": 3.2632843402625625e-06, |
|
"loss": 0.0115, |
|
"step": 9170 |
|
}, |
|
{ |
|
"epoch": 4.617706237424548, |
|
"grad_norm": 0.19923029839992523, |
|
"learning_rate": 3.1795232509547633e-06, |
|
"loss": 0.0102, |
|
"step": 9180 |
|
}, |
|
{ |
|
"epoch": 4.622736418511066, |
|
"grad_norm": 0.1905340999364853, |
|
"learning_rate": 3.096833795889076e-06, |
|
"loss": 0.0104, |
|
"step": 9190 |
|
}, |
|
{ |
|
"epoch": 4.627766599597585, |
|
"grad_norm": 0.2911848723888397, |
|
"learning_rate": 3.015216890293904e-06, |
|
"loss": 0.0113, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 4.632796780684105, |
|
"grad_norm": 0.19489365816116333, |
|
"learning_rate": 2.9346734375264027e-06, |
|
"loss": 0.0145, |
|
"step": 9210 |
|
}, |
|
{ |
|
"epoch": 4.637826961770624, |
|
"grad_norm": 0.10868648439645767, |
|
"learning_rate": 2.8552043290624997e-06, |
|
"loss": 0.0128, |
|
"step": 9220 |
|
}, |
|
{ |
|
"epoch": 4.642857142857143, |
|
"grad_norm": 0.21155446767807007, |
|
"learning_rate": 2.7768104444869436e-06, |
|
"loss": 0.0127, |
|
"step": 9230 |
|
}, |
|
{ |
|
"epoch": 4.647887323943662, |
|
"grad_norm": 0.20911335945129395, |
|
"learning_rate": 2.6994926514836925e-06, |
|
"loss": 0.0159, |
|
"step": 9240 |
|
}, |
|
{ |
|
"epoch": 4.652917505030181, |
|
"grad_norm": 0.20620648562908173, |
|
"learning_rate": 2.6232518058261658e-06, |
|
"loss": 0.0136, |
|
"step": 9250 |
|
}, |
|
{ |
|
"epoch": 4.6579476861167, |
|
"grad_norm": 0.0914982482790947, |
|
"learning_rate": 2.5480887513679166e-06, |
|
"loss": 0.0114, |
|
"step": 9260 |
|
}, |
|
{ |
|
"epoch": 4.662977867203219, |
|
"grad_norm": 0.09038899838924408, |
|
"learning_rate": 2.4740043200332074e-06, |
|
"loss": 0.0125, |
|
"step": 9270 |
|
}, |
|
{ |
|
"epoch": 4.668008048289739, |
|
"grad_norm": 0.13401928544044495, |
|
"learning_rate": 2.400999331807796e-06, |
|
"loss": 0.0155, |
|
"step": 9280 |
|
}, |
|
{ |
|
"epoch": 4.673038229376258, |
|
"grad_norm": 0.18889258801937103, |
|
"learning_rate": 2.3290745947298966e-06, |
|
"loss": 0.0098, |
|
"step": 9290 |
|
}, |
|
{ |
|
"epoch": 4.678068410462776, |
|
"grad_norm": 0.15076126158237457, |
|
"learning_rate": 2.258230904881231e-06, |
|
"loss": 0.0151, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 4.683098591549296, |
|
"grad_norm": 0.17581748962402344, |
|
"learning_rate": 2.1884690463781833e-06, |
|
"loss": 0.0148, |
|
"step": 9310 |
|
}, |
|
{ |
|
"epoch": 4.688128772635815, |
|
"grad_norm": 0.08170731365680695, |
|
"learning_rate": 2.1197897913632026e-06, |
|
"loss": 0.0216, |
|
"step": 9320 |
|
}, |
|
{ |
|
"epoch": 4.693158953722334, |
|
"grad_norm": 0.19143046438694, |
|
"learning_rate": 2.0521938999961243e-06, |
|
"loss": 0.0166, |
|
"step": 9330 |
|
}, |
|
{ |
|
"epoch": 4.698189134808853, |
|
"grad_norm": 0.13908398151397705, |
|
"learning_rate": 1.9856821204458864e-06, |
|
"loss": 0.0121, |
|
"step": 9340 |
|
}, |
|
{ |
|
"epoch": 4.703219315895372, |
|
"grad_norm": 0.1692146509885788, |
|
"learning_rate": 1.9202551888821807e-06, |
|
"loss": 0.0073, |
|
"step": 9350 |
|
}, |
|
{ |
|
"epoch": 4.7082494969818915, |
|
"grad_norm": 0.11902793496847153, |
|
"learning_rate": 1.855913829467315e-06, |
|
"loss": 0.0098, |
|
"step": 9360 |
|
}, |
|
{ |
|
"epoch": 4.71327967806841, |
|
"grad_norm": 0.3413757383823395, |
|
"learning_rate": 1.7926587543482088e-06, |
|
"loss": 0.0225, |
|
"step": 9370 |
|
}, |
|
{ |
|
"epoch": 4.71830985915493, |
|
"grad_norm": 0.13018754124641418, |
|
"learning_rate": 1.7304906636485097e-06, |
|
"loss": 0.0097, |
|
"step": 9380 |
|
}, |
|
{ |
|
"epoch": 4.723340040241449, |
|
"grad_norm": 0.17999660968780518, |
|
"learning_rate": 1.6694102454608118e-06, |
|
"loss": 0.0174, |
|
"step": 9390 |
|
}, |
|
{ |
|
"epoch": 4.7283702213279675, |
|
"grad_norm": 0.1070287749171257, |
|
"learning_rate": 1.6094181758390947e-06, |
|
"loss": 0.0116, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 4.733400402414487, |
|
"grad_norm": 0.18182510137557983, |
|
"learning_rate": 1.5505151187912071e-06, |
|
"loss": 0.0109, |
|
"step": 9410 |
|
}, |
|
{ |
|
"epoch": 4.738430583501006, |
|
"grad_norm": 0.1901055872440338, |
|
"learning_rate": 1.4927017262715059e-06, |
|
"loss": 0.0093, |
|
"step": 9420 |
|
}, |
|
{ |
|
"epoch": 4.7434607645875255, |
|
"grad_norm": 0.1983153074979782, |
|
"learning_rate": 1.435978638173685e-06, |
|
"loss": 0.0153, |
|
"step": 9430 |
|
}, |
|
{ |
|
"epoch": 4.748490945674044, |
|
"grad_norm": 0.1601441502571106, |
|
"learning_rate": 1.3803464823236356e-06, |
|
"loss": 0.0101, |
|
"step": 9440 |
|
}, |
|
{ |
|
"epoch": 4.753521126760563, |
|
"grad_norm": 0.1466078907251358, |
|
"learning_rate": 1.325805874472552e-06, |
|
"loss": 0.0183, |
|
"step": 9450 |
|
}, |
|
{ |
|
"epoch": 4.758551307847083, |
|
"grad_norm": 0.24373824894428253, |
|
"learning_rate": 1.272357418290082e-06, |
|
"loss": 0.008, |
|
"step": 9460 |
|
}, |
|
{ |
|
"epoch": 4.7635814889336014, |
|
"grad_norm": 0.08891261368989944, |
|
"learning_rate": 1.2200017053576318e-06, |
|
"loss": 0.0106, |
|
"step": 9470 |
|
}, |
|
{ |
|
"epoch": 4.768611670020121, |
|
"grad_norm": 0.17067329585552216, |
|
"learning_rate": 1.1687393151618931e-06, |
|
"loss": 0.0096, |
|
"step": 9480 |
|
}, |
|
{ |
|
"epoch": 4.77364185110664, |
|
"grad_norm": 0.1952274590730667, |
|
"learning_rate": 1.1185708150883268e-06, |
|
"loss": 0.0122, |
|
"step": 9490 |
|
}, |
|
{ |
|
"epoch": 4.778672032193159, |
|
"grad_norm": 0.18577119708061218, |
|
"learning_rate": 1.0694967604149563e-06, |
|
"loss": 0.0132, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 4.783702213279678, |
|
"grad_norm": 0.19541311264038086, |
|
"learning_rate": 1.0215176943061955e-06, |
|
"loss": 0.0135, |
|
"step": 9510 |
|
}, |
|
{ |
|
"epoch": 4.788732394366197, |
|
"grad_norm": 0.14900928735733032, |
|
"learning_rate": 9.746341478068298e-07, |
|
"loss": 0.0122, |
|
"step": 9520 |
|
}, |
|
{ |
|
"epoch": 4.793762575452717, |
|
"grad_norm": 0.2078647017478943, |
|
"learning_rate": 9.288466398361783e-07, |
|
"loss": 0.0098, |
|
"step": 9530 |
|
}, |
|
{ |
|
"epoch": 4.798792756539235, |
|
"grad_norm": 0.137592151761055, |
|
"learning_rate": 8.841556771822746e-07, |
|
"loss": 0.0142, |
|
"step": 9540 |
|
}, |
|
{ |
|
"epoch": 4.803822937625754, |
|
"grad_norm": 0.1908939927816391, |
|
"learning_rate": 8.405617544963385e-07, |
|
"loss": 0.0143, |
|
"step": 9550 |
|
}, |
|
{ |
|
"epoch": 4.808853118712274, |
|
"grad_norm": 0.17629876732826233, |
|
"learning_rate": 7.980653542872584e-07, |
|
"loss": 0.0097, |
|
"step": 9560 |
|
}, |
|
{ |
|
"epoch": 4.813883299798793, |
|
"grad_norm": 0.11937274038791656, |
|
"learning_rate": 7.566669469162513e-07, |
|
"loss": 0.0126, |
|
"step": 9570 |
|
}, |
|
{ |
|
"epoch": 4.818913480885312, |
|
"grad_norm": 0.21683438122272491, |
|
"learning_rate": 7.16366990591677e-07, |
|
"loss": 0.0089, |
|
"step": 9580 |
|
}, |
|
{ |
|
"epoch": 4.823943661971831, |
|
"grad_norm": 0.1359615921974182, |
|
"learning_rate": 6.771659313639212e-07, |
|
"loss": 0.01, |
|
"step": 9590 |
|
}, |
|
{ |
|
"epoch": 4.82897384305835, |
|
"grad_norm": 0.23794005811214447, |
|
"learning_rate": 6.390642031205318e-07, |
|
"loss": 0.0183, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 4.834004024144869, |
|
"grad_norm": 0.10441375523805618, |
|
"learning_rate": 6.020622275813459e-07, |
|
"loss": 0.0107, |
|
"step": 9610 |
|
}, |
|
{ |
|
"epoch": 4.839034205231388, |
|
"grad_norm": 0.2170896977186203, |
|
"learning_rate": 5.661604142938703e-07, |
|
"loss": 0.0144, |
|
"step": 9620 |
|
}, |
|
{ |
|
"epoch": 4.844064386317908, |
|
"grad_norm": 0.1544094830751419, |
|
"learning_rate": 5.313591606287194e-07, |
|
"loss": 0.0095, |
|
"step": 9630 |
|
}, |
|
{ |
|
"epoch": 4.849094567404427, |
|
"grad_norm": 0.09597515314817429, |
|
"learning_rate": 4.976588517752178e-07, |
|
"loss": 0.0078, |
|
"step": 9640 |
|
}, |
|
{ |
|
"epoch": 4.854124748490945, |
|
"grad_norm": 0.18722732365131378, |
|
"learning_rate": 4.6505986073717143e-07, |
|
"loss": 0.0167, |
|
"step": 9650 |
|
}, |
|
{ |
|
"epoch": 4.859154929577465, |
|
"grad_norm": 0.15623292326927185, |
|
"learning_rate": 4.3356254832869204e-07, |
|
"loss": 0.0122, |
|
"step": 9660 |
|
}, |
|
{ |
|
"epoch": 4.864185110663984, |
|
"grad_norm": 0.19763480126857758, |
|
"learning_rate": 4.0316726317023435e-07, |
|
"loss": 0.0274, |
|
"step": 9670 |
|
}, |
|
{ |
|
"epoch": 4.869215291750503, |
|
"grad_norm": 0.0955481231212616, |
|
"learning_rate": 3.7387434168473235e-07, |
|
"loss": 0.0147, |
|
"step": 9680 |
|
}, |
|
{ |
|
"epoch": 4.874245472837022, |
|
"grad_norm": 0.18850040435791016, |
|
"learning_rate": 3.4568410809385774e-07, |
|
"loss": 0.0146, |
|
"step": 9690 |
|
}, |
|
{ |
|
"epoch": 4.879275653923541, |
|
"grad_norm": 0.2946015000343323, |
|
"learning_rate": 3.185968744144563e-07, |
|
"loss": 0.0107, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 4.8843058350100605, |
|
"grad_norm": 0.29948338866233826, |
|
"learning_rate": 2.926129404550837e-07, |
|
"loss": 0.0088, |
|
"step": 9710 |
|
}, |
|
{ |
|
"epoch": 4.889336016096579, |
|
"grad_norm": 0.23983007669448853, |
|
"learning_rate": 2.6773259381268625e-07, |
|
"loss": 0.0176, |
|
"step": 9720 |
|
}, |
|
{ |
|
"epoch": 4.894366197183099, |
|
"grad_norm": 0.11162281781435013, |
|
"learning_rate": 2.439561098694254e-07, |
|
"loss": 0.0112, |
|
"step": 9730 |
|
}, |
|
{ |
|
"epoch": 4.899396378269618, |
|
"grad_norm": 0.1520148515701294, |
|
"learning_rate": 2.212837517896027e-07, |
|
"loss": 0.0105, |
|
"step": 9740 |
|
}, |
|
{ |
|
"epoch": 4.9044265593561365, |
|
"grad_norm": 0.1838511973619461, |
|
"learning_rate": 1.9971577051678404e-07, |
|
"loss": 0.0193, |
|
"step": 9750 |
|
}, |
|
{ |
|
"epoch": 4.909456740442656, |
|
"grad_norm": 0.1083097979426384, |
|
"learning_rate": 1.7925240477100203e-07, |
|
"loss": 0.0142, |
|
"step": 9760 |
|
}, |
|
{ |
|
"epoch": 4.914486921529175, |
|
"grad_norm": 0.16764095425605774, |
|
"learning_rate": 1.598938810461137e-07, |
|
"loss": 0.0102, |
|
"step": 9770 |
|
}, |
|
{ |
|
"epoch": 4.9195171026156945, |
|
"grad_norm": 0.22907580435276031, |
|
"learning_rate": 1.416404136073024e-07, |
|
"loss": 0.0144, |
|
"step": 9780 |
|
}, |
|
{ |
|
"epoch": 4.924547283702213, |
|
"grad_norm": 0.2832927703857422, |
|
"learning_rate": 1.2449220448870204e-07, |
|
"loss": 0.0134, |
|
"step": 9790 |
|
}, |
|
{ |
|
"epoch": 4.929577464788732, |
|
"grad_norm": 0.10838713496923447, |
|
"learning_rate": 1.0844944349114316e-07, |
|
"loss": 0.01, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 4.934607645875252, |
|
"grad_norm": 0.0964408740401268, |
|
"learning_rate": 9.351230818008815e-08, |
|
"loss": 0.0109, |
|
"step": 9810 |
|
}, |
|
{ |
|
"epoch": 4.9396378269617705, |
|
"grad_norm": 0.1444048136472702, |
|
"learning_rate": 7.968096388364377e-08, |
|
"loss": 0.0113, |
|
"step": 9820 |
|
}, |
|
{ |
|
"epoch": 4.94466800804829, |
|
"grad_norm": 0.09675177931785583, |
|
"learning_rate": 6.69555636907182e-08, |
|
"loss": 0.0104, |
|
"step": 9830 |
|
}, |
|
{ |
|
"epoch": 4.949698189134809, |
|
"grad_norm": 0.14633896946907043, |
|
"learning_rate": 5.533624844936691e-08, |
|
"loss": 0.009, |
|
"step": 9840 |
|
}, |
|
{ |
|
"epoch": 4.954728370221328, |
|
"grad_norm": 0.27734798192977905, |
|
"learning_rate": 4.4823146765182735e-08, |
|
"loss": 0.011, |
|
"step": 9850 |
|
}, |
|
{ |
|
"epoch": 4.959758551307847, |
|
"grad_norm": 0.2644200921058655, |
|
"learning_rate": 3.5416374999919235e-08, |
|
"loss": 0.0124, |
|
"step": 9860 |
|
}, |
|
{ |
|
"epoch": 4.964788732394366, |
|
"grad_norm": 0.24501149356365204, |
|
"learning_rate": 2.7116037270169538e-08, |
|
"loss": 0.0123, |
|
"step": 9870 |
|
}, |
|
{ |
|
"epoch": 4.969818913480886, |
|
"grad_norm": 0.24458244442939758, |
|
"learning_rate": 1.9922225446245e-08, |
|
"loss": 0.0146, |
|
"step": 9880 |
|
}, |
|
{ |
|
"epoch": 4.974849094567404, |
|
"grad_norm": 0.06575839966535568, |
|
"learning_rate": 1.383501915112051e-08, |
|
"loss": 0.0089, |
|
"step": 9890 |
|
}, |
|
{ |
|
"epoch": 4.979879275653923, |
|
"grad_norm": 0.2329985499382019, |
|
"learning_rate": 8.854485759568487e-09, |
|
"loss": 0.0136, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 4.984909456740443, |
|
"grad_norm": 0.17096395790576935, |
|
"learning_rate": 4.980680397448367e-09, |
|
"loss": 0.0118, |
|
"step": 9910 |
|
}, |
|
{ |
|
"epoch": 4.989939637826962, |
|
"grad_norm": 0.16430173814296722, |
|
"learning_rate": 2.213645941029352e-09, |
|
"loss": 0.0088, |
|
"step": 9920 |
|
}, |
|
{ |
|
"epoch": 4.994969818913481, |
|
"grad_norm": 0.23126807808876038, |
|
"learning_rate": 5.534130165907314e-10, |
|
"loss": 0.0184, |
|
"step": 9930 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 0.1565922051668167, |
|
"learning_rate": 0.0, |
|
"loss": 0.0118, |
|
"step": 9940 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"step": 9940, |
|
"total_flos": 3.545130433511883e+17, |
|
"train_loss": 0.029606477042259105, |
|
"train_runtime": 4441.4052, |
|
"train_samples_per_second": 35.808, |
|
"train_steps_per_second": 2.238 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 9940, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 5, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 3.545130433511883e+17, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
}