|
{ |
|
"best_global_step": null, |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 3.4056355472983433, |
|
"eval_steps": 500, |
|
"global_step": 11000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.003096454559529339, |
|
"grad_norm": 7.865213871002197, |
|
"learning_rate": 6.193868070610096e-07, |
|
"loss": 10.5439, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.006192909119058678, |
|
"grad_norm": 5.684272289276123, |
|
"learning_rate": 1.2387736141220192e-06, |
|
"loss": 10.2888, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.009289363678588018, |
|
"grad_norm": 4.032341003417969, |
|
"learning_rate": 1.8581604211830287e-06, |
|
"loss": 9.9454, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.012385818238117356, |
|
"grad_norm": 3.232361316680908, |
|
"learning_rate": 2.4775472282440385e-06, |
|
"loss": 9.6908, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.015482272797646695, |
|
"grad_norm": 2.7629575729370117, |
|
"learning_rate": 3.096934035305048e-06, |
|
"loss": 9.491, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.018578727357176035, |
|
"grad_norm": 2.439429998397827, |
|
"learning_rate": 3.7163208423660575e-06, |
|
"loss": 9.3421, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.021675181916705373, |
|
"grad_norm": 2.311237335205078, |
|
"learning_rate": 4.335707649427067e-06, |
|
"loss": 9.2172, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.02477163647623471, |
|
"grad_norm": 2.1415603160858154, |
|
"learning_rate": 4.955094456488077e-06, |
|
"loss": 9.1165, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.02786809103576405, |
|
"grad_norm": 2.0442802906036377, |
|
"learning_rate": 5.574481263549087e-06, |
|
"loss": 9.0171, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.03096454559529339, |
|
"grad_norm": 2.0417075157165527, |
|
"learning_rate": 6.193868070610096e-06, |
|
"loss": 8.9188, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.034061000154822725, |
|
"grad_norm": 1.906326413154602, |
|
"learning_rate": 6.813254877671105e-06, |
|
"loss": 8.817, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.03715745471435207, |
|
"grad_norm": 1.876010537147522, |
|
"learning_rate": 7.432641684732115e-06, |
|
"loss": 8.7205, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.04025390927388141, |
|
"grad_norm": 1.7335777282714844, |
|
"learning_rate": 8.052028491793125e-06, |
|
"loss": 8.6376, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.043350363833410746, |
|
"grad_norm": 1.6829620599746704, |
|
"learning_rate": 8.671415298854134e-06, |
|
"loss": 8.5273, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.046446818392940084, |
|
"grad_norm": 1.6329585313796997, |
|
"learning_rate": 9.290802105915144e-06, |
|
"loss": 8.4292, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.04954327295246942, |
|
"grad_norm": 1.62351655960083, |
|
"learning_rate": 9.910188912976154e-06, |
|
"loss": 8.3279, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.05263972751199876, |
|
"grad_norm": 1.5334705114364624, |
|
"learning_rate": 1.0529575720037164e-05, |
|
"loss": 8.2018, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.0557361820715281, |
|
"grad_norm": 1.5242592096328735, |
|
"learning_rate": 1.1148962527098173e-05, |
|
"loss": 8.1007, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.058832636631057436, |
|
"grad_norm": 1.5945011377334595, |
|
"learning_rate": 1.1768349334159183e-05, |
|
"loss": 7.972, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.06192909119058678, |
|
"grad_norm": 1.3093743324279785, |
|
"learning_rate": 1.2387736141220193e-05, |
|
"loss": 7.8736, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.06502554575011611, |
|
"grad_norm": 1.3056074380874634, |
|
"learning_rate": 1.30071229482812e-05, |
|
"loss": 7.7617, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.06812200030964545, |
|
"grad_norm": 1.2901231050491333, |
|
"learning_rate": 1.362650975534221e-05, |
|
"loss": 7.6573, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.07121845486917479, |
|
"grad_norm": 1.0811238288879395, |
|
"learning_rate": 1.424589656240322e-05, |
|
"loss": 7.5707, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.07431490942870414, |
|
"grad_norm": 0.9134311676025391, |
|
"learning_rate": 1.486528336946423e-05, |
|
"loss": 7.4959, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.07741136398823348, |
|
"grad_norm": 0.9673048257827759, |
|
"learning_rate": 1.548467017652524e-05, |
|
"loss": 7.4314, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.08050781854776282, |
|
"grad_norm": 1.0383951663970947, |
|
"learning_rate": 1.610405698358625e-05, |
|
"loss": 7.3523, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.08360427310729215, |
|
"grad_norm": 1.0910584926605225, |
|
"learning_rate": 1.6723443790647262e-05, |
|
"loss": 7.3133, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.08670072766682149, |
|
"grad_norm": 0.804308295249939, |
|
"learning_rate": 1.734283059770827e-05, |
|
"loss": 7.2522, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.08979718222635083, |
|
"grad_norm": 0.9341151714324951, |
|
"learning_rate": 1.796221740476928e-05, |
|
"loss": 7.2261, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.09289363678588017, |
|
"grad_norm": 0.8165347576141357, |
|
"learning_rate": 1.8581604211830288e-05, |
|
"loss": 7.2088, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.0959900913454095, |
|
"grad_norm": 0.6941328644752502, |
|
"learning_rate": 1.9200991018891298e-05, |
|
"loss": 7.1554, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.09908654590493884, |
|
"grad_norm": 0.7364155650138855, |
|
"learning_rate": 1.9820377825952308e-05, |
|
"loss": 7.1313, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.10218300046446818, |
|
"grad_norm": 1.3144842386245728, |
|
"learning_rate": 2.0439764633013317e-05, |
|
"loss": 7.1198, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.10527945502399752, |
|
"grad_norm": 0.703687846660614, |
|
"learning_rate": 2.1059151440074327e-05, |
|
"loss": 7.0936, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.10837590958352686, |
|
"grad_norm": 0.7936609387397766, |
|
"learning_rate": 2.1678538247135337e-05, |
|
"loss": 7.0966, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.1114723641430562, |
|
"grad_norm": 0.9979026317596436, |
|
"learning_rate": 2.2297925054196347e-05, |
|
"loss": 7.0917, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.11456881870258553, |
|
"grad_norm": 0.8398326635360718, |
|
"learning_rate": 2.2917311861257356e-05, |
|
"loss": 7.0791, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.11766527326211487, |
|
"grad_norm": 0.7220719456672668, |
|
"learning_rate": 2.3536698668318366e-05, |
|
"loss": 7.057, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.12076172782164422, |
|
"grad_norm": 0.8845738172531128, |
|
"learning_rate": 2.4156085475379376e-05, |
|
"loss": 7.0476, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.12385818238117356, |
|
"grad_norm": 0.8084824085235596, |
|
"learning_rate": 2.4775472282440385e-05, |
|
"loss": 7.0369, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.1269546369407029, |
|
"grad_norm": 0.7229199409484863, |
|
"learning_rate": 2.5394859089501395e-05, |
|
"loss": 7.0193, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.13005109150023222, |
|
"grad_norm": 0.7450975179672241, |
|
"learning_rate": 2.60142458965624e-05, |
|
"loss": 7.0136, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.13314754605976156, |
|
"grad_norm": 1.1810022592544556, |
|
"learning_rate": 2.6633632703623415e-05, |
|
"loss": 7.0257, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.1362440006192909, |
|
"grad_norm": 0.724097728729248, |
|
"learning_rate": 2.725301951068442e-05, |
|
"loss": 7.0076, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.13934045517882024, |
|
"grad_norm": 0.8406842350959778, |
|
"learning_rate": 2.7872406317745434e-05, |
|
"loss": 6.9976, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.14243690973834958, |
|
"grad_norm": 0.8269332647323608, |
|
"learning_rate": 2.849179312480644e-05, |
|
"loss": 6.9812, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.14553336429787894, |
|
"grad_norm": 0.7661322355270386, |
|
"learning_rate": 2.9111179931867453e-05, |
|
"loss": 7.0072, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.14862981885740828, |
|
"grad_norm": 0.6673895120620728, |
|
"learning_rate": 2.973056673892846e-05, |
|
"loss": 6.9775, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.15172627341693762, |
|
"grad_norm": 1.1476161479949951, |
|
"learning_rate": 3.0349953545989473e-05, |
|
"loss": 6.9496, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.15482272797646696, |
|
"grad_norm": 1.0809210538864136, |
|
"learning_rate": 3.096934035305048e-05, |
|
"loss": 6.9578, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.1579191825359963, |
|
"grad_norm": 0.8364447951316833, |
|
"learning_rate": 3.158872716011149e-05, |
|
"loss": 6.9371, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.16101563709552563, |
|
"grad_norm": 0.9381659030914307, |
|
"learning_rate": 3.22081139671725e-05, |
|
"loss": 6.9373, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.16411209165505497, |
|
"grad_norm": 0.8810213804244995, |
|
"learning_rate": 3.2827500774233505e-05, |
|
"loss": 6.9463, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.1672085462145843, |
|
"grad_norm": 0.8275142908096313, |
|
"learning_rate": 3.3446887581294525e-05, |
|
"loss": 6.932, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.17030500077411365, |
|
"grad_norm": 0.6804556846618652, |
|
"learning_rate": 3.406627438835553e-05, |
|
"loss": 6.9181, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.17340145533364298, |
|
"grad_norm": 0.7559427618980408, |
|
"learning_rate": 3.468566119541654e-05, |
|
"loss": 6.9202, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.17649790989317232, |
|
"grad_norm": 0.6762346029281616, |
|
"learning_rate": 3.5305048002477544e-05, |
|
"loss": 6.9081, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.17959436445270166, |
|
"grad_norm": 0.6671234369277954, |
|
"learning_rate": 3.592443480953856e-05, |
|
"loss": 6.9216, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.182690819012231, |
|
"grad_norm": 0.9335949420928955, |
|
"learning_rate": 3.654382161659957e-05, |
|
"loss": 6.9034, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.18578727357176034, |
|
"grad_norm": 0.9805537462234497, |
|
"learning_rate": 3.7163208423660576e-05, |
|
"loss": 6.895, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.18888372813128967, |
|
"grad_norm": 0.8761160969734192, |
|
"learning_rate": 3.778259523072158e-05, |
|
"loss": 6.9029, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.191980182690819, |
|
"grad_norm": 0.8361015915870667, |
|
"learning_rate": 3.8401982037782596e-05, |
|
"loss": 6.8819, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.19507663725034835, |
|
"grad_norm": 0.6740533709526062, |
|
"learning_rate": 3.902136884484361e-05, |
|
"loss": 6.8882, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.1981730918098777, |
|
"grad_norm": 0.8334875702857971, |
|
"learning_rate": 3.9640755651904615e-05, |
|
"loss": 6.8917, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.20126954636940703, |
|
"grad_norm": 0.7946698665618896, |
|
"learning_rate": 4.026014245896562e-05, |
|
"loss": 6.8712, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.20436600092893636, |
|
"grad_norm": 1.1773180961608887, |
|
"learning_rate": 4.0879529266026635e-05, |
|
"loss": 6.8963, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.2074624554884657, |
|
"grad_norm": 0.6932355165481567, |
|
"learning_rate": 4.149891607308765e-05, |
|
"loss": 6.8718, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.21055891004799504, |
|
"grad_norm": 0.8239333629608154, |
|
"learning_rate": 4.2118302880148654e-05, |
|
"loss": 6.8549, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.21365536460752438, |
|
"grad_norm": 0.8844727873802185, |
|
"learning_rate": 4.273768968720966e-05, |
|
"loss": 6.8687, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.21675181916705372, |
|
"grad_norm": 0.8168037533760071, |
|
"learning_rate": 4.3357076494270674e-05, |
|
"loss": 6.8457, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.21984827372658305, |
|
"grad_norm": 0.7363680601119995, |
|
"learning_rate": 4.397646330133168e-05, |
|
"loss": 6.8538, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.2229447282861124, |
|
"grad_norm": 0.9639245867729187, |
|
"learning_rate": 4.459585010839269e-05, |
|
"loss": 6.855, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.22604118284564173, |
|
"grad_norm": 0.7763282656669617, |
|
"learning_rate": 4.52152369154537e-05, |
|
"loss": 6.8254, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.22913763740517107, |
|
"grad_norm": 1.482752799987793, |
|
"learning_rate": 4.583462372251471e-05, |
|
"loss": 6.8331, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.2322340919647004, |
|
"grad_norm": 0.8456624150276184, |
|
"learning_rate": 4.645401052957572e-05, |
|
"loss": 6.8513, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.23533054652422974, |
|
"grad_norm": 0.9166210889816284, |
|
"learning_rate": 4.707339733663673e-05, |
|
"loss": 6.8402, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.23842700108375908, |
|
"grad_norm": 0.8375464677810669, |
|
"learning_rate": 4.769278414369774e-05, |
|
"loss": 6.8337, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.24152345564328845, |
|
"grad_norm": 1.267236590385437, |
|
"learning_rate": 4.831217095075875e-05, |
|
"loss": 6.8193, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.2446199102028178, |
|
"grad_norm": 0.6456039547920227, |
|
"learning_rate": 4.893155775781976e-05, |
|
"loss": 6.7969, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.24771636476234712, |
|
"grad_norm": 0.8981896638870239, |
|
"learning_rate": 4.955094456488077e-05, |
|
"loss": 6.8133, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.25081281932187643, |
|
"grad_norm": 1.120186686515808, |
|
"learning_rate": 5.017033137194178e-05, |
|
"loss": 6.8056, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.2539092738814058, |
|
"grad_norm": 2.292698621749878, |
|
"learning_rate": 5.078971817900279e-05, |
|
"loss": 6.8308, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.2570057284409351, |
|
"grad_norm": 0.7018686532974243, |
|
"learning_rate": 5.1409104986063797e-05, |
|
"loss": 6.8154, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.26010218300046445, |
|
"grad_norm": 0.8676766753196716, |
|
"learning_rate": 5.20284917931248e-05, |
|
"loss": 6.8134, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.2631986375599938, |
|
"grad_norm": 1.0170965194702148, |
|
"learning_rate": 5.2647878600185816e-05, |
|
"loss": 6.8014, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.2662950921195231, |
|
"grad_norm": 1.100301742553711, |
|
"learning_rate": 5.326726540724683e-05, |
|
"loss": 6.7759, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.26939154667905246, |
|
"grad_norm": 0.9783535003662109, |
|
"learning_rate": 5.3886652214307835e-05, |
|
"loss": 6.7684, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.2724880012385818, |
|
"grad_norm": 1.2189717292785645, |
|
"learning_rate": 5.450603902136884e-05, |
|
"loss": 6.7609, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.27558445579811114, |
|
"grad_norm": 0.9612496495246887, |
|
"learning_rate": 5.5125425828429855e-05, |
|
"loss": 6.7852, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.2786809103576405, |
|
"grad_norm": 1.201369047164917, |
|
"learning_rate": 5.574481263549087e-05, |
|
"loss": 6.7685, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.2817773649171698, |
|
"grad_norm": 1.0445016622543335, |
|
"learning_rate": 5.6364199442551874e-05, |
|
"loss": 6.7863, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.28487381947669915, |
|
"grad_norm": 0.9389632940292358, |
|
"learning_rate": 5.698358624961288e-05, |
|
"loss": 6.7803, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.2879702740362285, |
|
"grad_norm": 1.522533655166626, |
|
"learning_rate": 5.7602973056673894e-05, |
|
"loss": 6.7642, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.2910667285957579, |
|
"grad_norm": 0.5819054841995239, |
|
"learning_rate": 5.822235986373491e-05, |
|
"loss": 6.772, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.2941631831552872, |
|
"grad_norm": 0.5492868423461914, |
|
"learning_rate": 5.884174667079591e-05, |
|
"loss": 6.7712, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.29725963771481656, |
|
"grad_norm": 0.9563374519348145, |
|
"learning_rate": 5.946113347785692e-05, |
|
"loss": 6.7602, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.3003560922743459, |
|
"grad_norm": 1.8112778663635254, |
|
"learning_rate": 6.0080520284917926e-05, |
|
"loss": 6.774, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.30345254683387524, |
|
"grad_norm": 1.9124343395233154, |
|
"learning_rate": 6.0699907091978946e-05, |
|
"loss": 6.7692, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.3065490013934046, |
|
"grad_norm": 1.0520577430725098, |
|
"learning_rate": 6.131929389903995e-05, |
|
"loss": 6.7624, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.3096454559529339, |
|
"grad_norm": 0.9971650242805481, |
|
"learning_rate": 6.193868070610096e-05, |
|
"loss": 6.7597, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.31274191051246325, |
|
"grad_norm": 0.7130516171455383, |
|
"learning_rate": 6.255806751316196e-05, |
|
"loss": 6.7548, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.3158383650719926, |
|
"grad_norm": 0.8867819309234619, |
|
"learning_rate": 6.317745432022298e-05, |
|
"loss": 6.7416, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.3189348196315219, |
|
"grad_norm": 2.448023557662964, |
|
"learning_rate": 6.379684112728398e-05, |
|
"loss": 6.7675, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.32203127419105126, |
|
"grad_norm": 2.0288820266723633, |
|
"learning_rate": 6.4416227934345e-05, |
|
"loss": 6.7555, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.3251277287505806, |
|
"grad_norm": 0.645900309085846, |
|
"learning_rate": 6.503561474140602e-05, |
|
"loss": 6.7557, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.32822418331010994, |
|
"grad_norm": 0.7342972159385681, |
|
"learning_rate": 6.565500154846701e-05, |
|
"loss": 6.7452, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.3313206378696393, |
|
"grad_norm": 1.523195743560791, |
|
"learning_rate": 6.627438835552803e-05, |
|
"loss": 6.7476, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.3344170924291686, |
|
"grad_norm": 1.812499761581421, |
|
"learning_rate": 6.689377516258905e-05, |
|
"loss": 6.7432, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.33751354698869795, |
|
"grad_norm": 0.8007811307907104, |
|
"learning_rate": 6.751316196965004e-05, |
|
"loss": 6.7387, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.3406100015482273, |
|
"grad_norm": 1.449756145477295, |
|
"learning_rate": 6.813254877671106e-05, |
|
"loss": 6.7323, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.34370645610775663, |
|
"grad_norm": 1.145936369895935, |
|
"learning_rate": 6.875193558377207e-05, |
|
"loss": 6.7396, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.34680291066728597, |
|
"grad_norm": 1.155754804611206, |
|
"learning_rate": 6.937132239083308e-05, |
|
"loss": 6.7288, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.3498993652268153, |
|
"grad_norm": 1.3261879682540894, |
|
"learning_rate": 6.99907091978941e-05, |
|
"loss": 6.717, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.35299581978634464, |
|
"grad_norm": 2.5398218631744385, |
|
"learning_rate": 7.061009600495509e-05, |
|
"loss": 6.7055, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.356092274345874, |
|
"grad_norm": 0.6757873892784119, |
|
"learning_rate": 7.122948281201611e-05, |
|
"loss": 6.7242, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.3591887289054033, |
|
"grad_norm": 0.8870462775230408, |
|
"learning_rate": 7.184886961907711e-05, |
|
"loss": 6.7241, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.36228518346493266, |
|
"grad_norm": 2.03185772895813, |
|
"learning_rate": 7.246825642613812e-05, |
|
"loss": 6.7364, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.365381638024462, |
|
"grad_norm": 1.013759970664978, |
|
"learning_rate": 7.308764323319914e-05, |
|
"loss": 6.7151, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.36847809258399133, |
|
"grad_norm": 1.6533416509628296, |
|
"learning_rate": 7.370703004026015e-05, |
|
"loss": 6.7207, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.3715745471435207, |
|
"grad_norm": 1.0296862125396729, |
|
"learning_rate": 7.432641684732115e-05, |
|
"loss": 6.7154, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.37467100170305, |
|
"grad_norm": 0.7925991415977478, |
|
"learning_rate": 7.494580365438217e-05, |
|
"loss": 6.7036, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.37776745626257935, |
|
"grad_norm": 1.123253345489502, |
|
"learning_rate": 7.556519046144317e-05, |
|
"loss": 6.6981, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.3808639108221087, |
|
"grad_norm": 1.2927206754684448, |
|
"learning_rate": 7.618457726850419e-05, |
|
"loss": 6.7105, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.383960365381638, |
|
"grad_norm": 1.2877053022384644, |
|
"learning_rate": 7.680396407556519e-05, |
|
"loss": 6.7046, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.38705681994116736, |
|
"grad_norm": 1.5025876760482788, |
|
"learning_rate": 7.74233508826262e-05, |
|
"loss": 6.7097, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.3901532745006967, |
|
"grad_norm": 1.8476455211639404, |
|
"learning_rate": 7.804273768968722e-05, |
|
"loss": 6.7091, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.39324972906022604, |
|
"grad_norm": 1.1083704233169556, |
|
"learning_rate": 7.866212449674822e-05, |
|
"loss": 6.7281, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.3963461836197554, |
|
"grad_norm": 1.9753637313842773, |
|
"learning_rate": 7.928151130380923e-05, |
|
"loss": 6.6795, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.3994426381792847, |
|
"grad_norm": 0.6769999265670776, |
|
"learning_rate": 7.990089811087024e-05, |
|
"loss": 6.7043, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.40253909273881405, |
|
"grad_norm": 1.1025127172470093, |
|
"learning_rate": 8.052028491793124e-05, |
|
"loss": 6.673, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.4056355472983434, |
|
"grad_norm": 1.132672667503357, |
|
"learning_rate": 8.113967172499226e-05, |
|
"loss": 6.6962, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.40873200185787273, |
|
"grad_norm": 3.0605337619781494, |
|
"learning_rate": 8.175905853205327e-05, |
|
"loss": 6.693, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.41182845641740207, |
|
"grad_norm": 1.0931648015975952, |
|
"learning_rate": 8.237844533911428e-05, |
|
"loss": 6.6865, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.4149249109769314, |
|
"grad_norm": 1.2315603494644165, |
|
"learning_rate": 8.29978321461753e-05, |
|
"loss": 6.6753, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.41802136553646074, |
|
"grad_norm": 1.4472100734710693, |
|
"learning_rate": 8.36172189532363e-05, |
|
"loss": 6.6882, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.4211178200959901, |
|
"grad_norm": 1.6784274578094482, |
|
"learning_rate": 8.423660576029731e-05, |
|
"loss": 6.6776, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.4242142746555194, |
|
"grad_norm": 2.4951741695404053, |
|
"learning_rate": 8.485599256735831e-05, |
|
"loss": 6.6813, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.42731072921504876, |
|
"grad_norm": 2.3850290775299072, |
|
"learning_rate": 8.547537937441932e-05, |
|
"loss": 6.6729, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.4304071837745781, |
|
"grad_norm": 0.8592017889022827, |
|
"learning_rate": 8.609476618148034e-05, |
|
"loss": 6.681, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.43350363833410743, |
|
"grad_norm": 1.178676962852478, |
|
"learning_rate": 8.671415298854135e-05, |
|
"loss": 6.6717, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.43660009289363677, |
|
"grad_norm": 1.6043617725372314, |
|
"learning_rate": 8.733353979560235e-05, |
|
"loss": 6.6647, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.4396965474531661, |
|
"grad_norm": 0.872035562992096, |
|
"learning_rate": 8.795292660266336e-05, |
|
"loss": 6.666, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.44279300201269545, |
|
"grad_norm": 1.1680723428726196, |
|
"learning_rate": 8.857231340972438e-05, |
|
"loss": 6.6622, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.4458894565722248, |
|
"grad_norm": 0.8795621991157532, |
|
"learning_rate": 8.919170021678539e-05, |
|
"loss": 6.64, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.4489859111317541, |
|
"grad_norm": 1.5785902738571167, |
|
"learning_rate": 8.981108702384639e-05, |
|
"loss": 6.6613, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.45208236569128346, |
|
"grad_norm": 1.319611668586731, |
|
"learning_rate": 9.04304738309074e-05, |
|
"loss": 6.6573, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.4551788202508128, |
|
"grad_norm": 1.0796053409576416, |
|
"learning_rate": 9.104986063796842e-05, |
|
"loss": 6.6614, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.45827527481034214, |
|
"grad_norm": 1.2139097452163696, |
|
"learning_rate": 9.166924744502942e-05, |
|
"loss": 6.6461, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.4613717293698715, |
|
"grad_norm": 1.3955761194229126, |
|
"learning_rate": 9.228863425209043e-05, |
|
"loss": 6.6611, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.4644681839294008, |
|
"grad_norm": 1.5178614854812622, |
|
"learning_rate": 9.290802105915144e-05, |
|
"loss": 6.6615, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.46756463848893015, |
|
"grad_norm": 1.3112921714782715, |
|
"learning_rate": 9.352740786621246e-05, |
|
"loss": 6.6644, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.4706610930484595, |
|
"grad_norm": 1.5961909294128418, |
|
"learning_rate": 9.414679467327346e-05, |
|
"loss": 6.673, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.4737575476079888, |
|
"grad_norm": 1.0166618824005127, |
|
"learning_rate": 9.476618148033447e-05, |
|
"loss": 6.647, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.47685400216751817, |
|
"grad_norm": 1.2850325107574463, |
|
"learning_rate": 9.538556828739548e-05, |
|
"loss": 6.6536, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.4799504567270475, |
|
"grad_norm": 1.1776533126831055, |
|
"learning_rate": 9.600495509445648e-05, |
|
"loss": 6.6446, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.4830469112865769, |
|
"grad_norm": 1.783477544784546, |
|
"learning_rate": 9.66243419015175e-05, |
|
"loss": 6.6353, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.48614336584610623, |
|
"grad_norm": 1.7229933738708496, |
|
"learning_rate": 9.724372870857851e-05, |
|
"loss": 6.6363, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.4892398204056356, |
|
"grad_norm": 0.9246505498886108, |
|
"learning_rate": 9.786311551563952e-05, |
|
"loss": 6.6616, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.4923362749651649, |
|
"grad_norm": 1.7007242441177368, |
|
"learning_rate": 9.848250232270054e-05, |
|
"loss": 6.6608, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.49543272952469425, |
|
"grad_norm": 1.145609974861145, |
|
"learning_rate": 9.910188912976154e-05, |
|
"loss": 6.6282, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.4985291840842236, |
|
"grad_norm": 1.1772605180740356, |
|
"learning_rate": 9.972127593682255e-05, |
|
"loss": 6.6463, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.5016256386437529, |
|
"grad_norm": 0.8392823338508606, |
|
"learning_rate": 0.00010034066274388355, |
|
"loss": 6.6387, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.5047220932032822, |
|
"grad_norm": 1.2767823934555054, |
|
"learning_rate": 0.00010096004955094456, |
|
"loss": 6.6571, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.5078185477628115, |
|
"grad_norm": 2.3833205699920654, |
|
"learning_rate": 0.00010157943635800558, |
|
"loss": 6.6407, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.5109150023223409, |
|
"grad_norm": 1.3098053932189941, |
|
"learning_rate": 0.00010219882316506659, |
|
"loss": 6.6357, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.5140114568818702, |
|
"grad_norm": 1.2075214385986328, |
|
"learning_rate": 0.00010281820997212759, |
|
"loss": 6.6368, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.5171079114413996, |
|
"grad_norm": 1.251852035522461, |
|
"learning_rate": 0.00010343759677918861, |
|
"loss": 6.6255, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.5202043660009289, |
|
"grad_norm": 1.3142434358596802, |
|
"learning_rate": 0.0001040569835862496, |
|
"loss": 6.6218, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.5233008205604582, |
|
"grad_norm": 2.4521663188934326, |
|
"learning_rate": 0.00010467637039331063, |
|
"loss": 6.6264, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.5263972751199876, |
|
"grad_norm": 1.0846492052078247, |
|
"learning_rate": 0.00010529575720037163, |
|
"loss": 6.6515, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.5294937296795169, |
|
"grad_norm": 1.6620179414749146, |
|
"learning_rate": 0.00010591514400743264, |
|
"loss": 6.6323, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.5325901842390462, |
|
"grad_norm": 0.8557600975036621, |
|
"learning_rate": 0.00010653453081449366, |
|
"loss": 6.6437, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.5356866387985756, |
|
"grad_norm": 0.6991952061653137, |
|
"learning_rate": 0.00010715391762155466, |
|
"loss": 6.6211, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.5387830933581049, |
|
"grad_norm": 1.5852376222610474, |
|
"learning_rate": 0.00010777330442861567, |
|
"loss": 6.5952, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.5418795479176343, |
|
"grad_norm": 1.642796516418457, |
|
"learning_rate": 0.00010839269123567669, |
|
"loss": 6.6295, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.5449760024771636, |
|
"grad_norm": 1.2764023542404175, |
|
"learning_rate": 0.00010901207804273768, |
|
"loss": 6.6294, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.5480724570366929, |
|
"grad_norm": 1.6868603229522705, |
|
"learning_rate": 0.0001096314648497987, |
|
"loss": 6.6292, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.5511689115962223, |
|
"grad_norm": 1.3303276300430298, |
|
"learning_rate": 0.00011025085165685971, |
|
"loss": 6.5998, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.5542653661557516, |
|
"grad_norm": 1.396274447441101, |
|
"learning_rate": 0.00011087023846392072, |
|
"loss": 6.6223, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.557361820715281, |
|
"grad_norm": 0.879639744758606, |
|
"learning_rate": 0.00011148962527098174, |
|
"loss": 6.612, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.5604582752748103, |
|
"grad_norm": 1.1366828680038452, |
|
"learning_rate": 0.00011210901207804273, |
|
"loss": 6.6122, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.5635547298343396, |
|
"grad_norm": 1.480747938156128, |
|
"learning_rate": 0.00011272839888510375, |
|
"loss": 6.6094, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.566651184393869, |
|
"grad_norm": 1.1296987533569336, |
|
"learning_rate": 0.00011334778569216477, |
|
"loss": 6.6194, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.5697476389533983, |
|
"grad_norm": 0.9196439385414124, |
|
"learning_rate": 0.00011396717249922576, |
|
"loss": 6.5923, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.5728440935129276, |
|
"grad_norm": 1.3304774761199951, |
|
"learning_rate": 0.00011458655930628678, |
|
"loss": 6.6129, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.575940548072457, |
|
"grad_norm": 1.071112871170044, |
|
"learning_rate": 0.00011520594611334779, |
|
"loss": 6.6095, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.5790370026319864, |
|
"grad_norm": 1.1381322145462036, |
|
"learning_rate": 0.0001158253329204088, |
|
"loss": 6.5962, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.5821334571915158, |
|
"grad_norm": 2.608501672744751, |
|
"learning_rate": 0.00011644471972746981, |
|
"loss": 6.6024, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.5852299117510451, |
|
"grad_norm": 1.4727625846862793, |
|
"learning_rate": 0.0001170641065345308, |
|
"loss": 6.5914, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.5883263663105744, |
|
"grad_norm": 1.192298173904419, |
|
"learning_rate": 0.00011768349334159183, |
|
"loss": 6.6072, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.5914228208701038, |
|
"grad_norm": 0.9773418307304382, |
|
"learning_rate": 0.00011830288014865285, |
|
"loss": 6.5805, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 0.5945192754296331, |
|
"grad_norm": 1.096369743347168, |
|
"learning_rate": 0.00011892226695571384, |
|
"loss": 6.6052, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.5976157299891625, |
|
"grad_norm": 1.2275642156600952, |
|
"learning_rate": 0.00011954165376277486, |
|
"loss": 6.594, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 0.6007121845486918, |
|
"grad_norm": 1.3209136724472046, |
|
"learning_rate": 0.00012016104056983585, |
|
"loss": 6.6078, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 0.6038086391082211, |
|
"grad_norm": 1.3680113554000854, |
|
"learning_rate": 0.00012078042737689687, |
|
"loss": 6.5793, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.6069050936677505, |
|
"grad_norm": 1.2960150241851807, |
|
"learning_rate": 0.00012139981418395789, |
|
"loss": 6.5969, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.6100015482272798, |
|
"grad_norm": 0.8884462118148804, |
|
"learning_rate": 0.00012201920099101888, |
|
"loss": 6.5862, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 0.6130980027868091, |
|
"grad_norm": 0.9539084434509277, |
|
"learning_rate": 0.0001226385877980799, |
|
"loss": 6.5797, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 0.6161944573463385, |
|
"grad_norm": 1.023714303970337, |
|
"learning_rate": 0.0001232579746051409, |
|
"loss": 6.5896, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 0.6192909119058678, |
|
"grad_norm": 1.0426772832870483, |
|
"learning_rate": 0.00012387736141220192, |
|
"loss": 6.6121, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.6223873664653972, |
|
"grad_norm": 1.4499601125717163, |
|
"learning_rate": 0.00012449674821926292, |
|
"loss": 6.6107, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 0.6254838210249265, |
|
"grad_norm": 1.2633146047592163, |
|
"learning_rate": 0.00012511613502632393, |
|
"loss": 6.5991, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 0.6285802755844558, |
|
"grad_norm": 0.845995306968689, |
|
"learning_rate": 0.00012573552183338496, |
|
"loss": 6.5722, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 0.6316767301439852, |
|
"grad_norm": 1.2431766986846924, |
|
"learning_rate": 0.00012635490864044597, |
|
"loss": 6.5958, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 0.6347731847035145, |
|
"grad_norm": 0.9436641335487366, |
|
"learning_rate": 0.00012697429544750698, |
|
"loss": 6.5901, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.6378696392630439, |
|
"grad_norm": 1.334149718284607, |
|
"learning_rate": 0.00012759368225456795, |
|
"loss": 6.5938, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 0.6409660938225732, |
|
"grad_norm": 0.9270686507225037, |
|
"learning_rate": 0.000128213069061629, |
|
"loss": 6.5767, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 0.6440625483821025, |
|
"grad_norm": 1.3940073251724243, |
|
"learning_rate": 0.00012883245586869, |
|
"loss": 6.5834, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 0.6471590029416319, |
|
"grad_norm": 1.163221001625061, |
|
"learning_rate": 0.000129451842675751, |
|
"loss": 6.5784, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 0.6502554575011612, |
|
"grad_norm": 0.9691527485847473, |
|
"learning_rate": 0.00013007122948281203, |
|
"loss": 6.5823, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.6533519120606905, |
|
"grad_norm": 0.7050260305404663, |
|
"learning_rate": 0.000130690616289873, |
|
"loss": 6.5847, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 0.6564483666202199, |
|
"grad_norm": 1.2201118469238281, |
|
"learning_rate": 0.00013131000309693402, |
|
"loss": 6.5952, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 0.6595448211797492, |
|
"grad_norm": 1.3519176244735718, |
|
"learning_rate": 0.00013192938990399505, |
|
"loss": 6.5822, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 0.6626412757392786, |
|
"grad_norm": 1.0712783336639404, |
|
"learning_rate": 0.00013254877671105606, |
|
"loss": 6.5677, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 0.6657377302988079, |
|
"grad_norm": 1.0584081411361694, |
|
"learning_rate": 0.00013316816351811707, |
|
"loss": 6.5859, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.6688341848583372, |
|
"grad_norm": 0.8563801050186157, |
|
"learning_rate": 0.0001337875503251781, |
|
"loss": 6.5902, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 0.6719306394178666, |
|
"grad_norm": 0.8715903162956238, |
|
"learning_rate": 0.00013440693713223908, |
|
"loss": 6.5875, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 0.6750270939773959, |
|
"grad_norm": 1.3086822032928467, |
|
"learning_rate": 0.00013502632393930008, |
|
"loss": 6.5905, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 0.6781235485369252, |
|
"grad_norm": 0.8140910267829895, |
|
"learning_rate": 0.00013564571074636112, |
|
"loss": 6.558, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 0.6812200030964546, |
|
"grad_norm": 0.8857564330101013, |
|
"learning_rate": 0.00013626509755342212, |
|
"loss": 6.5713, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.6843164576559839, |
|
"grad_norm": 1.4854942560195923, |
|
"learning_rate": 0.00013688448436048313, |
|
"loss": 6.5836, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 0.6874129122155133, |
|
"grad_norm": 1.4530035257339478, |
|
"learning_rate": 0.00013750387116754414, |
|
"loss": 6.5756, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 0.6905093667750426, |
|
"grad_norm": 0.8865880370140076, |
|
"learning_rate": 0.00013812325797460514, |
|
"loss": 6.5887, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 0.6936058213345719, |
|
"grad_norm": 0.8601120710372925, |
|
"learning_rate": 0.00013874264478166615, |
|
"loss": 6.5701, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 0.6967022758941013, |
|
"grad_norm": 0.8077085614204407, |
|
"learning_rate": 0.00013936203158872716, |
|
"loss": 6.5734, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.6997987304536306, |
|
"grad_norm": 0.7860495448112488, |
|
"learning_rate": 0.0001399814183957882, |
|
"loss": 6.5609, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 0.70289518501316, |
|
"grad_norm": 1.4957787990570068, |
|
"learning_rate": 0.00014060080520284917, |
|
"loss": 6.5588, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 0.7059916395726893, |
|
"grad_norm": 1.2393313646316528, |
|
"learning_rate": 0.00014122019200991018, |
|
"loss": 6.5752, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 0.7090880941322186, |
|
"grad_norm": 0.8842589855194092, |
|
"learning_rate": 0.0001418395788169712, |
|
"loss": 6.5574, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 0.712184548691748, |
|
"grad_norm": 0.7826055884361267, |
|
"learning_rate": 0.00014245896562403222, |
|
"loss": 6.5612, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.7152810032512773, |
|
"grad_norm": 0.9402616024017334, |
|
"learning_rate": 0.00014307835243109322, |
|
"loss": 6.5596, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 0.7183774578108066, |
|
"grad_norm": 1.274904727935791, |
|
"learning_rate": 0.00014369773923815423, |
|
"loss": 6.5796, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 0.721473912370336, |
|
"grad_norm": 1.112528681755066, |
|
"learning_rate": 0.00014431712604521523, |
|
"loss": 6.5563, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 0.7245703669298653, |
|
"grad_norm": 0.8044337630271912, |
|
"learning_rate": 0.00014493651285227624, |
|
"loss": 6.547, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 0.7276668214893947, |
|
"grad_norm": 1.0962836742401123, |
|
"learning_rate": 0.00014555589965933727, |
|
"loss": 6.5543, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 0.730763276048924, |
|
"grad_norm": 1.0332891941070557, |
|
"learning_rate": 0.00014617528646639828, |
|
"loss": 6.5486, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 0.7338597306084533, |
|
"grad_norm": 0.9583357572555542, |
|
"learning_rate": 0.00014679467327345926, |
|
"loss": 6.5602, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 0.7369561851679827, |
|
"grad_norm": 1.0913727283477783, |
|
"learning_rate": 0.0001474140600805203, |
|
"loss": 6.5468, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 0.740052639727512, |
|
"grad_norm": 1.192328929901123, |
|
"learning_rate": 0.0001480334468875813, |
|
"loss": 6.5476, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 0.7431490942870413, |
|
"grad_norm": 1.3153208494186401, |
|
"learning_rate": 0.0001486528336946423, |
|
"loss": 6.5502, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.7462455488465707, |
|
"grad_norm": 1.0659363269805908, |
|
"learning_rate": 0.0001492722205017033, |
|
"loss": 6.5458, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 0.7493420034061, |
|
"grad_norm": 0.6409627199172974, |
|
"learning_rate": 0.00014989160730876435, |
|
"loss": 6.5615, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 0.7524384579656294, |
|
"grad_norm": 1.534621238708496, |
|
"learning_rate": 0.00015051099411582532, |
|
"loss": 6.5413, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 0.7555349125251587, |
|
"grad_norm": 0.8091804385185242, |
|
"learning_rate": 0.00015113038092288633, |
|
"loss": 6.5558, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 0.758631367084688, |
|
"grad_norm": 1.1276757717132568, |
|
"learning_rate": 0.00015174976772994736, |
|
"loss": 6.5495, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 0.7617278216442174, |
|
"grad_norm": 1.1171313524246216, |
|
"learning_rate": 0.00015236915453700837, |
|
"loss": 6.5202, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 0.7648242762037467, |
|
"grad_norm": 0.8118519186973572, |
|
"learning_rate": 0.00015298854134406938, |
|
"loss": 6.5484, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 0.767920730763276, |
|
"grad_norm": 0.835800290107727, |
|
"learning_rate": 0.00015360792815113038, |
|
"loss": 6.5512, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 0.7710171853228054, |
|
"grad_norm": 1.2488937377929688, |
|
"learning_rate": 0.0001542273149581914, |
|
"loss": 6.5434, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 0.7741136398823347, |
|
"grad_norm": 1.0001873970031738, |
|
"learning_rate": 0.0001548467017652524, |
|
"loss": 6.5562, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.7772100944418641, |
|
"grad_norm": 1.329168438911438, |
|
"learning_rate": 0.00015546608857231343, |
|
"loss": 6.5432, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 0.7803065490013934, |
|
"grad_norm": 1.0739688873291016, |
|
"learning_rate": 0.00015608547537937444, |
|
"loss": 6.5359, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 0.7834030035609227, |
|
"grad_norm": 1.1103359460830688, |
|
"learning_rate": 0.00015670486218643541, |
|
"loss": 6.5514, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 0.7864994581204521, |
|
"grad_norm": 0.7088027596473694, |
|
"learning_rate": 0.00015732424899349645, |
|
"loss": 6.5415, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 0.7895959126799814, |
|
"grad_norm": 1.141654133796692, |
|
"learning_rate": 0.00015794363580055745, |
|
"loss": 6.5505, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 0.7926923672395108, |
|
"grad_norm": 0.9900869727134705, |
|
"learning_rate": 0.00015856302260761846, |
|
"loss": 6.5505, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 0.7957888217990401, |
|
"grad_norm": 0.9820410013198853, |
|
"learning_rate": 0.00015918240941467947, |
|
"loss": 6.5306, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 0.7988852763585694, |
|
"grad_norm": 1.1498329639434814, |
|
"learning_rate": 0.00015980179622174047, |
|
"loss": 6.5308, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 0.8019817309180988, |
|
"grad_norm": 1.5919135808944702, |
|
"learning_rate": 0.00016042118302880148, |
|
"loss": 6.5458, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 0.8050781854776281, |
|
"grad_norm": 1.7433273792266846, |
|
"learning_rate": 0.00016104056983586249, |
|
"loss": 6.5307, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.8081746400371574, |
|
"grad_norm": 1.2043076753616333, |
|
"learning_rate": 0.00016165995664292352, |
|
"loss": 6.5347, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 0.8112710945966868, |
|
"grad_norm": 1.2197911739349365, |
|
"learning_rate": 0.00016227934344998453, |
|
"loss": 6.5282, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 0.8143675491562161, |
|
"grad_norm": 0.8074585199356079, |
|
"learning_rate": 0.0001628987302570455, |
|
"loss": 6.542, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 0.8174640037157455, |
|
"grad_norm": 1.1308220624923706, |
|
"learning_rate": 0.00016351811706410654, |
|
"loss": 6.547, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 0.8205604582752748, |
|
"grad_norm": 0.989686131477356, |
|
"learning_rate": 0.00016413750387116755, |
|
"loss": 6.5418, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 0.8236569128348041, |
|
"grad_norm": 0.9891242980957031, |
|
"learning_rate": 0.00016475689067822855, |
|
"loss": 6.5211, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 0.8267533673943335, |
|
"grad_norm": 0.9230055212974548, |
|
"learning_rate": 0.00016537627748528958, |
|
"loss": 6.5371, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 0.8298498219538628, |
|
"grad_norm": 0.9501631259918213, |
|
"learning_rate": 0.0001659956642923506, |
|
"loss": 6.5325, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 0.8329462765133921, |
|
"grad_norm": 0.9849696755409241, |
|
"learning_rate": 0.00016661505109941157, |
|
"loss": 6.5395, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 0.8360427310729215, |
|
"grad_norm": 1.2444876432418823, |
|
"learning_rate": 0.0001672344379064726, |
|
"loss": 6.5156, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.8391391856324508, |
|
"grad_norm": 0.7472972869873047, |
|
"learning_rate": 0.0001678538247135336, |
|
"loss": 6.5228, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 0.8422356401919802, |
|
"grad_norm": 0.8915477991104126, |
|
"learning_rate": 0.00016847321152059462, |
|
"loss": 6.5242, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 0.8453320947515095, |
|
"grad_norm": 1.0017406940460205, |
|
"learning_rate": 0.00016909259832765562, |
|
"loss": 6.5449, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 0.8484285493110388, |
|
"grad_norm": 1.0427559614181519, |
|
"learning_rate": 0.00016971198513471663, |
|
"loss": 6.5257, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 0.8515250038705682, |
|
"grad_norm": 0.8571954965591431, |
|
"learning_rate": 0.00017033137194177764, |
|
"loss": 6.5203, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 0.8546214584300975, |
|
"grad_norm": 1.0811147689819336, |
|
"learning_rate": 0.00017095075874883864, |
|
"loss": 6.5196, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 0.8577179129896269, |
|
"grad_norm": 0.9217764735221863, |
|
"learning_rate": 0.00017157014555589968, |
|
"loss": 6.5189, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 0.8608143675491562, |
|
"grad_norm": 0.9920642375946045, |
|
"learning_rate": 0.00017218953236296068, |
|
"loss": 6.5191, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 0.8639108221086855, |
|
"grad_norm": 1.0834949016571045, |
|
"learning_rate": 0.00017280891917002166, |
|
"loss": 6.5227, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 0.8670072766682149, |
|
"grad_norm": 0.916513204574585, |
|
"learning_rate": 0.0001734283059770827, |
|
"loss": 6.5144, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.8701037312277442, |
|
"grad_norm": 1.2615902423858643, |
|
"learning_rate": 0.0001740476927841437, |
|
"loss": 6.506, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 0.8732001857872735, |
|
"grad_norm": 0.8685635924339294, |
|
"learning_rate": 0.0001746670795912047, |
|
"loss": 6.5142, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 0.8762966403468029, |
|
"grad_norm": 0.8606330156326294, |
|
"learning_rate": 0.00017528646639826574, |
|
"loss": 6.5019, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 0.8793930949063322, |
|
"grad_norm": 0.7754759192466736, |
|
"learning_rate": 0.00017590585320532672, |
|
"loss": 6.5119, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 0.8824895494658616, |
|
"grad_norm": 0.8332505226135254, |
|
"learning_rate": 0.00017652524001238773, |
|
"loss": 6.5283, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 0.8855860040253909, |
|
"grad_norm": 1.1799520254135132, |
|
"learning_rate": 0.00017714462681944876, |
|
"loss": 6.5043, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 0.8886824585849202, |
|
"grad_norm": 0.9492645859718323, |
|
"learning_rate": 0.00017776401362650977, |
|
"loss": 6.5106, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 0.8917789131444496, |
|
"grad_norm": 0.7921923995018005, |
|
"learning_rate": 0.00017838340043357077, |
|
"loss": 6.5065, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 0.8948753677039789, |
|
"grad_norm": 0.6766930818557739, |
|
"learning_rate": 0.00017900278724063175, |
|
"loss": 6.5239, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 0.8979718222635082, |
|
"grad_norm": 0.7052696347236633, |
|
"learning_rate": 0.00017962217404769278, |
|
"loss": 6.5378, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.9010682768230376, |
|
"grad_norm": 0.973673403263092, |
|
"learning_rate": 0.0001802415608547538, |
|
"loss": 6.5099, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 0.9041647313825669, |
|
"grad_norm": 0.8590471744537354, |
|
"learning_rate": 0.0001808609476618148, |
|
"loss": 6.522, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 0.9072611859420963, |
|
"grad_norm": 0.9478482604026794, |
|
"learning_rate": 0.00018148033446887583, |
|
"loss": 6.5148, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 0.9103576405016256, |
|
"grad_norm": 0.991057813167572, |
|
"learning_rate": 0.00018209972127593684, |
|
"loss": 6.4991, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 0.9134540950611549, |
|
"grad_norm": 0.8526809811592102, |
|
"learning_rate": 0.00018271910808299782, |
|
"loss": 6.5164, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 0.9165505496206843, |
|
"grad_norm": 0.6919571161270142, |
|
"learning_rate": 0.00018333849489005885, |
|
"loss": 6.5214, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 0.9196470041802136, |
|
"grad_norm": 0.657346248626709, |
|
"learning_rate": 0.00018395788169711986, |
|
"loss": 6.5013, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 0.922743458739743, |
|
"grad_norm": 0.8530818223953247, |
|
"learning_rate": 0.00018457726850418086, |
|
"loss": 6.5145, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 0.9258399132992723, |
|
"grad_norm": 0.8030965328216553, |
|
"learning_rate": 0.0001851966553112419, |
|
"loss": 6.513, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 0.9289363678588016, |
|
"grad_norm": 0.8161980509757996, |
|
"learning_rate": 0.00018581604211830288, |
|
"loss": 6.5074, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.932032822418331, |
|
"grad_norm": 0.9112780094146729, |
|
"learning_rate": 0.00018643542892536388, |
|
"loss": 6.4961, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 0.9351292769778603, |
|
"grad_norm": 0.8977142572402954, |
|
"learning_rate": 0.00018705481573242491, |
|
"loss": 6.4973, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 0.9382257315373896, |
|
"grad_norm": 1.0232683420181274, |
|
"learning_rate": 0.00018767420253948592, |
|
"loss": 6.48, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 0.941322186096919, |
|
"grad_norm": 0.8228316307067871, |
|
"learning_rate": 0.00018829358934654693, |
|
"loss": 6.4842, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 0.9444186406564483, |
|
"grad_norm": 0.724467396736145, |
|
"learning_rate": 0.0001889129761536079, |
|
"loss": 6.4781, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 0.9475150952159777, |
|
"grad_norm": 0.9022755026817322, |
|
"learning_rate": 0.00018953236296066894, |
|
"loss": 6.4799, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 0.950611549775507, |
|
"grad_norm": 1.0211142301559448, |
|
"learning_rate": 0.00019015174976772995, |
|
"loss": 6.4719, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 0.9537080043350363, |
|
"grad_norm": 0.7571627497673035, |
|
"learning_rate": 0.00019077113657479095, |
|
"loss": 6.4685, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 0.9568044588945657, |
|
"grad_norm": 0.797822117805481, |
|
"learning_rate": 0.00019139052338185199, |
|
"loss": 6.4502, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 0.959900913454095, |
|
"grad_norm": 1.1731350421905518, |
|
"learning_rate": 0.00019200991018891297, |
|
"loss": 6.4534, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.9629973680136245, |
|
"grad_norm": 0.7823401689529419, |
|
"learning_rate": 0.00019262929699597397, |
|
"loss": 6.4611, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 0.9660938225731538, |
|
"grad_norm": 1.2475049495697021, |
|
"learning_rate": 0.000193248683803035, |
|
"loss": 6.4389, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 0.9691902771326831, |
|
"grad_norm": 0.9524723887443542, |
|
"learning_rate": 0.000193868070610096, |
|
"loss": 6.4435, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 0.9722867316922125, |
|
"grad_norm": 0.9494399428367615, |
|
"learning_rate": 0.00019448745741715702, |
|
"loss": 6.4332, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 0.9753831862517418, |
|
"grad_norm": 1.0070710182189941, |
|
"learning_rate": 0.00019510684422421805, |
|
"loss": 6.4529, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 0.9784796408112711, |
|
"grad_norm": 1.180368185043335, |
|
"learning_rate": 0.00019572623103127903, |
|
"loss": 6.4369, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 0.9815760953708005, |
|
"grad_norm": 1.0592350959777832, |
|
"learning_rate": 0.00019634561783834004, |
|
"loss": 6.4402, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 0.9846725499303298, |
|
"grad_norm": 0.8868720531463623, |
|
"learning_rate": 0.00019696500464540107, |
|
"loss": 6.4406, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 0.9877690044898592, |
|
"grad_norm": 0.8809700608253479, |
|
"learning_rate": 0.00019758439145246208, |
|
"loss": 6.4125, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 0.9908654590493885, |
|
"grad_norm": 0.8676486611366272, |
|
"learning_rate": 0.00019820377825952308, |
|
"loss": 6.4281, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.9939619136089178, |
|
"grad_norm": 0.6752346754074097, |
|
"learning_rate": 0.0001988231650665841, |
|
"loss": 6.4041, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 0.9970583681684472, |
|
"grad_norm": 0.9568197131156921, |
|
"learning_rate": 0.0001994425518736451, |
|
"loss": 6.3981, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 0.5673872828483582, |
|
"learning_rate": 0.0002000619386807061, |
|
"loss": 6.071, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 1.0030964545595293, |
|
"grad_norm": 0.8268159627914429, |
|
"learning_rate": 0.0002006813254877671, |
|
"loss": 6.3988, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 1.0061929091190587, |
|
"grad_norm": 0.7635223269462585, |
|
"learning_rate": 0.00020130071229482814, |
|
"loss": 6.3831, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 1.009289363678588, |
|
"grad_norm": 0.7615036368370056, |
|
"learning_rate": 0.00020192009910188912, |
|
"loss": 6.3832, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 1.0123858182381174, |
|
"grad_norm": 0.7862409353256226, |
|
"learning_rate": 0.00020253948590895013, |
|
"loss": 6.3929, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 1.0154822727976467, |
|
"grad_norm": 1.113342046737671, |
|
"learning_rate": 0.00020315887271601116, |
|
"loss": 6.3917, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 1.018578727357176, |
|
"grad_norm": 0.8702403903007507, |
|
"learning_rate": 0.00020377825952307217, |
|
"loss": 6.3783, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 1.0216751819167054, |
|
"grad_norm": 0.8440068960189819, |
|
"learning_rate": 0.00020439764633013317, |
|
"loss": 6.3777, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 1.0247716364762347, |
|
"grad_norm": 1.1612240076065063, |
|
"learning_rate": 0.00020501703313719418, |
|
"loss": 6.3617, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 1.027868091035764, |
|
"grad_norm": 0.8664381504058838, |
|
"learning_rate": 0.00020563641994425519, |
|
"loss": 6.3766, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 1.0309645455952934, |
|
"grad_norm": 1.2137264013290405, |
|
"learning_rate": 0.0002062558067513162, |
|
"loss": 6.3724, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 1.0340610001548227, |
|
"grad_norm": 1.2266614437103271, |
|
"learning_rate": 0.00020687519355837723, |
|
"loss": 6.3313, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 1.037157454714352, |
|
"grad_norm": 0.8568953275680542, |
|
"learning_rate": 0.00020749458036543823, |
|
"loss": 6.3583, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 1.0402539092738814, |
|
"grad_norm": 0.874577522277832, |
|
"learning_rate": 0.0002081139671724992, |
|
"loss": 6.3511, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 1.0433503638334107, |
|
"grad_norm": 1.1219960451126099, |
|
"learning_rate": 0.00020873335397956024, |
|
"loss": 6.323, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 1.04644681839294, |
|
"grad_norm": 1.1575599908828735, |
|
"learning_rate": 0.00020935274078662125, |
|
"loss": 6.3426, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 1.0495432729524694, |
|
"grad_norm": 0.7617483139038086, |
|
"learning_rate": 0.00020997212759368226, |
|
"loss": 6.3243, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 1.0526397275119987, |
|
"grad_norm": 1.019921064376831, |
|
"learning_rate": 0.00021059151440074326, |
|
"loss": 6.303, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 1.055736182071528, |
|
"grad_norm": 1.034369945526123, |
|
"learning_rate": 0.0002112109012078043, |
|
"loss": 6.3092, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 1.0588326366310574, |
|
"grad_norm": 1.1426433324813843, |
|
"learning_rate": 0.00021183028801486528, |
|
"loss": 6.3023, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 1.0619290911905868, |
|
"grad_norm": 0.9942957162857056, |
|
"learning_rate": 0.00021244967482192628, |
|
"loss": 6.3104, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 1.065025545750116, |
|
"grad_norm": 1.0719786882400513, |
|
"learning_rate": 0.00021306906162898732, |
|
"loss": 6.2973, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 1.0681220003096454, |
|
"grad_norm": 1.0518437623977661, |
|
"learning_rate": 0.00021368844843604832, |
|
"loss": 6.2967, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 1.0712184548691748, |
|
"grad_norm": 1.2732771635055542, |
|
"learning_rate": 0.00021430783524310933, |
|
"loss": 6.2746, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 1.0743149094287041, |
|
"grad_norm": 1.5430597066879272, |
|
"learning_rate": 0.00021492722205017034, |
|
"loss": 6.2815, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 1.0774113639882335, |
|
"grad_norm": 0.8930633068084717, |
|
"learning_rate": 0.00021554660885723134, |
|
"loss": 6.251, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 1.0805078185477628, |
|
"grad_norm": 1.1095397472381592, |
|
"learning_rate": 0.00021616599566429235, |
|
"loss": 6.238, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 1.0836042731072921, |
|
"grad_norm": 1.1570417881011963, |
|
"learning_rate": 0.00021678538247135338, |
|
"loss": 6.2169, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 1.0867007276668215, |
|
"grad_norm": 1.2682262659072876, |
|
"learning_rate": 0.0002174047692784144, |
|
"loss": 6.2256, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 1.0897971822263508, |
|
"grad_norm": 1.2010442018508911, |
|
"learning_rate": 0.00021802415608547537, |
|
"loss": 6.2034, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 1.0928936367858801, |
|
"grad_norm": 1.3368873596191406, |
|
"learning_rate": 0.0002186435428925364, |
|
"loss": 6.1673, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 1.0959900913454095, |
|
"grad_norm": 1.1895204782485962, |
|
"learning_rate": 0.0002192629296995974, |
|
"loss": 6.1546, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 1.0990865459049388, |
|
"grad_norm": 1.1519889831542969, |
|
"learning_rate": 0.0002198823165066584, |
|
"loss": 6.1365, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 1.1021830004644682, |
|
"grad_norm": 1.3705570697784424, |
|
"learning_rate": 0.00022050170331371942, |
|
"loss": 6.07, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 1.1052794550239975, |
|
"grad_norm": 1.6378076076507568, |
|
"learning_rate": 0.00022112109012078043, |
|
"loss": 6.0432, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 1.1083759095835268, |
|
"grad_norm": 1.5780587196350098, |
|
"learning_rate": 0.00022174047692784143, |
|
"loss": 6.0201, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 1.1114723641430562, |
|
"grad_norm": 1.2604175806045532, |
|
"learning_rate": 0.00022235986373490244, |
|
"loss": 5.9781, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 1.1145688187025855, |
|
"grad_norm": 1.4099502563476562, |
|
"learning_rate": 0.00022297925054196347, |
|
"loss": 5.9298, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 1.1176652732621148, |
|
"grad_norm": 1.233045220375061, |
|
"learning_rate": 0.00022359863734902448, |
|
"loss": 5.8924, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 1.1207617278216442, |
|
"grad_norm": 1.4948160648345947, |
|
"learning_rate": 0.00022421802415608546, |
|
"loss": 5.8785, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 1.1238581823811735, |
|
"grad_norm": 1.7853126525878906, |
|
"learning_rate": 0.0002248374109631465, |
|
"loss": 5.8134, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 1.1269546369407029, |
|
"grad_norm": 2.1024398803710938, |
|
"learning_rate": 0.0002254567977702075, |
|
"loss": 5.7864, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 1.1300510915002322, |
|
"grad_norm": 1.6895965337753296, |
|
"learning_rate": 0.0002260761845772685, |
|
"loss": 5.7182, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 1.1331475460597615, |
|
"grad_norm": 1.7023606300354004, |
|
"learning_rate": 0.00022669557138432954, |
|
"loss": 5.6528, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 1.1362440006192909, |
|
"grad_norm": 1.2533527612686157, |
|
"learning_rate": 0.00022731495819139054, |
|
"loss": 5.6221, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 1.1393404551788202, |
|
"grad_norm": 1.621505618095398, |
|
"learning_rate": 0.00022793434499845152, |
|
"loss": 5.5583, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 1.1424369097383495, |
|
"grad_norm": 1.3869857788085938, |
|
"learning_rate": 0.00022855373180551256, |
|
"loss": 5.5145, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 1.1455333642978789, |
|
"grad_norm": 1.542646884918213, |
|
"learning_rate": 0.00022917311861257356, |
|
"loss": 5.4559, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 1.1486298188574082, |
|
"grad_norm": 1.4515721797943115, |
|
"learning_rate": 0.00022979250541963457, |
|
"loss": 5.4007, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 1.1517262734169376, |
|
"grad_norm": 1.7579517364501953, |
|
"learning_rate": 0.00023041189222669557, |
|
"loss": 5.3976, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 1.154822727976467, |
|
"grad_norm": 1.2533565759658813, |
|
"learning_rate": 0.00023103127903375658, |
|
"loss": 5.3172, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 1.1579191825359962, |
|
"grad_norm": 1.494162917137146, |
|
"learning_rate": 0.0002316506658408176, |
|
"loss": 5.2509, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 1.1610156370955256, |
|
"grad_norm": 1.595115065574646, |
|
"learning_rate": 0.0002322700526478786, |
|
"loss": 5.1784, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 1.164112091655055, |
|
"grad_norm": 1.55663001537323, |
|
"learning_rate": 0.00023288943945493963, |
|
"loss": 5.0949, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 1.1672085462145843, |
|
"grad_norm": 1.374272346496582, |
|
"learning_rate": 0.00023350882626200063, |
|
"loss": 5.0331, |
|
"step": 3770 |
|
}, |
|
{ |
|
"epoch": 1.1703050007741136, |
|
"grad_norm": 1.3195029497146606, |
|
"learning_rate": 0.0002341282130690616, |
|
"loss": 4.9576, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 1.173401455333643, |
|
"grad_norm": 1.161839485168457, |
|
"learning_rate": 0.00023474759987612265, |
|
"loss": 4.9166, |
|
"step": 3790 |
|
}, |
|
{ |
|
"epoch": 1.1764979098931723, |
|
"grad_norm": 1.2902604341506958, |
|
"learning_rate": 0.00023536698668318365, |
|
"loss": 4.8334, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 1.1795943644527016, |
|
"grad_norm": 1.0339348316192627, |
|
"learning_rate": 0.00023598637349024466, |
|
"loss": 4.7735, |
|
"step": 3810 |
|
}, |
|
{ |
|
"epoch": 1.182690819012231, |
|
"grad_norm": 1.1560925245285034, |
|
"learning_rate": 0.0002366057602973057, |
|
"loss": 4.7389, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 1.1857872735717603, |
|
"grad_norm": 1.0810256004333496, |
|
"learning_rate": 0.00023722514710436667, |
|
"loss": 4.673, |
|
"step": 3830 |
|
}, |
|
{ |
|
"epoch": 1.1888837281312896, |
|
"grad_norm": 1.187358021736145, |
|
"learning_rate": 0.00023784453391142768, |
|
"loss": 4.6703, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 1.191980182690819, |
|
"grad_norm": 1.2153098583221436, |
|
"learning_rate": 0.0002384639207184887, |
|
"loss": 4.5977, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 1.1950766372503483, |
|
"grad_norm": 1.3098320960998535, |
|
"learning_rate": 0.00023908330752554972, |
|
"loss": 4.5396, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 1.1981730918098776, |
|
"grad_norm": 1.3841015100479126, |
|
"learning_rate": 0.00023970269433261072, |
|
"loss": 4.5191, |
|
"step": 3870 |
|
}, |
|
{ |
|
"epoch": 1.201269546369407, |
|
"grad_norm": 1.0185471773147583, |
|
"learning_rate": 0.0002403220811396717, |
|
"loss": 4.4745, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 1.2043660009289363, |
|
"grad_norm": 1.0954643487930298, |
|
"learning_rate": 0.00024094146794673274, |
|
"loss": 4.4384, |
|
"step": 3890 |
|
}, |
|
{ |
|
"epoch": 1.2074624554884656, |
|
"grad_norm": 1.0373002290725708, |
|
"learning_rate": 0.00024156085475379374, |
|
"loss": 4.4049, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 1.210558910047995, |
|
"grad_norm": 1.0706144571304321, |
|
"learning_rate": 0.00024218024156085475, |
|
"loss": 4.3815, |
|
"step": 3910 |
|
}, |
|
{ |
|
"epoch": 1.2136553646075243, |
|
"grad_norm": 1.1758544445037842, |
|
"learning_rate": 0.00024279962836791578, |
|
"loss": 4.3619, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 1.2167518191670537, |
|
"grad_norm": 1.1079212427139282, |
|
"learning_rate": 0.0002434190151749768, |
|
"loss": 4.337, |
|
"step": 3930 |
|
}, |
|
{ |
|
"epoch": 1.219848273726583, |
|
"grad_norm": 1.1753212213516235, |
|
"learning_rate": 0.00024403840198203777, |
|
"loss": 4.3046, |
|
"step": 3940 |
|
}, |
|
{ |
|
"epoch": 1.2229447282861123, |
|
"grad_norm": 1.1949397325515747, |
|
"learning_rate": 0.00024465778878909883, |
|
"loss": 4.3101, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 1.2260411828456417, |
|
"grad_norm": 1.0809822082519531, |
|
"learning_rate": 0.0002452771755961598, |
|
"loss": 4.2624, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 1.229137637405171, |
|
"grad_norm": 1.113866925239563, |
|
"learning_rate": 0.0002458965624032208, |
|
"loss": 4.2679, |
|
"step": 3970 |
|
}, |
|
{ |
|
"epoch": 1.2322340919647004, |
|
"grad_norm": 1.1212016344070435, |
|
"learning_rate": 0.0002465159492102818, |
|
"loss": 4.2071, |
|
"step": 3980 |
|
}, |
|
{ |
|
"epoch": 1.2353305465242297, |
|
"grad_norm": 1.1517590284347534, |
|
"learning_rate": 0.00024713533601734285, |
|
"loss": 4.1929, |
|
"step": 3990 |
|
}, |
|
{ |
|
"epoch": 1.238427001083759, |
|
"grad_norm": 0.9486988186836243, |
|
"learning_rate": 0.00024775472282440383, |
|
"loss": 4.1934, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.2415234556432884, |
|
"grad_norm": 1.0706721544265747, |
|
"learning_rate": 0.00024837410963146487, |
|
"loss": 4.1776, |
|
"step": 4010 |
|
}, |
|
{ |
|
"epoch": 1.2446199102028177, |
|
"grad_norm": 1.1148719787597656, |
|
"learning_rate": 0.00024899349643852585, |
|
"loss": 4.1265, |
|
"step": 4020 |
|
}, |
|
{ |
|
"epoch": 1.247716364762347, |
|
"grad_norm": 1.1065315008163452, |
|
"learning_rate": 0.0002496128832455869, |
|
"loss": 4.1034, |
|
"step": 4030 |
|
}, |
|
{ |
|
"epoch": 1.2508128193218764, |
|
"grad_norm": 1.158066987991333, |
|
"learning_rate": 0.00025023227005264786, |
|
"loss": 4.0863, |
|
"step": 4040 |
|
}, |
|
{ |
|
"epoch": 1.2539092738814057, |
|
"grad_norm": 1.1560614109039307, |
|
"learning_rate": 0.0002508516568597089, |
|
"loss": 4.0778, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 1.257005728440935, |
|
"grad_norm": 0.98968905210495, |
|
"learning_rate": 0.0002514710436667699, |
|
"loss": 4.051, |
|
"step": 4060 |
|
}, |
|
{ |
|
"epoch": 1.2601021830004644, |
|
"grad_norm": 1.1713204383850098, |
|
"learning_rate": 0.0002520904304738309, |
|
"loss": 4.0181, |
|
"step": 4070 |
|
}, |
|
{ |
|
"epoch": 1.2631986375599937, |
|
"grad_norm": 1.1065443754196167, |
|
"learning_rate": 0.00025270981728089194, |
|
"loss": 4.0239, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 1.266295092119523, |
|
"grad_norm": 1.043097972869873, |
|
"learning_rate": 0.00025332920408795297, |
|
"loss": 4.0208, |
|
"step": 4090 |
|
}, |
|
{ |
|
"epoch": 1.2693915466790524, |
|
"grad_norm": 1.024276614189148, |
|
"learning_rate": 0.00025394859089501395, |
|
"loss": 3.9804, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 1.2724880012385817, |
|
"grad_norm": 1.1613043546676636, |
|
"learning_rate": 0.000254567977702075, |
|
"loss": 3.9819, |
|
"step": 4110 |
|
}, |
|
{ |
|
"epoch": 1.275584455798111, |
|
"grad_norm": 1.0510482788085938, |
|
"learning_rate": 0.0002551873645091359, |
|
"loss": 3.9696, |
|
"step": 4120 |
|
}, |
|
{ |
|
"epoch": 1.2786809103576404, |
|
"grad_norm": 0.9902080297470093, |
|
"learning_rate": 0.00025580675131619694, |
|
"loss": 3.9233, |
|
"step": 4130 |
|
}, |
|
{ |
|
"epoch": 1.2817773649171698, |
|
"grad_norm": 1.165866732597351, |
|
"learning_rate": 0.000256426138123258, |
|
"loss": 3.9079, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 1.284873819476699, |
|
"grad_norm": 1.0561455488204956, |
|
"learning_rate": 0.00025704552493031896, |
|
"loss": 3.9072, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 1.2879702740362284, |
|
"grad_norm": 0.989741325378418, |
|
"learning_rate": 0.00025766491173738, |
|
"loss": 3.9018, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 1.2910667285957578, |
|
"grad_norm": 1.099219799041748, |
|
"learning_rate": 0.000258284298544441, |
|
"loss": 3.868, |
|
"step": 4170 |
|
}, |
|
{ |
|
"epoch": 1.2941631831552871, |
|
"grad_norm": 1.1154602766036987, |
|
"learning_rate": 0.000258903685351502, |
|
"loss": 3.8644, |
|
"step": 4180 |
|
}, |
|
{ |
|
"epoch": 1.2972596377148164, |
|
"grad_norm": 1.0872890949249268, |
|
"learning_rate": 0.00025952307215856304, |
|
"loss": 3.8587, |
|
"step": 4190 |
|
}, |
|
{ |
|
"epoch": 1.3003560922743458, |
|
"grad_norm": 1.0499584674835205, |
|
"learning_rate": 0.00026014245896562407, |
|
"loss": 3.8235, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 1.3034525468338751, |
|
"grad_norm": 1.030174732208252, |
|
"learning_rate": 0.00026076184577268505, |
|
"loss": 3.8302, |
|
"step": 4210 |
|
}, |
|
{ |
|
"epoch": 1.3065490013934045, |
|
"grad_norm": 1.0867342948913574, |
|
"learning_rate": 0.000261381232579746, |
|
"loss": 3.8341, |
|
"step": 4220 |
|
}, |
|
{ |
|
"epoch": 1.3096454559529338, |
|
"grad_norm": 1.0520577430725098, |
|
"learning_rate": 0.00026200061938680706, |
|
"loss": 3.8018, |
|
"step": 4230 |
|
}, |
|
{ |
|
"epoch": 1.3127419105124631, |
|
"grad_norm": 1.0809017419815063, |
|
"learning_rate": 0.00026262000619386804, |
|
"loss": 3.7748, |
|
"step": 4240 |
|
}, |
|
{ |
|
"epoch": 1.3158383650719925, |
|
"grad_norm": 1.1091547012329102, |
|
"learning_rate": 0.0002632393930009291, |
|
"loss": 3.7732, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 1.3189348196315218, |
|
"grad_norm": 1.0448859930038452, |
|
"learning_rate": 0.0002638587798079901, |
|
"loss": 3.74, |
|
"step": 4260 |
|
}, |
|
{ |
|
"epoch": 1.3220312741910512, |
|
"grad_norm": 1.0798423290252686, |
|
"learning_rate": 0.0002644781666150511, |
|
"loss": 3.7374, |
|
"step": 4270 |
|
}, |
|
{ |
|
"epoch": 1.3251277287505805, |
|
"grad_norm": 0.9496048092842102, |
|
"learning_rate": 0.0002650975534221121, |
|
"loss": 3.7422, |
|
"step": 4280 |
|
}, |
|
{ |
|
"epoch": 1.3282241833101098, |
|
"grad_norm": 0.9731584787368774, |
|
"learning_rate": 0.00026571694022917315, |
|
"loss": 3.6992, |
|
"step": 4290 |
|
}, |
|
{ |
|
"epoch": 1.3313206378696392, |
|
"grad_norm": 0.9330194592475891, |
|
"learning_rate": 0.00026633632703623413, |
|
"loss": 3.6868, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 1.3344170924291685, |
|
"grad_norm": 1.0531985759735107, |
|
"learning_rate": 0.00026695571384329517, |
|
"loss": 3.6958, |
|
"step": 4310 |
|
}, |
|
{ |
|
"epoch": 1.3375135469886978, |
|
"grad_norm": 0.9694075584411621, |
|
"learning_rate": 0.0002675751006503562, |
|
"loss": 3.7137, |
|
"step": 4320 |
|
}, |
|
{ |
|
"epoch": 1.3406100015482272, |
|
"grad_norm": 0.9474936723709106, |
|
"learning_rate": 0.0002681944874574171, |
|
"loss": 3.6889, |
|
"step": 4330 |
|
}, |
|
{ |
|
"epoch": 1.3437064561077565, |
|
"grad_norm": 0.9624688029289246, |
|
"learning_rate": 0.00026881387426447816, |
|
"loss": 3.6531, |
|
"step": 4340 |
|
}, |
|
{ |
|
"epoch": 1.3468029106672859, |
|
"grad_norm": 0.9767426252365112, |
|
"learning_rate": 0.0002694332610715392, |
|
"loss": 3.6596, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 1.3498993652268152, |
|
"grad_norm": 0.9959364533424377, |
|
"learning_rate": 0.00027005264787860017, |
|
"loss": 3.6434, |
|
"step": 4360 |
|
}, |
|
{ |
|
"epoch": 1.3529958197863445, |
|
"grad_norm": 1.0519224405288696, |
|
"learning_rate": 0.0002706720346856612, |
|
"loss": 3.5982, |
|
"step": 4370 |
|
}, |
|
{ |
|
"epoch": 1.3560922743458739, |
|
"grad_norm": 0.9964626431465149, |
|
"learning_rate": 0.00027129142149272224, |
|
"loss": 3.6145, |
|
"step": 4380 |
|
}, |
|
{ |
|
"epoch": 1.3591887289054032, |
|
"grad_norm": 1.0506435632705688, |
|
"learning_rate": 0.0002719108082997832, |
|
"loss": 3.5859, |
|
"step": 4390 |
|
}, |
|
{ |
|
"epoch": 1.3622851834649325, |
|
"grad_norm": 1.0846556425094604, |
|
"learning_rate": 0.00027253019510684425, |
|
"loss": 3.5981, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 1.3653816380244619, |
|
"grad_norm": 1.0251847505569458, |
|
"learning_rate": 0.0002731495819139053, |
|
"loss": 3.5731, |
|
"step": 4410 |
|
}, |
|
{ |
|
"epoch": 1.3684780925839912, |
|
"grad_norm": 1.0184073448181152, |
|
"learning_rate": 0.00027376896872096626, |
|
"loss": 3.5665, |
|
"step": 4420 |
|
}, |
|
{ |
|
"epoch": 1.3715745471435206, |
|
"grad_norm": 0.9859119057655334, |
|
"learning_rate": 0.00027438835552802724, |
|
"loss": 3.5401, |
|
"step": 4430 |
|
}, |
|
{ |
|
"epoch": 1.37467100170305, |
|
"grad_norm": 0.9708986878395081, |
|
"learning_rate": 0.0002750077423350883, |
|
"loss": 3.5392, |
|
"step": 4440 |
|
}, |
|
{ |
|
"epoch": 1.3777674562625792, |
|
"grad_norm": 1.0786579847335815, |
|
"learning_rate": 0.00027562712914214925, |
|
"loss": 3.5553, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 1.3808639108221086, |
|
"grad_norm": 1.011117696762085, |
|
"learning_rate": 0.0002762465159492103, |
|
"loss": 3.5251, |
|
"step": 4460 |
|
}, |
|
{ |
|
"epoch": 1.383960365381638, |
|
"grad_norm": 0.9319019317626953, |
|
"learning_rate": 0.00027686590275627127, |
|
"loss": 3.5408, |
|
"step": 4470 |
|
}, |
|
{ |
|
"epoch": 1.3870568199411673, |
|
"grad_norm": 1.0703030824661255, |
|
"learning_rate": 0.0002774852895633323, |
|
"loss": 3.5147, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 1.3901532745006966, |
|
"grad_norm": 0.9363672733306885, |
|
"learning_rate": 0.00027810467637039333, |
|
"loss": 3.5054, |
|
"step": 4490 |
|
}, |
|
{ |
|
"epoch": 1.393249729060226, |
|
"grad_norm": 1.0434913635253906, |
|
"learning_rate": 0.0002787240631774543, |
|
"loss": 3.498, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 1.3963461836197553, |
|
"grad_norm": 1.1381675004959106, |
|
"learning_rate": 0.00027934344998451535, |
|
"loss": 3.5045, |
|
"step": 4510 |
|
}, |
|
{ |
|
"epoch": 1.3994426381792846, |
|
"grad_norm": 0.9770002365112305, |
|
"learning_rate": 0.0002799628367915764, |
|
"loss": 3.5115, |
|
"step": 4520 |
|
}, |
|
{ |
|
"epoch": 1.402539092738814, |
|
"grad_norm": 0.9267017245292664, |
|
"learning_rate": 0.0002805822235986373, |
|
"loss": 3.4452, |
|
"step": 4530 |
|
}, |
|
{ |
|
"epoch": 1.4056355472983433, |
|
"grad_norm": 1.0910615921020508, |
|
"learning_rate": 0.00028120161040569834, |
|
"loss": 3.4792, |
|
"step": 4540 |
|
}, |
|
{ |
|
"epoch": 1.4087320018578726, |
|
"grad_norm": 1.0374314785003662, |
|
"learning_rate": 0.00028182099721275937, |
|
"loss": 3.4497, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 1.411828456417402, |
|
"grad_norm": 1.1077336072921753, |
|
"learning_rate": 0.00028244038401982035, |
|
"loss": 3.4836, |
|
"step": 4560 |
|
}, |
|
{ |
|
"epoch": 1.4149249109769313, |
|
"grad_norm": 0.9700469374656677, |
|
"learning_rate": 0.0002830597708268814, |
|
"loss": 3.4539, |
|
"step": 4570 |
|
}, |
|
{ |
|
"epoch": 1.4180213655364606, |
|
"grad_norm": 1.0011495351791382, |
|
"learning_rate": 0.0002836791576339424, |
|
"loss": 3.4192, |
|
"step": 4580 |
|
}, |
|
{ |
|
"epoch": 1.42111782009599, |
|
"grad_norm": 1.0449153184890747, |
|
"learning_rate": 0.0002842985444410034, |
|
"loss": 3.4279, |
|
"step": 4590 |
|
}, |
|
{ |
|
"epoch": 1.4242142746555193, |
|
"grad_norm": 1.0163695812225342, |
|
"learning_rate": 0.00028491793124806443, |
|
"loss": 3.4375, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 1.4273107292150486, |
|
"grad_norm": 0.9043591618537903, |
|
"learning_rate": 0.00028553731805512546, |
|
"loss": 3.41, |
|
"step": 4610 |
|
}, |
|
{ |
|
"epoch": 1.430407183774578, |
|
"grad_norm": 1.0529117584228516, |
|
"learning_rate": 0.00028615670486218644, |
|
"loss": 3.4181, |
|
"step": 4620 |
|
}, |
|
{ |
|
"epoch": 1.4335036383341073, |
|
"grad_norm": 0.9313072562217712, |
|
"learning_rate": 0.0002867760916692475, |
|
"loss": 3.381, |
|
"step": 4630 |
|
}, |
|
{ |
|
"epoch": 1.4366000928936367, |
|
"grad_norm": 1.0091314315795898, |
|
"learning_rate": 0.00028739547847630846, |
|
"loss": 3.4084, |
|
"step": 4640 |
|
}, |
|
{ |
|
"epoch": 1.439696547453166, |
|
"grad_norm": 1.023206114768982, |
|
"learning_rate": 0.00028801486528336943, |
|
"loss": 3.3933, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 1.4427930020126953, |
|
"grad_norm": 0.9428771734237671, |
|
"learning_rate": 0.00028863425209043047, |
|
"loss": 3.3793, |
|
"step": 4660 |
|
}, |
|
{ |
|
"epoch": 1.4458894565722247, |
|
"grad_norm": 0.9487484097480774, |
|
"learning_rate": 0.0002892536388974915, |
|
"loss": 3.3703, |
|
"step": 4670 |
|
}, |
|
{ |
|
"epoch": 1.448985911131754, |
|
"grad_norm": 1.0242682695388794, |
|
"learning_rate": 0.0002898730257045525, |
|
"loss": 3.3808, |
|
"step": 4680 |
|
}, |
|
{ |
|
"epoch": 1.4520823656912833, |
|
"grad_norm": 0.963318407535553, |
|
"learning_rate": 0.0002904924125116135, |
|
"loss": 3.3756, |
|
"step": 4690 |
|
}, |
|
{ |
|
"epoch": 1.4551788202508127, |
|
"grad_norm": 0.9051762223243713, |
|
"learning_rate": 0.00029111179931867455, |
|
"loss": 3.3356, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 1.458275274810342, |
|
"grad_norm": 0.9930270910263062, |
|
"learning_rate": 0.0002917311861257355, |
|
"loss": 3.3601, |
|
"step": 4710 |
|
}, |
|
{ |
|
"epoch": 1.4613717293698714, |
|
"grad_norm": 1.077131748199463, |
|
"learning_rate": 0.00029235057293279656, |
|
"loss": 3.3308, |
|
"step": 4720 |
|
}, |
|
{ |
|
"epoch": 1.4644681839294007, |
|
"grad_norm": 0.881527304649353, |
|
"learning_rate": 0.0002929699597398576, |
|
"loss": 3.328, |
|
"step": 4730 |
|
}, |
|
{ |
|
"epoch": 1.46756463848893, |
|
"grad_norm": 1.0115300416946411, |
|
"learning_rate": 0.0002935893465469185, |
|
"loss": 3.3233, |
|
"step": 4740 |
|
}, |
|
{ |
|
"epoch": 1.4706610930484594, |
|
"grad_norm": 1.0688494443893433, |
|
"learning_rate": 0.00029420873335397955, |
|
"loss": 3.3381, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 1.4737575476079887, |
|
"grad_norm": 1.0195506811141968, |
|
"learning_rate": 0.0002948281201610406, |
|
"loss": 3.3058, |
|
"step": 4760 |
|
}, |
|
{ |
|
"epoch": 1.476854002167518, |
|
"grad_norm": 0.9502407312393188, |
|
"learning_rate": 0.00029544750696810156, |
|
"loss": 3.3174, |
|
"step": 4770 |
|
}, |
|
{ |
|
"epoch": 1.4799504567270474, |
|
"grad_norm": 1.0097241401672363, |
|
"learning_rate": 0.0002960668937751626, |
|
"loss": 3.3102, |
|
"step": 4780 |
|
}, |
|
{ |
|
"epoch": 1.483046911286577, |
|
"grad_norm": 0.9834030866622925, |
|
"learning_rate": 0.0002966862805822236, |
|
"loss": 3.3135, |
|
"step": 4790 |
|
}, |
|
{ |
|
"epoch": 1.4861433658461063, |
|
"grad_norm": 1.014854907989502, |
|
"learning_rate": 0.0002973056673892846, |
|
"loss": 3.2915, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 1.4892398204056356, |
|
"grad_norm": 0.944720983505249, |
|
"learning_rate": 0.00029792505419634564, |
|
"loss": 3.2783, |
|
"step": 4810 |
|
}, |
|
{ |
|
"epoch": 1.492336274965165, |
|
"grad_norm": 1.012688159942627, |
|
"learning_rate": 0.0002985444410034066, |
|
"loss": 3.2931, |
|
"step": 4820 |
|
}, |
|
{ |
|
"epoch": 1.4954327295246943, |
|
"grad_norm": 0.9100663065910339, |
|
"learning_rate": 0.00029916382781046766, |
|
"loss": 3.2785, |
|
"step": 4830 |
|
}, |
|
{ |
|
"epoch": 1.4985291840842236, |
|
"grad_norm": 0.8774744272232056, |
|
"learning_rate": 0.0002997832146175287, |
|
"loss": 3.2777, |
|
"step": 4840 |
|
}, |
|
{ |
|
"epoch": 1.5016256386437528, |
|
"grad_norm": 0.9623695611953735, |
|
"learning_rate": 0.0003004026014245896, |
|
"loss": 3.2671, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 1.504722093203282, |
|
"grad_norm": 1.0606322288513184, |
|
"learning_rate": 0.00030102198823165065, |
|
"loss": 3.2483, |
|
"step": 4860 |
|
}, |
|
{ |
|
"epoch": 1.5078185477628114, |
|
"grad_norm": 1.0098302364349365, |
|
"learning_rate": 0.0003016413750387117, |
|
"loss": 3.2355, |
|
"step": 4870 |
|
}, |
|
{ |
|
"epoch": 1.5109150023223408, |
|
"grad_norm": 0.8991314172744751, |
|
"learning_rate": 0.00030226076184577266, |
|
"loss": 3.239, |
|
"step": 4880 |
|
}, |
|
{ |
|
"epoch": 1.51401145688187, |
|
"grad_norm": 0.9911772012710571, |
|
"learning_rate": 0.0003028801486528337, |
|
"loss": 3.2569, |
|
"step": 4890 |
|
}, |
|
{ |
|
"epoch": 1.5171079114413994, |
|
"grad_norm": 0.9949657320976257, |
|
"learning_rate": 0.00030349953545989473, |
|
"loss": 3.2441, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 1.5202043660009288, |
|
"grad_norm": 0.9273360371589661, |
|
"learning_rate": 0.0003041189222669557, |
|
"loss": 3.2385, |
|
"step": 4910 |
|
}, |
|
{ |
|
"epoch": 1.5233008205604581, |
|
"grad_norm": 0.94888836145401, |
|
"learning_rate": 0.00030473830907401674, |
|
"loss": 3.2728, |
|
"step": 4920 |
|
}, |
|
{ |
|
"epoch": 1.5263972751199875, |
|
"grad_norm": 0.9299125075340271, |
|
"learning_rate": 0.0003053576958810778, |
|
"loss": 3.2272, |
|
"step": 4930 |
|
}, |
|
{ |
|
"epoch": 1.5294937296795168, |
|
"grad_norm": 0.8870009183883667, |
|
"learning_rate": 0.00030597708268813875, |
|
"loss": 3.2218, |
|
"step": 4940 |
|
}, |
|
{ |
|
"epoch": 1.5325901842390461, |
|
"grad_norm": 1.0036243200302124, |
|
"learning_rate": 0.00030659646949519973, |
|
"loss": 3.2008, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 1.5356866387985755, |
|
"grad_norm": 0.9473212957382202, |
|
"learning_rate": 0.00030721585630226077, |
|
"loss": 3.2295, |
|
"step": 4960 |
|
}, |
|
{ |
|
"epoch": 1.5387830933581048, |
|
"grad_norm": 0.8856829404830933, |
|
"learning_rate": 0.00030783524310932175, |
|
"loss": 3.2126, |
|
"step": 4970 |
|
}, |
|
{ |
|
"epoch": 1.5418795479176342, |
|
"grad_norm": 0.997509777545929, |
|
"learning_rate": 0.0003084546299163828, |
|
"loss": 3.212, |
|
"step": 4980 |
|
}, |
|
{ |
|
"epoch": 1.5449760024771635, |
|
"grad_norm": 0.9016265273094177, |
|
"learning_rate": 0.0003090740167234438, |
|
"loss": 3.208, |
|
"step": 4990 |
|
}, |
|
{ |
|
"epoch": 1.5480724570366928, |
|
"grad_norm": 0.8731397390365601, |
|
"learning_rate": 0.0003096934035305048, |
|
"loss": 3.2158, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 1.5511689115962222, |
|
"grad_norm": 0.9676650166511536, |
|
"learning_rate": 0.0003103127903375658, |
|
"loss": 3.2032, |
|
"step": 5010 |
|
}, |
|
{ |
|
"epoch": 1.5542653661557515, |
|
"grad_norm": 0.9783886075019836, |
|
"learning_rate": 0.00031093217714462686, |
|
"loss": 3.2114, |
|
"step": 5020 |
|
}, |
|
{ |
|
"epoch": 1.5573618207152808, |
|
"grad_norm": 1.0224086046218872, |
|
"learning_rate": 0.00031155156395168784, |
|
"loss": 3.1828, |
|
"step": 5030 |
|
}, |
|
{ |
|
"epoch": 1.5604582752748102, |
|
"grad_norm": 0.9322043061256409, |
|
"learning_rate": 0.00031217095075874887, |
|
"loss": 3.1851, |
|
"step": 5040 |
|
}, |
|
{ |
|
"epoch": 1.5635547298343395, |
|
"grad_norm": 0.9294213056564331, |
|
"learning_rate": 0.0003127903375658099, |
|
"loss": 3.189, |
|
"step": 5050 |
|
}, |
|
{ |
|
"epoch": 1.5666511843938689, |
|
"grad_norm": 0.9628444910049438, |
|
"learning_rate": 0.00031340972437287083, |
|
"loss": 3.1524, |
|
"step": 5060 |
|
}, |
|
{ |
|
"epoch": 1.5697476389533982, |
|
"grad_norm": 0.9377193450927734, |
|
"learning_rate": 0.00031402911117993186, |
|
"loss": 3.1688, |
|
"step": 5070 |
|
}, |
|
{ |
|
"epoch": 1.5728440935129275, |
|
"grad_norm": 0.8622744083404541, |
|
"learning_rate": 0.0003146484979869929, |
|
"loss": 3.1374, |
|
"step": 5080 |
|
}, |
|
{ |
|
"epoch": 1.5759405480724569, |
|
"grad_norm": 0.9315075874328613, |
|
"learning_rate": 0.0003152678847940539, |
|
"loss": 3.1657, |
|
"step": 5090 |
|
}, |
|
{ |
|
"epoch": 1.5790370026319864, |
|
"grad_norm": 0.9984999895095825, |
|
"learning_rate": 0.0003158872716011149, |
|
"loss": 3.1494, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 1.5821334571915158, |
|
"grad_norm": 0.9476169943809509, |
|
"learning_rate": 0.0003165066584081759, |
|
"loss": 3.1262, |
|
"step": 5110 |
|
}, |
|
{ |
|
"epoch": 1.585229911751045, |
|
"grad_norm": 0.8942754864692688, |
|
"learning_rate": 0.0003171260452152369, |
|
"loss": 3.1546, |
|
"step": 5120 |
|
}, |
|
{ |
|
"epoch": 1.5883263663105744, |
|
"grad_norm": 0.9009295701980591, |
|
"learning_rate": 0.00031774543202229796, |
|
"loss": 3.1516, |
|
"step": 5130 |
|
}, |
|
{ |
|
"epoch": 1.5914228208701038, |
|
"grad_norm": 1.010343074798584, |
|
"learning_rate": 0.00031836481882935893, |
|
"loss": 3.1448, |
|
"step": 5140 |
|
}, |
|
{ |
|
"epoch": 1.5945192754296331, |
|
"grad_norm": 0.9292970299720764, |
|
"learning_rate": 0.00031898420563641997, |
|
"loss": 3.123, |
|
"step": 5150 |
|
}, |
|
{ |
|
"epoch": 1.5976157299891625, |
|
"grad_norm": 0.9574374556541443, |
|
"learning_rate": 0.00031960359244348095, |
|
"loss": 3.1358, |
|
"step": 5160 |
|
}, |
|
{ |
|
"epoch": 1.6007121845486918, |
|
"grad_norm": 0.9073388576507568, |
|
"learning_rate": 0.0003202229792505419, |
|
"loss": 3.1352, |
|
"step": 5170 |
|
}, |
|
{ |
|
"epoch": 1.6038086391082211, |
|
"grad_norm": 0.9928716421127319, |
|
"learning_rate": 0.00032084236605760296, |
|
"loss": 3.1226, |
|
"step": 5180 |
|
}, |
|
{ |
|
"epoch": 1.6069050936677505, |
|
"grad_norm": 0.9886534810066223, |
|
"learning_rate": 0.000321461752864664, |
|
"loss": 3.131, |
|
"step": 5190 |
|
}, |
|
{ |
|
"epoch": 1.6100015482272798, |
|
"grad_norm": 0.9734316468238831, |
|
"learning_rate": 0.00032208113967172497, |
|
"loss": 3.1341, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 1.6130980027868091, |
|
"grad_norm": 0.9681540131568909, |
|
"learning_rate": 0.000322700526478786, |
|
"loss": 3.0973, |
|
"step": 5210 |
|
}, |
|
{ |
|
"epoch": 1.6161944573463385, |
|
"grad_norm": 0.9452388286590576, |
|
"learning_rate": 0.00032331991328584704, |
|
"loss": 3.1082, |
|
"step": 5220 |
|
}, |
|
{ |
|
"epoch": 1.6192909119058678, |
|
"grad_norm": 0.9055010080337524, |
|
"learning_rate": 0.000323939300092908, |
|
"loss": 3.0891, |
|
"step": 5230 |
|
}, |
|
{ |
|
"epoch": 1.6223873664653972, |
|
"grad_norm": 0.9603378772735596, |
|
"learning_rate": 0.00032455868689996905, |
|
"loss": 3.0998, |
|
"step": 5240 |
|
}, |
|
{ |
|
"epoch": 1.6254838210249265, |
|
"grad_norm": 0.8925791382789612, |
|
"learning_rate": 0.0003251780737070301, |
|
"loss": 3.1165, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 1.6285802755844558, |
|
"grad_norm": 0.928421139717102, |
|
"learning_rate": 0.000325797460514091, |
|
"loss": 3.1087, |
|
"step": 5260 |
|
}, |
|
{ |
|
"epoch": 1.6316767301439852, |
|
"grad_norm": 0.9481196403503418, |
|
"learning_rate": 0.00032641684732115204, |
|
"loss": 3.0916, |
|
"step": 5270 |
|
}, |
|
{ |
|
"epoch": 1.6347731847035145, |
|
"grad_norm": 0.9044370055198669, |
|
"learning_rate": 0.0003270362341282131, |
|
"loss": 3.1, |
|
"step": 5280 |
|
}, |
|
{ |
|
"epoch": 1.6378696392630439, |
|
"grad_norm": 0.9636628031730652, |
|
"learning_rate": 0.00032765562093527406, |
|
"loss": 3.114, |
|
"step": 5290 |
|
}, |
|
{ |
|
"epoch": 1.6409660938225732, |
|
"grad_norm": 0.9585344195365906, |
|
"learning_rate": 0.0003282750077423351, |
|
"loss": 3.086, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 1.6440625483821025, |
|
"grad_norm": 0.9368054866790771, |
|
"learning_rate": 0.0003288943945493961, |
|
"loss": 3.0763, |
|
"step": 5310 |
|
}, |
|
{ |
|
"epoch": 1.6471590029416319, |
|
"grad_norm": 0.951101541519165, |
|
"learning_rate": 0.0003295137813564571, |
|
"loss": 3.0746, |
|
"step": 5320 |
|
}, |
|
{ |
|
"epoch": 1.6502554575011612, |
|
"grad_norm": 0.9043335318565369, |
|
"learning_rate": 0.00033013316816351814, |
|
"loss": 3.0665, |
|
"step": 5330 |
|
}, |
|
{ |
|
"epoch": 1.6533519120606905, |
|
"grad_norm": 0.8929763436317444, |
|
"learning_rate": 0.00033075255497057917, |
|
"loss": 3.0644, |
|
"step": 5340 |
|
}, |
|
{ |
|
"epoch": 1.6564483666202199, |
|
"grad_norm": 0.9089614152908325, |
|
"learning_rate": 0.00033137194177764015, |
|
"loss": 3.0661, |
|
"step": 5350 |
|
}, |
|
{ |
|
"epoch": 1.6595448211797492, |
|
"grad_norm": 0.9606667757034302, |
|
"learning_rate": 0.0003319913285847012, |
|
"loss": 3.0578, |
|
"step": 5360 |
|
}, |
|
{ |
|
"epoch": 1.6626412757392786, |
|
"grad_norm": 0.8867613673210144, |
|
"learning_rate": 0.00033261071539176216, |
|
"loss": 3.0707, |
|
"step": 5370 |
|
}, |
|
{ |
|
"epoch": 1.665737730298808, |
|
"grad_norm": 0.9263885617256165, |
|
"learning_rate": 0.00033323010219882314, |
|
"loss": 3.0579, |
|
"step": 5380 |
|
}, |
|
{ |
|
"epoch": 1.6688341848583372, |
|
"grad_norm": 0.8380886316299438, |
|
"learning_rate": 0.0003338494890058842, |
|
"loss": 3.0628, |
|
"step": 5390 |
|
}, |
|
{ |
|
"epoch": 1.6719306394178666, |
|
"grad_norm": 0.9296733140945435, |
|
"learning_rate": 0.0003344688758129452, |
|
"loss": 3.0374, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 1.675027093977396, |
|
"grad_norm": 0.9482071995735168, |
|
"learning_rate": 0.0003350882626200062, |
|
"loss": 3.0611, |
|
"step": 5410 |
|
}, |
|
{ |
|
"epoch": 1.6781235485369252, |
|
"grad_norm": 0.934635579586029, |
|
"learning_rate": 0.0003357076494270672, |
|
"loss": 3.0465, |
|
"step": 5420 |
|
}, |
|
{ |
|
"epoch": 1.6812200030964546, |
|
"grad_norm": 0.9624560475349426, |
|
"learning_rate": 0.00033632703623412825, |
|
"loss": 3.0622, |
|
"step": 5430 |
|
}, |
|
{ |
|
"epoch": 1.684316457655984, |
|
"grad_norm": 0.952055037021637, |
|
"learning_rate": 0.00033694642304118923, |
|
"loss": 3.0483, |
|
"step": 5440 |
|
}, |
|
{ |
|
"epoch": 1.6874129122155133, |
|
"grad_norm": 0.8703885674476624, |
|
"learning_rate": 0.00033756580984825027, |
|
"loss": 3.0506, |
|
"step": 5450 |
|
}, |
|
{ |
|
"epoch": 1.6905093667750426, |
|
"grad_norm": 0.9054002165794373, |
|
"learning_rate": 0.00033818519665531125, |
|
"loss": 3.045, |
|
"step": 5460 |
|
}, |
|
{ |
|
"epoch": 1.693605821334572, |
|
"grad_norm": 0.9501616954803467, |
|
"learning_rate": 0.0003388045834623722, |
|
"loss": 3.0327, |
|
"step": 5470 |
|
}, |
|
{ |
|
"epoch": 1.6967022758941013, |
|
"grad_norm": 0.880946934223175, |
|
"learning_rate": 0.00033942397026943326, |
|
"loss": 3.0414, |
|
"step": 5480 |
|
}, |
|
{ |
|
"epoch": 1.6997987304536306, |
|
"grad_norm": 0.9799813032150269, |
|
"learning_rate": 0.00034004335707649424, |
|
"loss": 3.0485, |
|
"step": 5490 |
|
}, |
|
{ |
|
"epoch": 1.70289518501316, |
|
"grad_norm": 0.9278644323348999, |
|
"learning_rate": 0.00034066274388355527, |
|
"loss": 3.0334, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 1.7059916395726893, |
|
"grad_norm": 0.8921311497688293, |
|
"learning_rate": 0.0003412821306906163, |
|
"loss": 3.0283, |
|
"step": 5510 |
|
}, |
|
{ |
|
"epoch": 1.7090880941322186, |
|
"grad_norm": 0.8926926851272583, |
|
"learning_rate": 0.0003419015174976773, |
|
"loss": 3.0294, |
|
"step": 5520 |
|
}, |
|
{ |
|
"epoch": 1.712184548691748, |
|
"grad_norm": 0.9130481481552124, |
|
"learning_rate": 0.0003425209043047383, |
|
"loss": 3.007, |
|
"step": 5530 |
|
}, |
|
{ |
|
"epoch": 1.7152810032512773, |
|
"grad_norm": 0.9094374775886536, |
|
"learning_rate": 0.00034314029111179935, |
|
"loss": 3.0183, |
|
"step": 5540 |
|
}, |
|
{ |
|
"epoch": 1.7183774578108066, |
|
"grad_norm": 0.8862912058830261, |
|
"learning_rate": 0.00034375967791886033, |
|
"loss": 2.9898, |
|
"step": 5550 |
|
}, |
|
{ |
|
"epoch": 1.721473912370336, |
|
"grad_norm": 0.9140844941139221, |
|
"learning_rate": 0.00034437906472592136, |
|
"loss": 3.0172, |
|
"step": 5560 |
|
}, |
|
{ |
|
"epoch": 1.7245703669298653, |
|
"grad_norm": 0.976078450679779, |
|
"learning_rate": 0.0003449984515329824, |
|
"loss": 3.0161, |
|
"step": 5570 |
|
}, |
|
{ |
|
"epoch": 1.7276668214893947, |
|
"grad_norm": 0.9176059365272522, |
|
"learning_rate": 0.0003456178383400433, |
|
"loss": 2.9931, |
|
"step": 5580 |
|
}, |
|
{ |
|
"epoch": 1.730763276048924, |
|
"grad_norm": 0.9895356297492981, |
|
"learning_rate": 0.00034623722514710436, |
|
"loss": 3.0026, |
|
"step": 5590 |
|
}, |
|
{ |
|
"epoch": 1.7338597306084533, |
|
"grad_norm": 0.9021176099777222, |
|
"learning_rate": 0.0003468566119541654, |
|
"loss": 2.9841, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 1.7369561851679827, |
|
"grad_norm": 1.0290924310684204, |
|
"learning_rate": 0.00034747599876122637, |
|
"loss": 3.0205, |
|
"step": 5610 |
|
}, |
|
{ |
|
"epoch": 1.740052639727512, |
|
"grad_norm": 0.9842997193336487, |
|
"learning_rate": 0.0003480953855682874, |
|
"loss": 2.9983, |
|
"step": 5620 |
|
}, |
|
{ |
|
"epoch": 1.7431490942870413, |
|
"grad_norm": 1.004170536994934, |
|
"learning_rate": 0.00034871477237534843, |
|
"loss": 2.9929, |
|
"step": 5630 |
|
}, |
|
{ |
|
"epoch": 1.7462455488465707, |
|
"grad_norm": 0.8903537392616272, |
|
"learning_rate": 0.0003493341591824094, |
|
"loss": 2.9928, |
|
"step": 5640 |
|
}, |
|
{ |
|
"epoch": 1.7493420034061, |
|
"grad_norm": 0.9463049173355103, |
|
"learning_rate": 0.00034995354598947045, |
|
"loss": 2.9975, |
|
"step": 5650 |
|
}, |
|
{ |
|
"epoch": 1.7524384579656294, |
|
"grad_norm": 0.879135251045227, |
|
"learning_rate": 0.0003505729327965315, |
|
"loss": 2.9767, |
|
"step": 5660 |
|
}, |
|
{ |
|
"epoch": 1.7555349125251587, |
|
"grad_norm": 0.9398852586746216, |
|
"learning_rate": 0.00035119231960359246, |
|
"loss": 2.9813, |
|
"step": 5670 |
|
}, |
|
{ |
|
"epoch": 1.758631367084688, |
|
"grad_norm": 0.9972649216651917, |
|
"learning_rate": 0.00035181170641065344, |
|
"loss": 2.964, |
|
"step": 5680 |
|
}, |
|
{ |
|
"epoch": 1.7617278216442174, |
|
"grad_norm": 0.9139822721481323, |
|
"learning_rate": 0.00035243109321771447, |
|
"loss": 2.9906, |
|
"step": 5690 |
|
}, |
|
{ |
|
"epoch": 1.7648242762037467, |
|
"grad_norm": 0.8910505771636963, |
|
"learning_rate": 0.00035305048002477545, |
|
"loss": 2.9749, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 1.767920730763276, |
|
"grad_norm": 1.1436492204666138, |
|
"learning_rate": 0.0003536698668318365, |
|
"loss": 2.9727, |
|
"step": 5710 |
|
}, |
|
{ |
|
"epoch": 1.7710171853228054, |
|
"grad_norm": 0.9300575852394104, |
|
"learning_rate": 0.0003542892536388975, |
|
"loss": 3.0028, |
|
"step": 5720 |
|
}, |
|
{ |
|
"epoch": 1.7741136398823347, |
|
"grad_norm": 0.8461237549781799, |
|
"learning_rate": 0.0003549086404459585, |
|
"loss": 2.9749, |
|
"step": 5730 |
|
}, |
|
{ |
|
"epoch": 1.777210094441864, |
|
"grad_norm": 0.882404088973999, |
|
"learning_rate": 0.00035552802725301953, |
|
"loss": 2.9568, |
|
"step": 5740 |
|
}, |
|
{ |
|
"epoch": 1.7803065490013934, |
|
"grad_norm": 0.8937315344810486, |
|
"learning_rate": 0.00035614741406008056, |
|
"loss": 2.9807, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 1.7834030035609227, |
|
"grad_norm": 0.8935524225234985, |
|
"learning_rate": 0.00035676680086714154, |
|
"loss": 2.982, |
|
"step": 5760 |
|
}, |
|
{ |
|
"epoch": 1.786499458120452, |
|
"grad_norm": 0.9033128023147583, |
|
"learning_rate": 0.0003573861876742026, |
|
"loss": 2.9634, |
|
"step": 5770 |
|
}, |
|
{ |
|
"epoch": 1.7895959126799814, |
|
"grad_norm": 0.9767388701438904, |
|
"learning_rate": 0.0003580055744812635, |
|
"loss": 2.9613, |
|
"step": 5780 |
|
}, |
|
{ |
|
"epoch": 1.7926923672395108, |
|
"grad_norm": 1.0344420671463013, |
|
"learning_rate": 0.00035862496128832454, |
|
"loss": 2.9319, |
|
"step": 5790 |
|
}, |
|
{ |
|
"epoch": 1.79578882179904, |
|
"grad_norm": 0.87823486328125, |
|
"learning_rate": 0.00035924434809538557, |
|
"loss": 2.96, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 1.7988852763585694, |
|
"grad_norm": 0.9067280888557434, |
|
"learning_rate": 0.00035986373490244655, |
|
"loss": 2.9322, |
|
"step": 5810 |
|
}, |
|
{ |
|
"epoch": 1.8019817309180988, |
|
"grad_norm": 0.8616409301757812, |
|
"learning_rate": 0.0003604831217095076, |
|
"loss": 2.9611, |
|
"step": 5820 |
|
}, |
|
{ |
|
"epoch": 1.805078185477628, |
|
"grad_norm": 0.8421568274497986, |
|
"learning_rate": 0.0003611025085165686, |
|
"loss": 2.9366, |
|
"step": 5830 |
|
}, |
|
{ |
|
"epoch": 1.8081746400371574, |
|
"grad_norm": 0.8576173782348633, |
|
"learning_rate": 0.0003617218953236296, |
|
"loss": 2.9423, |
|
"step": 5840 |
|
}, |
|
{ |
|
"epoch": 1.8112710945966868, |
|
"grad_norm": 0.8986689448356628, |
|
"learning_rate": 0.00036234128213069063, |
|
"loss": 2.9376, |
|
"step": 5850 |
|
}, |
|
{ |
|
"epoch": 1.8143675491562161, |
|
"grad_norm": 0.9134368300437927, |
|
"learning_rate": 0.00036296066893775166, |
|
"loss": 2.9262, |
|
"step": 5860 |
|
}, |
|
{ |
|
"epoch": 1.8174640037157455, |
|
"grad_norm": 0.9681121110916138, |
|
"learning_rate": 0.00036358005574481264, |
|
"loss": 2.9341, |
|
"step": 5870 |
|
}, |
|
{ |
|
"epoch": 1.8205604582752748, |
|
"grad_norm": 1.0286924839019775, |
|
"learning_rate": 0.0003641994425518737, |
|
"loss": 2.9306, |
|
"step": 5880 |
|
}, |
|
{ |
|
"epoch": 1.8236569128348041, |
|
"grad_norm": 0.9352772831916809, |
|
"learning_rate": 0.00036481882935893465, |
|
"loss": 2.948, |
|
"step": 5890 |
|
}, |
|
{ |
|
"epoch": 1.8267533673943335, |
|
"grad_norm": 1.0539007186889648, |
|
"learning_rate": 0.00036543821616599563, |
|
"loss": 2.9523, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 1.8298498219538628, |
|
"grad_norm": 0.8661713600158691, |
|
"learning_rate": 0.00036605760297305667, |
|
"loss": 2.9269, |
|
"step": 5910 |
|
}, |
|
{ |
|
"epoch": 1.8329462765133921, |
|
"grad_norm": 0.9120956659317017, |
|
"learning_rate": 0.0003666769897801177, |
|
"loss": 2.9302, |
|
"step": 5920 |
|
}, |
|
{ |
|
"epoch": 1.8360427310729215, |
|
"grad_norm": 0.9333845376968384, |
|
"learning_rate": 0.0003672963765871787, |
|
"loss": 2.9247, |
|
"step": 5930 |
|
}, |
|
{ |
|
"epoch": 1.8391391856324508, |
|
"grad_norm": 0.864277720451355, |
|
"learning_rate": 0.0003679157633942397, |
|
"loss": 2.9269, |
|
"step": 5940 |
|
}, |
|
{ |
|
"epoch": 1.8422356401919802, |
|
"grad_norm": 0.954741358757019, |
|
"learning_rate": 0.00036853515020130075, |
|
"loss": 2.9348, |
|
"step": 5950 |
|
}, |
|
{ |
|
"epoch": 1.8453320947515095, |
|
"grad_norm": 0.8879597187042236, |
|
"learning_rate": 0.0003691545370083617, |
|
"loss": 2.9259, |
|
"step": 5960 |
|
}, |
|
{ |
|
"epoch": 1.8484285493110388, |
|
"grad_norm": 0.8487861752510071, |
|
"learning_rate": 0.00036977392381542276, |
|
"loss": 2.9189, |
|
"step": 5970 |
|
}, |
|
{ |
|
"epoch": 1.8515250038705682, |
|
"grad_norm": 0.9464482069015503, |
|
"learning_rate": 0.0003703933106224838, |
|
"loss": 2.9119, |
|
"step": 5980 |
|
}, |
|
{ |
|
"epoch": 1.8546214584300975, |
|
"grad_norm": 0.8773711919784546, |
|
"learning_rate": 0.0003710126974295447, |
|
"loss": 2.9222, |
|
"step": 5990 |
|
}, |
|
{ |
|
"epoch": 1.8577179129896269, |
|
"grad_norm": 0.8919110894203186, |
|
"learning_rate": 0.00037163208423660575, |
|
"loss": 2.9056, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 1.8608143675491562, |
|
"grad_norm": 0.9436878561973572, |
|
"learning_rate": 0.0003722514710436668, |
|
"loss": 2.9095, |
|
"step": 6010 |
|
}, |
|
{ |
|
"epoch": 1.8639108221086855, |
|
"grad_norm": 0.9595790505409241, |
|
"learning_rate": 0.00037287085785072776, |
|
"loss": 2.9047, |
|
"step": 6020 |
|
}, |
|
{ |
|
"epoch": 1.8670072766682149, |
|
"grad_norm": 0.8692799806594849, |
|
"learning_rate": 0.0003734902446577888, |
|
"loss": 2.905, |
|
"step": 6030 |
|
}, |
|
{ |
|
"epoch": 1.8701037312277442, |
|
"grad_norm": 0.9274528622627258, |
|
"learning_rate": 0.00037410963146484983, |
|
"loss": 2.9251, |
|
"step": 6040 |
|
}, |
|
{ |
|
"epoch": 1.8732001857872735, |
|
"grad_norm": 0.8798776268959045, |
|
"learning_rate": 0.0003747290182719108, |
|
"loss": 2.9113, |
|
"step": 6050 |
|
}, |
|
{ |
|
"epoch": 1.8762966403468029, |
|
"grad_norm": 0.8613748550415039, |
|
"learning_rate": 0.00037534840507897184, |
|
"loss": 2.9077, |
|
"step": 6060 |
|
}, |
|
{ |
|
"epoch": 1.8793930949063322, |
|
"grad_norm": 0.8926125764846802, |
|
"learning_rate": 0.0003759677918860329, |
|
"loss": 2.9029, |
|
"step": 6070 |
|
}, |
|
{ |
|
"epoch": 1.8824895494658616, |
|
"grad_norm": 0.9414944052696228, |
|
"learning_rate": 0.00037658717869309386, |
|
"loss": 2.8968, |
|
"step": 6080 |
|
}, |
|
{ |
|
"epoch": 1.885586004025391, |
|
"grad_norm": 0.8922074437141418, |
|
"learning_rate": 0.0003772065655001549, |
|
"loss": 2.8992, |
|
"step": 6090 |
|
}, |
|
{ |
|
"epoch": 1.8886824585849202, |
|
"grad_norm": 0.9254492521286011, |
|
"learning_rate": 0.0003778259523072158, |
|
"loss": 2.912, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 1.8917789131444496, |
|
"grad_norm": 0.8882949948310852, |
|
"learning_rate": 0.00037844533911427685, |
|
"loss": 2.8972, |
|
"step": 6110 |
|
}, |
|
{ |
|
"epoch": 1.894875367703979, |
|
"grad_norm": 0.874482274055481, |
|
"learning_rate": 0.0003790647259213379, |
|
"loss": 2.8848, |
|
"step": 6120 |
|
}, |
|
{ |
|
"epoch": 1.8979718222635082, |
|
"grad_norm": 0.8989077210426331, |
|
"learning_rate": 0.00037968411272839886, |
|
"loss": 2.8934, |
|
"step": 6130 |
|
}, |
|
{ |
|
"epoch": 1.9010682768230376, |
|
"grad_norm": 0.9361928105354309, |
|
"learning_rate": 0.0003803034995354599, |
|
"loss": 2.8697, |
|
"step": 6140 |
|
}, |
|
{ |
|
"epoch": 1.904164731382567, |
|
"grad_norm": 0.8788303732872009, |
|
"learning_rate": 0.0003809228863425209, |
|
"loss": 2.8989, |
|
"step": 6150 |
|
}, |
|
{ |
|
"epoch": 1.9072611859420963, |
|
"grad_norm": 0.8196372985839844, |
|
"learning_rate": 0.0003815422731495819, |
|
"loss": 2.8913, |
|
"step": 6160 |
|
}, |
|
{ |
|
"epoch": 1.9103576405016256, |
|
"grad_norm": 0.8973246216773987, |
|
"learning_rate": 0.00038216165995664294, |
|
"loss": 2.8941, |
|
"step": 6170 |
|
}, |
|
{ |
|
"epoch": 1.913454095061155, |
|
"grad_norm": 0.951608419418335, |
|
"learning_rate": 0.00038278104676370397, |
|
"loss": 2.8941, |
|
"step": 6180 |
|
}, |
|
{ |
|
"epoch": 1.9165505496206843, |
|
"grad_norm": 0.87721186876297, |
|
"learning_rate": 0.00038340043357076495, |
|
"loss": 2.9039, |
|
"step": 6190 |
|
}, |
|
{ |
|
"epoch": 1.9196470041802136, |
|
"grad_norm": 0.8995383381843567, |
|
"learning_rate": 0.00038401982037782593, |
|
"loss": 2.8978, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 1.922743458739743, |
|
"grad_norm": 0.9441946148872375, |
|
"learning_rate": 0.00038463920718488696, |
|
"loss": 2.8774, |
|
"step": 6210 |
|
}, |
|
{ |
|
"epoch": 1.9258399132992723, |
|
"grad_norm": 0.8960248231887817, |
|
"learning_rate": 0.00038525859399194794, |
|
"loss": 2.8908, |
|
"step": 6220 |
|
}, |
|
{ |
|
"epoch": 1.9289363678588016, |
|
"grad_norm": 0.9116747975349426, |
|
"learning_rate": 0.000385877980799009, |
|
"loss": 2.8639, |
|
"step": 6230 |
|
}, |
|
{ |
|
"epoch": 1.932032822418331, |
|
"grad_norm": 0.8798891305923462, |
|
"learning_rate": 0.00038649736760607, |
|
"loss": 2.86, |
|
"step": 6240 |
|
}, |
|
{ |
|
"epoch": 1.9351292769778603, |
|
"grad_norm": 0.8671932816505432, |
|
"learning_rate": 0.000387116754413131, |
|
"loss": 2.871, |
|
"step": 6250 |
|
}, |
|
{ |
|
"epoch": 1.9382257315373896, |
|
"grad_norm": 0.9382427930831909, |
|
"learning_rate": 0.000387736141220192, |
|
"loss": 2.8508, |
|
"step": 6260 |
|
}, |
|
{ |
|
"epoch": 1.941322186096919, |
|
"grad_norm": 0.9341138005256653, |
|
"learning_rate": 0.00038835552802725306, |
|
"loss": 2.8717, |
|
"step": 6270 |
|
}, |
|
{ |
|
"epoch": 1.9444186406564483, |
|
"grad_norm": 0.9240859150886536, |
|
"learning_rate": 0.00038897491483431404, |
|
"loss": 2.8802, |
|
"step": 6280 |
|
}, |
|
{ |
|
"epoch": 1.9475150952159777, |
|
"grad_norm": 0.9910873174667358, |
|
"learning_rate": 0.00038959430164137507, |
|
"loss": 2.8709, |
|
"step": 6290 |
|
}, |
|
{ |
|
"epoch": 1.950611549775507, |
|
"grad_norm": 0.9003307223320007, |
|
"learning_rate": 0.0003902136884484361, |
|
"loss": 2.8732, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 1.9537080043350363, |
|
"grad_norm": 0.904257595539093, |
|
"learning_rate": 0.00039083307525549703, |
|
"loss": 2.8876, |
|
"step": 6310 |
|
}, |
|
{ |
|
"epoch": 1.9568044588945657, |
|
"grad_norm": 0.978615403175354, |
|
"learning_rate": 0.00039145246206255806, |
|
"loss": 2.8684, |
|
"step": 6320 |
|
}, |
|
{ |
|
"epoch": 1.959900913454095, |
|
"grad_norm": 0.8782775402069092, |
|
"learning_rate": 0.0003920718488696191, |
|
"loss": 2.8677, |
|
"step": 6330 |
|
}, |
|
{ |
|
"epoch": 1.9629973680136246, |
|
"grad_norm": 0.9640995860099792, |
|
"learning_rate": 0.0003926912356766801, |
|
"loss": 2.8568, |
|
"step": 6340 |
|
}, |
|
{ |
|
"epoch": 1.966093822573154, |
|
"grad_norm": 0.8807209134101868, |
|
"learning_rate": 0.0003933106224837411, |
|
"loss": 2.8618, |
|
"step": 6350 |
|
}, |
|
{ |
|
"epoch": 1.9691902771326832, |
|
"grad_norm": 0.8921664357185364, |
|
"learning_rate": 0.00039393000929080214, |
|
"loss": 2.8788, |
|
"step": 6360 |
|
}, |
|
{ |
|
"epoch": 1.9722867316922126, |
|
"grad_norm": 0.9727539420127869, |
|
"learning_rate": 0.0003945493960978631, |
|
"loss": 2.8512, |
|
"step": 6370 |
|
}, |
|
{ |
|
"epoch": 1.975383186251742, |
|
"grad_norm": 0.8913626670837402, |
|
"learning_rate": 0.00039516878290492415, |
|
"loss": 2.8604, |
|
"step": 6380 |
|
}, |
|
{ |
|
"epoch": 1.9784796408112713, |
|
"grad_norm": 0.8825446963310242, |
|
"learning_rate": 0.0003957881697119852, |
|
"loss": 2.8448, |
|
"step": 6390 |
|
}, |
|
{ |
|
"epoch": 1.9815760953708006, |
|
"grad_norm": 0.916666567325592, |
|
"learning_rate": 0.00039640755651904617, |
|
"loss": 2.8625, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 1.98467254993033, |
|
"grad_norm": 1.0008190870285034, |
|
"learning_rate": 0.00039702694332610715, |
|
"loss": 2.8631, |
|
"step": 6410 |
|
}, |
|
{ |
|
"epoch": 1.9877690044898593, |
|
"grad_norm": 0.8584704399108887, |
|
"learning_rate": 0.0003976463301331682, |
|
"loss": 2.8701, |
|
"step": 6420 |
|
}, |
|
{ |
|
"epoch": 1.9908654590493886, |
|
"grad_norm": 0.9079132676124573, |
|
"learning_rate": 0.00039826571694022916, |
|
"loss": 2.8453, |
|
"step": 6430 |
|
}, |
|
{ |
|
"epoch": 1.993961913608918, |
|
"grad_norm": 0.8909833431243896, |
|
"learning_rate": 0.0003988851037472902, |
|
"loss": 2.8315, |
|
"step": 6440 |
|
}, |
|
{ |
|
"epoch": 1.9970583681684473, |
|
"grad_norm": 0.9206358194351196, |
|
"learning_rate": 0.00039950449055435117, |
|
"loss": 2.8694, |
|
"step": 6450 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 0.6666725277900696, |
|
"learning_rate": 0.0004001238773614122, |
|
"loss": 2.7051, |
|
"step": 6460 |
|
}, |
|
{ |
|
"epoch": 2.0030964545595293, |
|
"grad_norm": 0.8826514482498169, |
|
"learning_rate": 0.00040074326416847324, |
|
"loss": 2.8328, |
|
"step": 6470 |
|
}, |
|
{ |
|
"epoch": 2.0061929091190587, |
|
"grad_norm": 0.922680139541626, |
|
"learning_rate": 0.0004013626509755342, |
|
"loss": 2.852, |
|
"step": 6480 |
|
}, |
|
{ |
|
"epoch": 2.009289363678588, |
|
"grad_norm": 0.9056729674339294, |
|
"learning_rate": 0.00040198203778259525, |
|
"loss": 2.8423, |
|
"step": 6490 |
|
}, |
|
{ |
|
"epoch": 2.0123858182381174, |
|
"grad_norm": 0.866322934627533, |
|
"learning_rate": 0.0004026014245896563, |
|
"loss": 2.8412, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 2.0154822727976467, |
|
"grad_norm": 0.9588058590888977, |
|
"learning_rate": 0.0004032208113967172, |
|
"loss": 2.8526, |
|
"step": 6510 |
|
}, |
|
{ |
|
"epoch": 2.018578727357176, |
|
"grad_norm": 0.9247243404388428, |
|
"learning_rate": 0.00040384019820377824, |
|
"loss": 2.8271, |
|
"step": 6520 |
|
}, |
|
{ |
|
"epoch": 2.0216751819167054, |
|
"grad_norm": 0.8787789940834045, |
|
"learning_rate": 0.0004044595850108393, |
|
"loss": 2.8043, |
|
"step": 6530 |
|
}, |
|
{ |
|
"epoch": 2.0247716364762347, |
|
"grad_norm": 0.8963256478309631, |
|
"learning_rate": 0.00040507897181790025, |
|
"loss": 2.8162, |
|
"step": 6540 |
|
}, |
|
{ |
|
"epoch": 2.027868091035764, |
|
"grad_norm": 0.9025070071220398, |
|
"learning_rate": 0.0004056983586249613, |
|
"loss": 2.8226, |
|
"step": 6550 |
|
}, |
|
{ |
|
"epoch": 2.0309645455952934, |
|
"grad_norm": 0.8822202086448669, |
|
"learning_rate": 0.0004063177454320223, |
|
"loss": 2.8284, |
|
"step": 6560 |
|
}, |
|
{ |
|
"epoch": 2.0340610001548227, |
|
"grad_norm": 0.9176104068756104, |
|
"learning_rate": 0.0004069371322390833, |
|
"loss": 2.8379, |
|
"step": 6570 |
|
}, |
|
{ |
|
"epoch": 2.037157454714352, |
|
"grad_norm": 0.9508628845214844, |
|
"learning_rate": 0.00040755651904614433, |
|
"loss": 2.8113, |
|
"step": 6580 |
|
}, |
|
{ |
|
"epoch": 2.0402539092738814, |
|
"grad_norm": 0.9238744378089905, |
|
"learning_rate": 0.00040817590585320537, |
|
"loss": 2.8221, |
|
"step": 6590 |
|
}, |
|
{ |
|
"epoch": 2.0433503638334107, |
|
"grad_norm": 0.8854493498802185, |
|
"learning_rate": 0.00040879529266026635, |
|
"loss": 2.8139, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 2.04644681839294, |
|
"grad_norm": 0.8652548789978027, |
|
"learning_rate": 0.0004094146794673274, |
|
"loss": 2.8153, |
|
"step": 6610 |
|
}, |
|
{ |
|
"epoch": 2.0495432729524694, |
|
"grad_norm": 0.8663405179977417, |
|
"learning_rate": 0.00041003406627438836, |
|
"loss": 2.8098, |
|
"step": 6620 |
|
}, |
|
{ |
|
"epoch": 2.0526397275119987, |
|
"grad_norm": 0.8482099175453186, |
|
"learning_rate": 0.00041065345308144934, |
|
"loss": 2.8102, |
|
"step": 6630 |
|
}, |
|
{ |
|
"epoch": 2.055736182071528, |
|
"grad_norm": 0.895483672618866, |
|
"learning_rate": 0.00041127283988851037, |
|
"loss": 2.8014, |
|
"step": 6640 |
|
}, |
|
{ |
|
"epoch": 2.0588326366310574, |
|
"grad_norm": 0.8933889865875244, |
|
"learning_rate": 0.0004118922266955714, |
|
"loss": 2.8008, |
|
"step": 6650 |
|
}, |
|
{ |
|
"epoch": 2.0619290911905868, |
|
"grad_norm": 0.87566739320755, |
|
"learning_rate": 0.0004125116135026324, |
|
"loss": 2.8055, |
|
"step": 6660 |
|
}, |
|
{ |
|
"epoch": 2.065025545750116, |
|
"grad_norm": 0.9240240454673767, |
|
"learning_rate": 0.0004131310003096934, |
|
"loss": 2.8249, |
|
"step": 6670 |
|
}, |
|
{ |
|
"epoch": 2.0681220003096454, |
|
"grad_norm": 0.9362452626228333, |
|
"learning_rate": 0.00041375038711675445, |
|
"loss": 2.8128, |
|
"step": 6680 |
|
}, |
|
{ |
|
"epoch": 2.0712184548691748, |
|
"grad_norm": 0.859845757484436, |
|
"learning_rate": 0.00041436977392381543, |
|
"loss": 2.7887, |
|
"step": 6690 |
|
}, |
|
{ |
|
"epoch": 2.074314909428704, |
|
"grad_norm": 0.9458219408988953, |
|
"learning_rate": 0.00041498916073087646, |
|
"loss": 2.8087, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 2.0774113639882335, |
|
"grad_norm": 0.9015805125236511, |
|
"learning_rate": 0.0004156085475379375, |
|
"loss": 2.8197, |
|
"step": 6710 |
|
}, |
|
{ |
|
"epoch": 2.080507818547763, |
|
"grad_norm": 0.8841304779052734, |
|
"learning_rate": 0.0004162279343449984, |
|
"loss": 2.793, |
|
"step": 6720 |
|
}, |
|
{ |
|
"epoch": 2.083604273107292, |
|
"grad_norm": 0.9217279553413391, |
|
"learning_rate": 0.00041684732115205946, |
|
"loss": 2.8279, |
|
"step": 6730 |
|
}, |
|
{ |
|
"epoch": 2.0867007276668215, |
|
"grad_norm": 0.9141611456871033, |
|
"learning_rate": 0.0004174667079591205, |
|
"loss": 2.7922, |
|
"step": 6740 |
|
}, |
|
{ |
|
"epoch": 2.089797182226351, |
|
"grad_norm": 0.8566716313362122, |
|
"learning_rate": 0.00041808609476618147, |
|
"loss": 2.8088, |
|
"step": 6750 |
|
}, |
|
{ |
|
"epoch": 2.09289363678588, |
|
"grad_norm": 0.9103225469589233, |
|
"learning_rate": 0.0004187054815732425, |
|
"loss": 2.8134, |
|
"step": 6760 |
|
}, |
|
{ |
|
"epoch": 2.0959900913454095, |
|
"grad_norm": 0.8901599049568176, |
|
"learning_rate": 0.0004193248683803035, |
|
"loss": 2.8114, |
|
"step": 6770 |
|
}, |
|
{ |
|
"epoch": 2.099086545904939, |
|
"grad_norm": 0.9474543333053589, |
|
"learning_rate": 0.0004199442551873645, |
|
"loss": 2.7907, |
|
"step": 6780 |
|
}, |
|
{ |
|
"epoch": 2.102183000464468, |
|
"grad_norm": 0.8805556297302246, |
|
"learning_rate": 0.00042056364199442555, |
|
"loss": 2.8023, |
|
"step": 6790 |
|
}, |
|
{ |
|
"epoch": 2.1052794550239975, |
|
"grad_norm": 0.9209165573120117, |
|
"learning_rate": 0.00042118302880148653, |
|
"loss": 2.8247, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 2.108375909583527, |
|
"grad_norm": 0.9121336340904236, |
|
"learning_rate": 0.00042180241560854756, |
|
"loss": 2.7983, |
|
"step": 6810 |
|
}, |
|
{ |
|
"epoch": 2.111472364143056, |
|
"grad_norm": 0.883575439453125, |
|
"learning_rate": 0.0004224218024156086, |
|
"loss": 2.7973, |
|
"step": 6820 |
|
}, |
|
{ |
|
"epoch": 2.1145688187025855, |
|
"grad_norm": 0.8569662570953369, |
|
"learning_rate": 0.0004230411892226695, |
|
"loss": 2.807, |
|
"step": 6830 |
|
}, |
|
{ |
|
"epoch": 2.117665273262115, |
|
"grad_norm": 0.8648683428764343, |
|
"learning_rate": 0.00042366057602973055, |
|
"loss": 2.7953, |
|
"step": 6840 |
|
}, |
|
{ |
|
"epoch": 2.120761727821644, |
|
"grad_norm": 1.0288830995559692, |
|
"learning_rate": 0.0004242799628367916, |
|
"loss": 2.7934, |
|
"step": 6850 |
|
}, |
|
{ |
|
"epoch": 2.1238581823811735, |
|
"grad_norm": 0.9366074800491333, |
|
"learning_rate": 0.00042489934964385257, |
|
"loss": 2.8014, |
|
"step": 6860 |
|
}, |
|
{ |
|
"epoch": 2.126954636940703, |
|
"grad_norm": 0.9614273905754089, |
|
"learning_rate": 0.0004255187364509136, |
|
"loss": 2.7822, |
|
"step": 6870 |
|
}, |
|
{ |
|
"epoch": 2.130051091500232, |
|
"grad_norm": 0.8939881324768066, |
|
"learning_rate": 0.00042613812325797463, |
|
"loss": 2.8195, |
|
"step": 6880 |
|
}, |
|
{ |
|
"epoch": 2.1331475460597615, |
|
"grad_norm": 0.9166781902313232, |
|
"learning_rate": 0.0004267575100650356, |
|
"loss": 2.7889, |
|
"step": 6890 |
|
}, |
|
{ |
|
"epoch": 2.136244000619291, |
|
"grad_norm": 0.8826269507408142, |
|
"learning_rate": 0.00042737689687209665, |
|
"loss": 2.8041, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 2.13934045517882, |
|
"grad_norm": 0.9127874970436096, |
|
"learning_rate": 0.0004279962836791577, |
|
"loss": 2.7986, |
|
"step": 6910 |
|
}, |
|
{ |
|
"epoch": 2.1424369097383495, |
|
"grad_norm": 0.9072954654693604, |
|
"learning_rate": 0.00042861567048621866, |
|
"loss": 2.8031, |
|
"step": 6920 |
|
}, |
|
{ |
|
"epoch": 2.145533364297879, |
|
"grad_norm": 0.8833560943603516, |
|
"learning_rate": 0.00042923505729327964, |
|
"loss": 2.7911, |
|
"step": 6930 |
|
}, |
|
{ |
|
"epoch": 2.1486298188574082, |
|
"grad_norm": 0.861221194267273, |
|
"learning_rate": 0.00042985444410034067, |
|
"loss": 2.8073, |
|
"step": 6940 |
|
}, |
|
{ |
|
"epoch": 2.1517262734169376, |
|
"grad_norm": 0.9040530323982239, |
|
"learning_rate": 0.00043047383090740165, |
|
"loss": 2.7849, |
|
"step": 6950 |
|
}, |
|
{ |
|
"epoch": 2.154822727976467, |
|
"grad_norm": 0.9143641591072083, |
|
"learning_rate": 0.0004310932177144627, |
|
"loss": 2.7896, |
|
"step": 6960 |
|
}, |
|
{ |
|
"epoch": 2.1579191825359962, |
|
"grad_norm": 0.8545592427253723, |
|
"learning_rate": 0.0004317126045215237, |
|
"loss": 2.7971, |
|
"step": 6970 |
|
}, |
|
{ |
|
"epoch": 2.1610156370955256, |
|
"grad_norm": 0.9303133487701416, |
|
"learning_rate": 0.0004323319913285847, |
|
"loss": 2.7784, |
|
"step": 6980 |
|
}, |
|
{ |
|
"epoch": 2.164112091655055, |
|
"grad_norm": 0.9570648074150085, |
|
"learning_rate": 0.00043295137813564573, |
|
"loss": 2.7977, |
|
"step": 6990 |
|
}, |
|
{ |
|
"epoch": 2.1672085462145843, |
|
"grad_norm": 0.906696081161499, |
|
"learning_rate": 0.00043357076494270676, |
|
"loss": 2.7947, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 2.1703050007741136, |
|
"grad_norm": 0.8919961452484131, |
|
"learning_rate": 0.00043419015174976774, |
|
"loss": 2.7926, |
|
"step": 7010 |
|
}, |
|
{ |
|
"epoch": 2.173401455333643, |
|
"grad_norm": 0.8740367889404297, |
|
"learning_rate": 0.0004348095385568288, |
|
"loss": 2.7747, |
|
"step": 7020 |
|
}, |
|
{ |
|
"epoch": 2.1764979098931723, |
|
"grad_norm": 0.8785775899887085, |
|
"learning_rate": 0.0004354289253638898, |
|
"loss": 2.791, |
|
"step": 7030 |
|
}, |
|
{ |
|
"epoch": 2.1795943644527016, |
|
"grad_norm": 0.9824354648590088, |
|
"learning_rate": 0.00043604831217095073, |
|
"loss": 2.7756, |
|
"step": 7040 |
|
}, |
|
{ |
|
"epoch": 2.182690819012231, |
|
"grad_norm": 0.9581257104873657, |
|
"learning_rate": 0.00043666769897801177, |
|
"loss": 2.7893, |
|
"step": 7050 |
|
}, |
|
{ |
|
"epoch": 2.1857872735717603, |
|
"grad_norm": 0.9003785252571106, |
|
"learning_rate": 0.0004372870857850728, |
|
"loss": 2.7857, |
|
"step": 7060 |
|
}, |
|
{ |
|
"epoch": 2.1888837281312896, |
|
"grad_norm": 0.9463407397270203, |
|
"learning_rate": 0.0004379064725921338, |
|
"loss": 2.7608, |
|
"step": 7070 |
|
}, |
|
{ |
|
"epoch": 2.191980182690819, |
|
"grad_norm": 0.9050635695457458, |
|
"learning_rate": 0.0004385258593991948, |
|
"loss": 2.7703, |
|
"step": 7080 |
|
}, |
|
{ |
|
"epoch": 2.1950766372503483, |
|
"grad_norm": 0.8689008951187134, |
|
"learning_rate": 0.0004391452462062558, |
|
"loss": 2.7742, |
|
"step": 7090 |
|
}, |
|
{ |
|
"epoch": 2.1981730918098776, |
|
"grad_norm": 0.8723441958427429, |
|
"learning_rate": 0.0004397646330133168, |
|
"loss": 2.7694, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 2.201269546369407, |
|
"grad_norm": 0.8924479484558105, |
|
"learning_rate": 0.00044038401982037786, |
|
"loss": 2.7906, |
|
"step": 7110 |
|
}, |
|
{ |
|
"epoch": 2.2043660009289363, |
|
"grad_norm": 0.919276773929596, |
|
"learning_rate": 0.00044100340662743884, |
|
"loss": 2.7872, |
|
"step": 7120 |
|
}, |
|
{ |
|
"epoch": 2.2074624554884656, |
|
"grad_norm": 0.901465654373169, |
|
"learning_rate": 0.00044162279343449987, |
|
"loss": 2.7465, |
|
"step": 7130 |
|
}, |
|
{ |
|
"epoch": 2.210558910047995, |
|
"grad_norm": 0.8734842538833618, |
|
"learning_rate": 0.00044224218024156085, |
|
"loss": 2.7662, |
|
"step": 7140 |
|
}, |
|
{ |
|
"epoch": 2.2136553646075243, |
|
"grad_norm": 0.9729484915733337, |
|
"learning_rate": 0.00044286156704862183, |
|
"loss": 2.7681, |
|
"step": 7150 |
|
}, |
|
{ |
|
"epoch": 2.2167518191670537, |
|
"grad_norm": 0.8634438514709473, |
|
"learning_rate": 0.00044348095385568286, |
|
"loss": 2.7694, |
|
"step": 7160 |
|
}, |
|
{ |
|
"epoch": 2.219848273726583, |
|
"grad_norm": 0.8623734712600708, |
|
"learning_rate": 0.0004441003406627439, |
|
"loss": 2.7775, |
|
"step": 7170 |
|
}, |
|
{ |
|
"epoch": 2.2229447282861123, |
|
"grad_norm": 0.9596241116523743, |
|
"learning_rate": 0.0004447197274698049, |
|
"loss": 2.7916, |
|
"step": 7180 |
|
}, |
|
{ |
|
"epoch": 2.2260411828456417, |
|
"grad_norm": 0.8765792846679688, |
|
"learning_rate": 0.0004453391142768659, |
|
"loss": 2.7529, |
|
"step": 7190 |
|
}, |
|
{ |
|
"epoch": 2.229137637405171, |
|
"grad_norm": 0.887290894985199, |
|
"learning_rate": 0.00044595850108392694, |
|
"loss": 2.7697, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 2.2322340919647004, |
|
"grad_norm": 0.842238187789917, |
|
"learning_rate": 0.0004465778878909879, |
|
"loss": 2.7521, |
|
"step": 7210 |
|
}, |
|
{ |
|
"epoch": 2.2353305465242297, |
|
"grad_norm": 0.9190672039985657, |
|
"learning_rate": 0.00044719727469804896, |
|
"loss": 2.7611, |
|
"step": 7220 |
|
}, |
|
{ |
|
"epoch": 2.238427001083759, |
|
"grad_norm": 0.8801867365837097, |
|
"learning_rate": 0.00044781666150511, |
|
"loss": 2.7656, |
|
"step": 7230 |
|
}, |
|
{ |
|
"epoch": 2.2415234556432884, |
|
"grad_norm": 0.9014734029769897, |
|
"learning_rate": 0.0004484360483121709, |
|
"loss": 2.7855, |
|
"step": 7240 |
|
}, |
|
{ |
|
"epoch": 2.2446199102028177, |
|
"grad_norm": 0.8749867081642151, |
|
"learning_rate": 0.00044905543511923195, |
|
"loss": 2.7683, |
|
"step": 7250 |
|
}, |
|
{ |
|
"epoch": 2.247716364762347, |
|
"grad_norm": 0.8823255896568298, |
|
"learning_rate": 0.000449674821926293, |
|
"loss": 2.7468, |
|
"step": 7260 |
|
}, |
|
{ |
|
"epoch": 2.2508128193218764, |
|
"grad_norm": 1.020506739616394, |
|
"learning_rate": 0.00045029420873335396, |
|
"loss": 2.7633, |
|
"step": 7270 |
|
}, |
|
{ |
|
"epoch": 2.2539092738814057, |
|
"grad_norm": 0.9416619539260864, |
|
"learning_rate": 0.000450913595540415, |
|
"loss": 2.7598, |
|
"step": 7280 |
|
}, |
|
{ |
|
"epoch": 2.257005728440935, |
|
"grad_norm": 0.8934683203697205, |
|
"learning_rate": 0.00045153298234747603, |
|
"loss": 2.767, |
|
"step": 7290 |
|
}, |
|
{ |
|
"epoch": 2.2601021830004644, |
|
"grad_norm": 0.9301040768623352, |
|
"learning_rate": 0.000452152369154537, |
|
"loss": 2.768, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 2.2631986375599937, |
|
"grad_norm": 0.9030665159225464, |
|
"learning_rate": 0.00045277175596159804, |
|
"loss": 2.7468, |
|
"step": 7310 |
|
}, |
|
{ |
|
"epoch": 2.266295092119523, |
|
"grad_norm": 0.8950912952423096, |
|
"learning_rate": 0.0004533911427686591, |
|
"loss": 2.7583, |
|
"step": 7320 |
|
}, |
|
{ |
|
"epoch": 2.2693915466790524, |
|
"grad_norm": 0.9231360554695129, |
|
"learning_rate": 0.00045401052957572005, |
|
"loss": 2.768, |
|
"step": 7330 |
|
}, |
|
{ |
|
"epoch": 2.2724880012385817, |
|
"grad_norm": 0.9247618317604065, |
|
"learning_rate": 0.0004546299163827811, |
|
"loss": 2.7679, |
|
"step": 7340 |
|
}, |
|
{ |
|
"epoch": 2.275584455798111, |
|
"grad_norm": 0.8417907953262329, |
|
"learning_rate": 0.00045524930318984207, |
|
"loss": 2.7641, |
|
"step": 7350 |
|
}, |
|
{ |
|
"epoch": 2.2786809103576404, |
|
"grad_norm": 0.881175696849823, |
|
"learning_rate": 0.00045586868999690305, |
|
"loss": 2.7377, |
|
"step": 7360 |
|
}, |
|
{ |
|
"epoch": 2.2817773649171698, |
|
"grad_norm": 0.9351217746734619, |
|
"learning_rate": 0.0004564880768039641, |
|
"loss": 2.7521, |
|
"step": 7370 |
|
}, |
|
{ |
|
"epoch": 2.284873819476699, |
|
"grad_norm": 0.8650684952735901, |
|
"learning_rate": 0.0004571074636110251, |
|
"loss": 2.7675, |
|
"step": 7380 |
|
}, |
|
{ |
|
"epoch": 2.2879702740362284, |
|
"grad_norm": 0.922113299369812, |
|
"learning_rate": 0.0004577268504180861, |
|
"loss": 2.7401, |
|
"step": 7390 |
|
}, |
|
{ |
|
"epoch": 2.2910667285957578, |
|
"grad_norm": 0.8902767896652222, |
|
"learning_rate": 0.0004583462372251471, |
|
"loss": 2.7772, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 2.294163183155287, |
|
"grad_norm": 0.8764835596084595, |
|
"learning_rate": 0.00045896562403220816, |
|
"loss": 2.7526, |
|
"step": 7410 |
|
}, |
|
{ |
|
"epoch": 2.2972596377148164, |
|
"grad_norm": 0.8847823739051819, |
|
"learning_rate": 0.00045958501083926914, |
|
"loss": 2.7504, |
|
"step": 7420 |
|
}, |
|
{ |
|
"epoch": 2.300356092274346, |
|
"grad_norm": 0.8462940454483032, |
|
"learning_rate": 0.00046020439764633017, |
|
"loss": 2.7209, |
|
"step": 7430 |
|
}, |
|
{ |
|
"epoch": 2.303452546833875, |
|
"grad_norm": 0.8645547032356262, |
|
"learning_rate": 0.00046082378445339115, |
|
"loss": 2.7464, |
|
"step": 7440 |
|
}, |
|
{ |
|
"epoch": 2.3065490013934045, |
|
"grad_norm": 0.8842138051986694, |
|
"learning_rate": 0.00046144317126045213, |
|
"loss": 2.7566, |
|
"step": 7450 |
|
}, |
|
{ |
|
"epoch": 2.309645455952934, |
|
"grad_norm": 0.8625742197036743, |
|
"learning_rate": 0.00046206255806751316, |
|
"loss": 2.753, |
|
"step": 7460 |
|
}, |
|
{ |
|
"epoch": 2.312741910512463, |
|
"grad_norm": 0.922121524810791, |
|
"learning_rate": 0.00046268194487457414, |
|
"loss": 2.75, |
|
"step": 7470 |
|
}, |
|
{ |
|
"epoch": 2.3158383650719925, |
|
"grad_norm": 0.8739849925041199, |
|
"learning_rate": 0.0004633013316816352, |
|
"loss": 2.7513, |
|
"step": 7480 |
|
}, |
|
{ |
|
"epoch": 2.318934819631522, |
|
"grad_norm": 0.8614432215690613, |
|
"learning_rate": 0.0004639207184886962, |
|
"loss": 2.75, |
|
"step": 7490 |
|
}, |
|
{ |
|
"epoch": 2.322031274191051, |
|
"grad_norm": 0.8714541792869568, |
|
"learning_rate": 0.0004645401052957572, |
|
"loss": 2.7297, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 2.3251277287505805, |
|
"grad_norm": 0.9732015132904053, |
|
"learning_rate": 0.0004651594921028182, |
|
"loss": 2.7529, |
|
"step": 7510 |
|
}, |
|
{ |
|
"epoch": 2.32822418331011, |
|
"grad_norm": 0.9061838388442993, |
|
"learning_rate": 0.00046577887890987925, |
|
"loss": 2.7541, |
|
"step": 7520 |
|
}, |
|
{ |
|
"epoch": 2.331320637869639, |
|
"grad_norm": 1.0056427717208862, |
|
"learning_rate": 0.00046639826571694023, |
|
"loss": 2.7381, |
|
"step": 7530 |
|
}, |
|
{ |
|
"epoch": 2.3344170924291685, |
|
"grad_norm": 0.9382318258285522, |
|
"learning_rate": 0.00046701765252400127, |
|
"loss": 2.758, |
|
"step": 7540 |
|
}, |
|
{ |
|
"epoch": 2.337513546988698, |
|
"grad_norm": 0.9322879314422607, |
|
"learning_rate": 0.0004676370393310623, |
|
"loss": 2.7196, |
|
"step": 7550 |
|
}, |
|
{ |
|
"epoch": 2.340610001548227, |
|
"grad_norm": 0.8709734678268433, |
|
"learning_rate": 0.0004682564261381232, |
|
"loss": 2.7259, |
|
"step": 7560 |
|
}, |
|
{ |
|
"epoch": 2.3437064561077565, |
|
"grad_norm": 0.8605784177780151, |
|
"learning_rate": 0.00046887581294518426, |
|
"loss": 2.7116, |
|
"step": 7570 |
|
}, |
|
{ |
|
"epoch": 2.346802910667286, |
|
"grad_norm": 0.8777926564216614, |
|
"learning_rate": 0.0004694951997522453, |
|
"loss": 2.7389, |
|
"step": 7580 |
|
}, |
|
{ |
|
"epoch": 2.349899365226815, |
|
"grad_norm": 0.9535753130912781, |
|
"learning_rate": 0.00047011458655930627, |
|
"loss": 2.7402, |
|
"step": 7590 |
|
}, |
|
{ |
|
"epoch": 2.3529958197863445, |
|
"grad_norm": 0.8377962708473206, |
|
"learning_rate": 0.0004707339733663673, |
|
"loss": 2.7672, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 2.356092274345874, |
|
"grad_norm": 0.9221674799919128, |
|
"learning_rate": 0.00047135336017342834, |
|
"loss": 2.7341, |
|
"step": 7610 |
|
}, |
|
{ |
|
"epoch": 2.359188728905403, |
|
"grad_norm": 0.9175540804862976, |
|
"learning_rate": 0.0004719727469804893, |
|
"loss": 2.7332, |
|
"step": 7620 |
|
}, |
|
{ |
|
"epoch": 2.3622851834649325, |
|
"grad_norm": 0.896039605140686, |
|
"learning_rate": 0.00047259213378755035, |
|
"loss": 2.7587, |
|
"step": 7630 |
|
}, |
|
{ |
|
"epoch": 2.365381638024462, |
|
"grad_norm": 0.8460658192634583, |
|
"learning_rate": 0.0004732115205946114, |
|
"loss": 2.7378, |
|
"step": 7640 |
|
}, |
|
{ |
|
"epoch": 2.3684780925839912, |
|
"grad_norm": 0.9001418352127075, |
|
"learning_rate": 0.00047383090740167236, |
|
"loss": 2.7374, |
|
"step": 7650 |
|
}, |
|
{ |
|
"epoch": 2.3715745471435206, |
|
"grad_norm": 0.9807076454162598, |
|
"learning_rate": 0.00047445029420873334, |
|
"loss": 2.723, |
|
"step": 7660 |
|
}, |
|
{ |
|
"epoch": 2.37467100170305, |
|
"grad_norm": 0.8731216192245483, |
|
"learning_rate": 0.0004750696810157944, |
|
"loss": 2.7112, |
|
"step": 7670 |
|
}, |
|
{ |
|
"epoch": 2.3777674562625792, |
|
"grad_norm": 0.8750482201576233, |
|
"learning_rate": 0.00047568906782285536, |
|
"loss": 2.7016, |
|
"step": 7680 |
|
}, |
|
{ |
|
"epoch": 2.3808639108221086, |
|
"grad_norm": 0.8985123634338379, |
|
"learning_rate": 0.0004763084546299164, |
|
"loss": 2.7462, |
|
"step": 7690 |
|
}, |
|
{ |
|
"epoch": 2.383960365381638, |
|
"grad_norm": 0.8914074301719666, |
|
"learning_rate": 0.0004769278414369774, |
|
"loss": 2.7253, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 2.3870568199411673, |
|
"grad_norm": 0.8856596350669861, |
|
"learning_rate": 0.0004775472282440384, |
|
"loss": 2.7438, |
|
"step": 7710 |
|
}, |
|
{ |
|
"epoch": 2.3901532745006966, |
|
"grad_norm": 0.9476223587989807, |
|
"learning_rate": 0.00047816661505109944, |
|
"loss": 2.7208, |
|
"step": 7720 |
|
}, |
|
{ |
|
"epoch": 2.393249729060226, |
|
"grad_norm": 0.8765897750854492, |
|
"learning_rate": 0.00047878600185816047, |
|
"loss": 2.7302, |
|
"step": 7730 |
|
}, |
|
{ |
|
"epoch": 2.3963461836197553, |
|
"grad_norm": 0.9087428450584412, |
|
"learning_rate": 0.00047940538866522145, |
|
"loss": 2.7225, |
|
"step": 7740 |
|
}, |
|
{ |
|
"epoch": 2.3994426381792846, |
|
"grad_norm": 0.9276483058929443, |
|
"learning_rate": 0.0004800247754722825, |
|
"loss": 2.7297, |
|
"step": 7750 |
|
}, |
|
{ |
|
"epoch": 2.402539092738814, |
|
"grad_norm": 0.8988469243049622, |
|
"learning_rate": 0.0004806441622793434, |
|
"loss": 2.7167, |
|
"step": 7760 |
|
}, |
|
{ |
|
"epoch": 2.4056355472983433, |
|
"grad_norm": 0.865112841129303, |
|
"learning_rate": 0.00048126354908640444, |
|
"loss": 2.7187, |
|
"step": 7770 |
|
}, |
|
{ |
|
"epoch": 2.4087320018578726, |
|
"grad_norm": 0.8832447528839111, |
|
"learning_rate": 0.0004818829358934655, |
|
"loss": 2.7123, |
|
"step": 7780 |
|
}, |
|
{ |
|
"epoch": 2.411828456417402, |
|
"grad_norm": 0.8970694541931152, |
|
"learning_rate": 0.00048250232270052645, |
|
"loss": 2.7255, |
|
"step": 7790 |
|
}, |
|
{ |
|
"epoch": 2.4149249109769313, |
|
"grad_norm": 0.8232760429382324, |
|
"learning_rate": 0.0004831217095075875, |
|
"loss": 2.7315, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 2.4180213655364606, |
|
"grad_norm": 0.9075847268104553, |
|
"learning_rate": 0.0004837410963146485, |
|
"loss": 2.7098, |
|
"step": 7810 |
|
}, |
|
{ |
|
"epoch": 2.42111782009599, |
|
"grad_norm": 0.871097981929779, |
|
"learning_rate": 0.0004843604831217095, |
|
"loss": 2.7172, |
|
"step": 7820 |
|
}, |
|
{ |
|
"epoch": 2.4242142746555193, |
|
"grad_norm": 0.8684946894645691, |
|
"learning_rate": 0.00048497986992877053, |
|
"loss": 2.7031, |
|
"step": 7830 |
|
}, |
|
{ |
|
"epoch": 2.4273107292150486, |
|
"grad_norm": 0.9100140929222107, |
|
"learning_rate": 0.00048559925673583157, |
|
"loss": 2.7175, |
|
"step": 7840 |
|
}, |
|
{ |
|
"epoch": 2.430407183774578, |
|
"grad_norm": 0.8607642650604248, |
|
"learning_rate": 0.00048621864354289254, |
|
"loss": 2.7149, |
|
"step": 7850 |
|
}, |
|
{ |
|
"epoch": 2.4335036383341073, |
|
"grad_norm": 0.865871012210846, |
|
"learning_rate": 0.0004868380303499536, |
|
"loss": 2.7139, |
|
"step": 7860 |
|
}, |
|
{ |
|
"epoch": 2.4366000928936367, |
|
"grad_norm": 0.9190123677253723, |
|
"learning_rate": 0.00048745741715701456, |
|
"loss": 2.7167, |
|
"step": 7870 |
|
}, |
|
{ |
|
"epoch": 2.439696547453166, |
|
"grad_norm": 0.8954902291297913, |
|
"learning_rate": 0.00048807680396407554, |
|
"loss": 2.7041, |
|
"step": 7880 |
|
}, |
|
{ |
|
"epoch": 2.4427930020126953, |
|
"grad_norm": 0.9070473313331604, |
|
"learning_rate": 0.0004886961907711366, |
|
"loss": 2.712, |
|
"step": 7890 |
|
}, |
|
{ |
|
"epoch": 2.4458894565722247, |
|
"grad_norm": 1.2090919017791748, |
|
"learning_rate": 0.0004893155775781977, |
|
"loss": 2.7241, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 2.448985911131754, |
|
"grad_norm": 0.8956063985824585, |
|
"learning_rate": 0.0004899349643852586, |
|
"loss": 2.7089, |
|
"step": 7910 |
|
}, |
|
{ |
|
"epoch": 2.4520823656912833, |
|
"grad_norm": 0.8796259164810181, |
|
"learning_rate": 0.0004905543511923196, |
|
"loss": 2.6996, |
|
"step": 7920 |
|
}, |
|
{ |
|
"epoch": 2.4551788202508127, |
|
"grad_norm": 0.8752288222312927, |
|
"learning_rate": 0.0004911737379993806, |
|
"loss": 2.7141, |
|
"step": 7930 |
|
}, |
|
{ |
|
"epoch": 2.458275274810342, |
|
"grad_norm": 0.8404427170753479, |
|
"learning_rate": 0.0004917931248064416, |
|
"loss": 2.7086, |
|
"step": 7940 |
|
}, |
|
{ |
|
"epoch": 2.4613717293698714, |
|
"grad_norm": 0.8801198601722717, |
|
"learning_rate": 0.0004924125116135027, |
|
"loss": 2.716, |
|
"step": 7950 |
|
}, |
|
{ |
|
"epoch": 2.4644681839294007, |
|
"grad_norm": 0.8937883377075195, |
|
"learning_rate": 0.0004930318984205636, |
|
"loss": 2.6963, |
|
"step": 7960 |
|
}, |
|
{ |
|
"epoch": 2.46756463848893, |
|
"grad_norm": 0.8348713517189026, |
|
"learning_rate": 0.0004936512852276246, |
|
"loss": 2.7158, |
|
"step": 7970 |
|
}, |
|
{ |
|
"epoch": 2.4706610930484594, |
|
"grad_norm": 0.9168616533279419, |
|
"learning_rate": 0.0004942706720346857, |
|
"loss": 2.7212, |
|
"step": 7980 |
|
}, |
|
{ |
|
"epoch": 2.4737575476079887, |
|
"grad_norm": 0.8765811324119568, |
|
"learning_rate": 0.0004948900588417467, |
|
"loss": 2.7037, |
|
"step": 7990 |
|
}, |
|
{ |
|
"epoch": 2.476854002167518, |
|
"grad_norm": 0.9563819766044617, |
|
"learning_rate": 0.0004955094456488077, |
|
"loss": 2.7076, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 2.4799504567270474, |
|
"grad_norm": 0.9105591177940369, |
|
"learning_rate": 0.0004961288324558688, |
|
"loss": 2.704, |
|
"step": 8010 |
|
}, |
|
{ |
|
"epoch": 2.4830469112865767, |
|
"grad_norm": 0.8907128572463989, |
|
"learning_rate": 0.0004967482192629297, |
|
"loss": 2.711, |
|
"step": 8020 |
|
}, |
|
{ |
|
"epoch": 2.486143365846106, |
|
"grad_norm": 0.9110057353973389, |
|
"learning_rate": 0.0004973676060699907, |
|
"loss": 2.715, |
|
"step": 8030 |
|
}, |
|
{ |
|
"epoch": 2.4892398204056354, |
|
"grad_norm": 0.8938244581222534, |
|
"learning_rate": 0.0004979869928770517, |
|
"loss": 2.7236, |
|
"step": 8040 |
|
}, |
|
{ |
|
"epoch": 2.4923362749651647, |
|
"grad_norm": 0.8680298328399658, |
|
"learning_rate": 0.0004986063796841128, |
|
"loss": 2.7141, |
|
"step": 8050 |
|
}, |
|
{ |
|
"epoch": 2.495432729524694, |
|
"grad_norm": 1.2556971311569214, |
|
"learning_rate": 0.0004992257664911738, |
|
"loss": 2.7182, |
|
"step": 8060 |
|
}, |
|
{ |
|
"epoch": 2.4985291840842234, |
|
"grad_norm": 0.8885079026222229, |
|
"learning_rate": 0.0004998451532982347, |
|
"loss": 2.7178, |
|
"step": 8070 |
|
}, |
|
{ |
|
"epoch": 2.5016256386437528, |
|
"grad_norm": 0.8683394193649292, |
|
"learning_rate": 0.0005004645401052957, |
|
"loss": 2.713, |
|
"step": 8080 |
|
}, |
|
{ |
|
"epoch": 2.504722093203282, |
|
"grad_norm": 0.8895092010498047, |
|
"learning_rate": 0.0005010839269123568, |
|
"loss": 2.7244, |
|
"step": 8090 |
|
}, |
|
{ |
|
"epoch": 2.5078185477628114, |
|
"grad_norm": 0.9000723958015442, |
|
"learning_rate": 0.0005017033137194178, |
|
"loss": 2.7018, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 2.5109150023223408, |
|
"grad_norm": 0.8466011881828308, |
|
"learning_rate": 0.0005023227005264788, |
|
"loss": 2.7252, |
|
"step": 8110 |
|
}, |
|
{ |
|
"epoch": 2.51401145688187, |
|
"grad_norm": 0.8740931749343872, |
|
"learning_rate": 0.0005029420873335399, |
|
"loss": 2.7023, |
|
"step": 8120 |
|
}, |
|
{ |
|
"epoch": 2.5171079114413994, |
|
"grad_norm": 0.9173566102981567, |
|
"learning_rate": 0.0005035614741406008, |
|
"loss": 2.7158, |
|
"step": 8130 |
|
}, |
|
{ |
|
"epoch": 2.520204366000929, |
|
"grad_norm": 0.9136703610420227, |
|
"learning_rate": 0.0005041808609476618, |
|
"loss": 2.7081, |
|
"step": 8140 |
|
}, |
|
{ |
|
"epoch": 2.523300820560458, |
|
"grad_norm": 0.9001860022544861, |
|
"learning_rate": 0.0005048002477547229, |
|
"loss": 2.6879, |
|
"step": 8150 |
|
}, |
|
{ |
|
"epoch": 2.5263972751199875, |
|
"grad_norm": 0.8756097555160522, |
|
"learning_rate": 0.0005054196345617839, |
|
"loss": 2.714, |
|
"step": 8160 |
|
}, |
|
{ |
|
"epoch": 2.529493729679517, |
|
"grad_norm": 0.8774548768997192, |
|
"learning_rate": 0.0005060390213688449, |
|
"loss": 2.6751, |
|
"step": 8170 |
|
}, |
|
{ |
|
"epoch": 2.532590184239046, |
|
"grad_norm": 0.8764857649803162, |
|
"learning_rate": 0.0005066584081759059, |
|
"loss": 2.7045, |
|
"step": 8180 |
|
}, |
|
{ |
|
"epoch": 2.5356866387985755, |
|
"grad_norm": 0.8589802980422974, |
|
"learning_rate": 0.0005072777949829669, |
|
"loss": 2.7001, |
|
"step": 8190 |
|
}, |
|
{ |
|
"epoch": 2.538783093358105, |
|
"grad_norm": 0.8591241836547852, |
|
"learning_rate": 0.0005078971817900279, |
|
"loss": 2.6838, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 2.541879547917634, |
|
"grad_norm": 0.8960736989974976, |
|
"learning_rate": 0.000508516568597089, |
|
"loss": 2.6847, |
|
"step": 8210 |
|
}, |
|
{ |
|
"epoch": 2.5449760024771635, |
|
"grad_norm": 0.8818134069442749, |
|
"learning_rate": 0.00050913595540415, |
|
"loss": 2.6907, |
|
"step": 8220 |
|
}, |
|
{ |
|
"epoch": 2.548072457036693, |
|
"grad_norm": 0.8439919948577881, |
|
"learning_rate": 0.0005097553422112108, |
|
"loss": 2.6649, |
|
"step": 8230 |
|
}, |
|
{ |
|
"epoch": 2.551168911596222, |
|
"grad_norm": 0.953252911567688, |
|
"learning_rate": 0.0005103747290182718, |
|
"loss": 2.7113, |
|
"step": 8240 |
|
}, |
|
{ |
|
"epoch": 2.5542653661557515, |
|
"grad_norm": 0.8814793825149536, |
|
"learning_rate": 0.0005109941158253329, |
|
"loss": 2.6971, |
|
"step": 8250 |
|
}, |
|
{ |
|
"epoch": 2.557361820715281, |
|
"grad_norm": 0.8562922477722168, |
|
"learning_rate": 0.0005116135026323939, |
|
"loss": 2.6816, |
|
"step": 8260 |
|
}, |
|
{ |
|
"epoch": 2.56045827527481, |
|
"grad_norm": 0.9286318421363831, |
|
"learning_rate": 0.0005122328894394549, |
|
"loss": 2.6976, |
|
"step": 8270 |
|
}, |
|
{ |
|
"epoch": 2.5635547298343395, |
|
"grad_norm": 0.8571282029151917, |
|
"learning_rate": 0.000512852276246516, |
|
"loss": 2.6931, |
|
"step": 8280 |
|
}, |
|
{ |
|
"epoch": 2.566651184393869, |
|
"grad_norm": 0.8638617396354675, |
|
"learning_rate": 0.0005134716630535769, |
|
"loss": 2.7112, |
|
"step": 8290 |
|
}, |
|
{ |
|
"epoch": 2.569747638953398, |
|
"grad_norm": 0.8954980969429016, |
|
"learning_rate": 0.0005140910498606379, |
|
"loss": 2.6775, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 2.5728440935129275, |
|
"grad_norm": 0.8603184223175049, |
|
"learning_rate": 0.000514710436667699, |
|
"loss": 2.6962, |
|
"step": 8310 |
|
}, |
|
{ |
|
"epoch": 2.575940548072457, |
|
"grad_norm": 0.8614330887794495, |
|
"learning_rate": 0.00051532982347476, |
|
"loss": 2.7119, |
|
"step": 8320 |
|
}, |
|
{ |
|
"epoch": 2.5790370026319867, |
|
"grad_norm": 0.853256106376648, |
|
"learning_rate": 0.000515949210281821, |
|
"loss": 2.6701, |
|
"step": 8330 |
|
}, |
|
{ |
|
"epoch": 2.5821334571915155, |
|
"grad_norm": 0.9329004883766174, |
|
"learning_rate": 0.000516568597088882, |
|
"loss": 2.7029, |
|
"step": 8340 |
|
}, |
|
{ |
|
"epoch": 2.5852299117510453, |
|
"grad_norm": 0.8642740249633789, |
|
"learning_rate": 0.000517187983895943, |
|
"loss": 2.6927, |
|
"step": 8350 |
|
}, |
|
{ |
|
"epoch": 2.5883263663105742, |
|
"grad_norm": 0.8851795196533203, |
|
"learning_rate": 0.000517807370703004, |
|
"loss": 2.6801, |
|
"step": 8360 |
|
}, |
|
{ |
|
"epoch": 2.591422820870104, |
|
"grad_norm": 0.8649539947509766, |
|
"learning_rate": 0.0005184267575100651, |
|
"loss": 2.671, |
|
"step": 8370 |
|
}, |
|
{ |
|
"epoch": 2.594519275429633, |
|
"grad_norm": 0.8715213537216187, |
|
"learning_rate": 0.0005190461443171261, |
|
"loss": 2.69, |
|
"step": 8380 |
|
}, |
|
{ |
|
"epoch": 2.5976157299891627, |
|
"grad_norm": 0.8469790816307068, |
|
"learning_rate": 0.000519665531124187, |
|
"loss": 2.6773, |
|
"step": 8390 |
|
}, |
|
{ |
|
"epoch": 2.6007121845486916, |
|
"grad_norm": 0.8525969982147217, |
|
"learning_rate": 0.0005202849179312481, |
|
"loss": 2.6728, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 2.6038086391082214, |
|
"grad_norm": 0.8539503812789917, |
|
"learning_rate": 0.0005209043047383091, |
|
"loss": 2.6869, |
|
"step": 8410 |
|
}, |
|
{ |
|
"epoch": 2.6069050936677503, |
|
"grad_norm": 0.877877414226532, |
|
"learning_rate": 0.0005215236915453701, |
|
"loss": 2.6924, |
|
"step": 8420 |
|
}, |
|
{ |
|
"epoch": 2.61000154822728, |
|
"grad_norm": 0.9159960150718689, |
|
"learning_rate": 0.0005221430783524312, |
|
"loss": 2.6827, |
|
"step": 8430 |
|
}, |
|
{ |
|
"epoch": 2.613098002786809, |
|
"grad_norm": 0.9159612059593201, |
|
"learning_rate": 0.000522762465159492, |
|
"loss": 2.6715, |
|
"step": 8440 |
|
}, |
|
{ |
|
"epoch": 2.6161944573463387, |
|
"grad_norm": 0.8842989802360535, |
|
"learning_rate": 0.000523381851966553, |
|
"loss": 2.6781, |
|
"step": 8450 |
|
}, |
|
{ |
|
"epoch": 2.6192909119058676, |
|
"grad_norm": 0.981275737285614, |
|
"learning_rate": 0.0005240012387736141, |
|
"loss": 2.6975, |
|
"step": 8460 |
|
}, |
|
{ |
|
"epoch": 2.6223873664653974, |
|
"grad_norm": 0.8604749441146851, |
|
"learning_rate": 0.0005246206255806751, |
|
"loss": 2.6785, |
|
"step": 8470 |
|
}, |
|
{ |
|
"epoch": 2.6254838210249263, |
|
"grad_norm": 0.880984902381897, |
|
"learning_rate": 0.0005252400123877361, |
|
"loss": 2.6743, |
|
"step": 8480 |
|
}, |
|
{ |
|
"epoch": 2.628580275584456, |
|
"grad_norm": 0.9086693525314331, |
|
"learning_rate": 0.0005258593991947972, |
|
"loss": 2.6827, |
|
"step": 8490 |
|
}, |
|
{ |
|
"epoch": 2.631676730143985, |
|
"grad_norm": 0.9209759831428528, |
|
"learning_rate": 0.0005264787860018581, |
|
"loss": 2.6969, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 2.6347731847035147, |
|
"grad_norm": 1.1329649686813354, |
|
"learning_rate": 0.0005270981728089191, |
|
"loss": 2.6682, |
|
"step": 8510 |
|
}, |
|
{ |
|
"epoch": 2.6378696392630436, |
|
"grad_norm": 0.904861569404602, |
|
"learning_rate": 0.0005277175596159802, |
|
"loss": 2.6765, |
|
"step": 8520 |
|
}, |
|
{ |
|
"epoch": 2.6409660938225734, |
|
"grad_norm": 0.9609228372573853, |
|
"learning_rate": 0.0005283369464230412, |
|
"loss": 2.6777, |
|
"step": 8530 |
|
}, |
|
{ |
|
"epoch": 2.6440625483821023, |
|
"grad_norm": 0.84135901927948, |
|
"learning_rate": 0.0005289563332301022, |
|
"loss": 2.6963, |
|
"step": 8540 |
|
}, |
|
{ |
|
"epoch": 2.647159002941632, |
|
"grad_norm": 0.9496148228645325, |
|
"learning_rate": 0.0005295757200371633, |
|
"loss": 2.6755, |
|
"step": 8550 |
|
}, |
|
{ |
|
"epoch": 2.650255457501161, |
|
"grad_norm": 0.9461915493011475, |
|
"learning_rate": 0.0005301951068442242, |
|
"loss": 2.6947, |
|
"step": 8560 |
|
}, |
|
{ |
|
"epoch": 2.6533519120606908, |
|
"grad_norm": 0.8542360067367554, |
|
"learning_rate": 0.0005308144936512852, |
|
"loss": 2.6722, |
|
"step": 8570 |
|
}, |
|
{ |
|
"epoch": 2.6564483666202197, |
|
"grad_norm": 0.9559420347213745, |
|
"learning_rate": 0.0005314338804583463, |
|
"loss": 2.6781, |
|
"step": 8580 |
|
}, |
|
{ |
|
"epoch": 2.6595448211797494, |
|
"grad_norm": 0.9376833438873291, |
|
"learning_rate": 0.0005320532672654073, |
|
"loss": 2.7124, |
|
"step": 8590 |
|
}, |
|
{ |
|
"epoch": 2.6626412757392783, |
|
"grad_norm": 0.8750305771827698, |
|
"learning_rate": 0.0005326726540724683, |
|
"loss": 2.695, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 2.665737730298808, |
|
"grad_norm": 0.8628771305084229, |
|
"learning_rate": 0.0005332920408795294, |
|
"loss": 2.6874, |
|
"step": 8610 |
|
}, |
|
{ |
|
"epoch": 2.668834184858337, |
|
"grad_norm": 0.91616290807724, |
|
"learning_rate": 0.0005339114276865903, |
|
"loss": 2.6733, |
|
"step": 8620 |
|
}, |
|
{ |
|
"epoch": 2.671930639417867, |
|
"grad_norm": 0.8734931349754333, |
|
"learning_rate": 0.0005345308144936513, |
|
"loss": 2.6805, |
|
"step": 8630 |
|
}, |
|
{ |
|
"epoch": 2.6750270939773957, |
|
"grad_norm": 0.8667175769805908, |
|
"learning_rate": 0.0005351502013007124, |
|
"loss": 2.6863, |
|
"step": 8640 |
|
}, |
|
{ |
|
"epoch": 2.6781235485369255, |
|
"grad_norm": 0.8947048783302307, |
|
"learning_rate": 0.0005357695881077733, |
|
"loss": 2.6906, |
|
"step": 8650 |
|
}, |
|
{ |
|
"epoch": 2.6812200030964544, |
|
"grad_norm": 0.9095123410224915, |
|
"learning_rate": 0.0005363889749148342, |
|
"loss": 2.6741, |
|
"step": 8660 |
|
}, |
|
{ |
|
"epoch": 2.684316457655984, |
|
"grad_norm": 0.8678126335144043, |
|
"learning_rate": 0.0005370083617218953, |
|
"loss": 2.6532, |
|
"step": 8670 |
|
}, |
|
{ |
|
"epoch": 2.687412912215513, |
|
"grad_norm": 0.8941618800163269, |
|
"learning_rate": 0.0005376277485289563, |
|
"loss": 2.6733, |
|
"step": 8680 |
|
}, |
|
{ |
|
"epoch": 2.690509366775043, |
|
"grad_norm": 0.9127388596534729, |
|
"learning_rate": 0.0005382471353360173, |
|
"loss": 2.6864, |
|
"step": 8690 |
|
}, |
|
{ |
|
"epoch": 2.6936058213345717, |
|
"grad_norm": 0.8542888760566711, |
|
"learning_rate": 0.0005388665221430784, |
|
"loss": 2.6839, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 2.6967022758941015, |
|
"grad_norm": 0.8937285542488098, |
|
"learning_rate": 0.0005394859089501394, |
|
"loss": 2.6911, |
|
"step": 8710 |
|
}, |
|
{ |
|
"epoch": 2.6997987304536304, |
|
"grad_norm": 0.9001040458679199, |
|
"learning_rate": 0.0005401052957572003, |
|
"loss": 2.6785, |
|
"step": 8720 |
|
}, |
|
{ |
|
"epoch": 2.70289518501316, |
|
"grad_norm": 0.9357818365097046, |
|
"learning_rate": 0.0005407246825642614, |
|
"loss": 2.6959, |
|
"step": 8730 |
|
}, |
|
{ |
|
"epoch": 2.705991639572689, |
|
"grad_norm": 0.9065813422203064, |
|
"learning_rate": 0.0005413440693713224, |
|
"loss": 2.6838, |
|
"step": 8740 |
|
}, |
|
{ |
|
"epoch": 2.709088094132219, |
|
"grad_norm": 0.8821165561676025, |
|
"learning_rate": 0.0005419634561783834, |
|
"loss": 2.6618, |
|
"step": 8750 |
|
}, |
|
{ |
|
"epoch": 2.7121845486917477, |
|
"grad_norm": 0.8667876720428467, |
|
"learning_rate": 0.0005425828429854445, |
|
"loss": 2.6849, |
|
"step": 8760 |
|
}, |
|
{ |
|
"epoch": 2.7152810032512775, |
|
"grad_norm": 0.8643457889556885, |
|
"learning_rate": 0.0005432022297925055, |
|
"loss": 2.6629, |
|
"step": 8770 |
|
}, |
|
{ |
|
"epoch": 2.7183774578108064, |
|
"grad_norm": 0.8841952681541443, |
|
"learning_rate": 0.0005438216165995664, |
|
"loss": 2.6605, |
|
"step": 8780 |
|
}, |
|
{ |
|
"epoch": 2.721473912370336, |
|
"grad_norm": 0.9219385385513306, |
|
"learning_rate": 0.0005444410034066275, |
|
"loss": 2.6594, |
|
"step": 8790 |
|
}, |
|
{ |
|
"epoch": 2.724570366929865, |
|
"grad_norm": 0.9676291942596436, |
|
"learning_rate": 0.0005450603902136885, |
|
"loss": 2.6796, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 2.727666821489395, |
|
"grad_norm": 0.9405499696731567, |
|
"learning_rate": 0.0005456797770207495, |
|
"loss": 2.6928, |
|
"step": 8810 |
|
}, |
|
{ |
|
"epoch": 2.7307632760489238, |
|
"grad_norm": 0.9420516490936279, |
|
"learning_rate": 0.0005462991638278106, |
|
"loss": 2.6699, |
|
"step": 8820 |
|
}, |
|
{ |
|
"epoch": 2.7338597306084536, |
|
"grad_norm": 0.9792620539665222, |
|
"learning_rate": 0.0005469185506348715, |
|
"loss": 2.6666, |
|
"step": 8830 |
|
}, |
|
{ |
|
"epoch": 2.7369561851679824, |
|
"grad_norm": 0.9726955890655518, |
|
"learning_rate": 0.0005475379374419325, |
|
"loss": 2.645, |
|
"step": 8840 |
|
}, |
|
{ |
|
"epoch": 2.7400526397275122, |
|
"grad_norm": 1.020033359527588, |
|
"learning_rate": 0.0005481573242489936, |
|
"loss": 2.6614, |
|
"step": 8850 |
|
}, |
|
{ |
|
"epoch": 2.743149094287041, |
|
"grad_norm": 1.0454789400100708, |
|
"learning_rate": 0.0005487767110560545, |
|
"loss": 2.6565, |
|
"step": 8860 |
|
}, |
|
{ |
|
"epoch": 2.746245548846571, |
|
"grad_norm": 0.8889420628547668, |
|
"learning_rate": 0.0005493960978631155, |
|
"loss": 2.6916, |
|
"step": 8870 |
|
}, |
|
{ |
|
"epoch": 2.7493420034061, |
|
"grad_norm": 0.9025602340698242, |
|
"learning_rate": 0.0005500154846701765, |
|
"loss": 2.6999, |
|
"step": 8880 |
|
}, |
|
{ |
|
"epoch": 2.7524384579656296, |
|
"grad_norm": 0.8665561079978943, |
|
"learning_rate": 0.0005506348714772375, |
|
"loss": 2.6385, |
|
"step": 8890 |
|
}, |
|
{ |
|
"epoch": 2.7555349125251585, |
|
"grad_norm": 0.9031399488449097, |
|
"learning_rate": 0.0005512542582842985, |
|
"loss": 2.6505, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 2.7586313670846883, |
|
"grad_norm": 0.9555135369300842, |
|
"learning_rate": 0.0005518736450913595, |
|
"loss": 2.6816, |
|
"step": 8910 |
|
}, |
|
{ |
|
"epoch": 2.761727821644217, |
|
"grad_norm": 0.9307361245155334, |
|
"learning_rate": 0.0005524930318984206, |
|
"loss": 2.6715, |
|
"step": 8920 |
|
}, |
|
{ |
|
"epoch": 2.764824276203747, |
|
"grad_norm": 0.9591286778450012, |
|
"learning_rate": 0.0005531124187054816, |
|
"loss": 2.6822, |
|
"step": 8930 |
|
}, |
|
{ |
|
"epoch": 2.767920730763276, |
|
"grad_norm": 0.9070897698402405, |
|
"learning_rate": 0.0005537318055125425, |
|
"loss": 2.6702, |
|
"step": 8940 |
|
}, |
|
{ |
|
"epoch": 2.7710171853228056, |
|
"grad_norm": 0.9256467819213867, |
|
"learning_rate": 0.0005543511923196036, |
|
"loss": 2.6555, |
|
"step": 8950 |
|
}, |
|
{ |
|
"epoch": 2.7741136398823345, |
|
"grad_norm": 0.993756115436554, |
|
"learning_rate": 0.0005549705791266646, |
|
"loss": 2.6547, |
|
"step": 8960 |
|
}, |
|
{ |
|
"epoch": 2.7772100944418643, |
|
"grad_norm": 0.9043955206871033, |
|
"learning_rate": 0.0005555899659337256, |
|
"loss": 2.6905, |
|
"step": 8970 |
|
}, |
|
{ |
|
"epoch": 2.780306549001393, |
|
"grad_norm": 0.9000112414360046, |
|
"learning_rate": 0.0005562093527407867, |
|
"loss": 2.6534, |
|
"step": 8980 |
|
}, |
|
{ |
|
"epoch": 2.783403003560923, |
|
"grad_norm": 0.9210097789764404, |
|
"learning_rate": 0.0005568287395478476, |
|
"loss": 2.6717, |
|
"step": 8990 |
|
}, |
|
{ |
|
"epoch": 2.786499458120452, |
|
"grad_norm": 0.8958888053894043, |
|
"learning_rate": 0.0005574481263549086, |
|
"loss": 2.6856, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 2.7895959126799816, |
|
"grad_norm": 1.0156104564666748, |
|
"learning_rate": 0.0005580675131619697, |
|
"loss": 2.6794, |
|
"step": 9010 |
|
}, |
|
{ |
|
"epoch": 2.7926923672395105, |
|
"grad_norm": 0.9581423997879028, |
|
"learning_rate": 0.0005586868999690307, |
|
"loss": 2.6576, |
|
"step": 9020 |
|
}, |
|
{ |
|
"epoch": 2.7957888217990403, |
|
"grad_norm": 0.9721694588661194, |
|
"learning_rate": 0.0005593062867760917, |
|
"loss": 2.6569, |
|
"step": 9030 |
|
}, |
|
{ |
|
"epoch": 2.798885276358569, |
|
"grad_norm": 0.9453576803207397, |
|
"learning_rate": 0.0005599256735831528, |
|
"loss": 2.6664, |
|
"step": 9040 |
|
}, |
|
{ |
|
"epoch": 2.801981730918099, |
|
"grad_norm": 0.9473662972450256, |
|
"learning_rate": 0.0005605450603902137, |
|
"loss": 2.6604, |
|
"step": 9050 |
|
}, |
|
{ |
|
"epoch": 2.805078185477628, |
|
"grad_norm": 0.9190026521682739, |
|
"learning_rate": 0.0005611644471972746, |
|
"loss": 2.6632, |
|
"step": 9060 |
|
}, |
|
{ |
|
"epoch": 2.8081746400371577, |
|
"grad_norm": 0.9677988886833191, |
|
"learning_rate": 0.0005617838340043357, |
|
"loss": 2.6574, |
|
"step": 9070 |
|
}, |
|
{ |
|
"epoch": 2.8112710945966866, |
|
"grad_norm": 0.9148370623588562, |
|
"learning_rate": 0.0005624032208113967, |
|
"loss": 2.6801, |
|
"step": 9080 |
|
}, |
|
{ |
|
"epoch": 2.8143675491562163, |
|
"grad_norm": 0.908485472202301, |
|
"learning_rate": 0.0005630226076184577, |
|
"loss": 2.6614, |
|
"step": 9090 |
|
}, |
|
{ |
|
"epoch": 2.8174640037157452, |
|
"grad_norm": 0.9479948878288269, |
|
"learning_rate": 0.0005636419944255187, |
|
"loss": 2.6426, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 2.820560458275275, |
|
"grad_norm": 1.0165117979049683, |
|
"learning_rate": 0.0005642613812325797, |
|
"loss": 2.6608, |
|
"step": 9110 |
|
}, |
|
{ |
|
"epoch": 2.823656912834804, |
|
"grad_norm": 0.870343029499054, |
|
"learning_rate": 0.0005648807680396407, |
|
"loss": 2.6729, |
|
"step": 9120 |
|
}, |
|
{ |
|
"epoch": 2.8267533673943337, |
|
"grad_norm": 0.9335671067237854, |
|
"learning_rate": 0.0005655001548467018, |
|
"loss": 2.6655, |
|
"step": 9130 |
|
}, |
|
{ |
|
"epoch": 2.8298498219538626, |
|
"grad_norm": 0.9250266551971436, |
|
"learning_rate": 0.0005661195416537628, |
|
"loss": 2.6793, |
|
"step": 9140 |
|
}, |
|
{ |
|
"epoch": 2.8329462765133924, |
|
"grad_norm": 0.8737602233886719, |
|
"learning_rate": 0.0005667389284608237, |
|
"loss": 2.6552, |
|
"step": 9150 |
|
}, |
|
{ |
|
"epoch": 2.8360427310729213, |
|
"grad_norm": 0.9168223142623901, |
|
"learning_rate": 0.0005673583152678848, |
|
"loss": 2.6725, |
|
"step": 9160 |
|
}, |
|
{ |
|
"epoch": 2.839139185632451, |
|
"grad_norm": 0.9240823984146118, |
|
"learning_rate": 0.0005679777020749458, |
|
"loss": 2.642, |
|
"step": 9170 |
|
}, |
|
{ |
|
"epoch": 2.84223564019198, |
|
"grad_norm": 0.9061072468757629, |
|
"learning_rate": 0.0005685970888820068, |
|
"loss": 2.6746, |
|
"step": 9180 |
|
}, |
|
{ |
|
"epoch": 2.8453320947515097, |
|
"grad_norm": 0.8670341968536377, |
|
"learning_rate": 0.0005692164756890679, |
|
"loss": 2.6693, |
|
"step": 9190 |
|
}, |
|
{ |
|
"epoch": 2.8484285493110386, |
|
"grad_norm": 0.9250338673591614, |
|
"learning_rate": 0.0005698358624961289, |
|
"loss": 2.6755, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 2.8515250038705684, |
|
"grad_norm": 0.9369593262672424, |
|
"learning_rate": 0.0005704552493031898, |
|
"loss": 2.6794, |
|
"step": 9210 |
|
}, |
|
{ |
|
"epoch": 2.8546214584300973, |
|
"grad_norm": 0.9392365217208862, |
|
"learning_rate": 0.0005710746361102509, |
|
"loss": 2.6644, |
|
"step": 9220 |
|
}, |
|
{ |
|
"epoch": 2.857717912989627, |
|
"grad_norm": 0.9542964696884155, |
|
"learning_rate": 0.0005716940229173119, |
|
"loss": 2.6785, |
|
"step": 9230 |
|
}, |
|
{ |
|
"epoch": 2.860814367549156, |
|
"grad_norm": 0.9194208979606628, |
|
"learning_rate": 0.0005723134097243729, |
|
"loss": 2.6743, |
|
"step": 9240 |
|
}, |
|
{ |
|
"epoch": 2.8639108221086857, |
|
"grad_norm": 0.9285315275192261, |
|
"learning_rate": 0.000572932796531434, |
|
"loss": 2.6589, |
|
"step": 9250 |
|
}, |
|
{ |
|
"epoch": 2.8670072766682146, |
|
"grad_norm": 0.9268024563789368, |
|
"learning_rate": 0.000573552183338495, |
|
"loss": 2.6552, |
|
"step": 9260 |
|
}, |
|
{ |
|
"epoch": 2.8701037312277444, |
|
"grad_norm": 0.904656171798706, |
|
"learning_rate": 0.0005741715701455558, |
|
"loss": 2.6657, |
|
"step": 9270 |
|
}, |
|
{ |
|
"epoch": 2.8732001857872733, |
|
"grad_norm": 0.9420167207717896, |
|
"learning_rate": 0.0005747909569526169, |
|
"loss": 2.6572, |
|
"step": 9280 |
|
}, |
|
{ |
|
"epoch": 2.876296640346803, |
|
"grad_norm": 0.9118287563323975, |
|
"learning_rate": 0.0005754103437596779, |
|
"loss": 2.6629, |
|
"step": 9290 |
|
}, |
|
{ |
|
"epoch": 2.879393094906332, |
|
"grad_norm": 0.940430223941803, |
|
"learning_rate": 0.0005760297305667389, |
|
"loss": 2.6518, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 2.8824895494658618, |
|
"grad_norm": 1.3163542747497559, |
|
"learning_rate": 0.0005766491173738, |
|
"loss": 2.652, |
|
"step": 9310 |
|
}, |
|
{ |
|
"epoch": 2.8855860040253907, |
|
"grad_norm": 0.9466584324836731, |
|
"learning_rate": 0.0005772685041808609, |
|
"loss": 2.6809, |
|
"step": 9320 |
|
}, |
|
{ |
|
"epoch": 2.8886824585849205, |
|
"grad_norm": 2.4098305702209473, |
|
"learning_rate": 0.0005778878909879219, |
|
"loss": 2.6616, |
|
"step": 9330 |
|
}, |
|
{ |
|
"epoch": 2.8917789131444493, |
|
"grad_norm": 1.0643264055252075, |
|
"learning_rate": 0.000578507277794983, |
|
"loss": 2.6719, |
|
"step": 9340 |
|
}, |
|
{ |
|
"epoch": 2.894875367703979, |
|
"grad_norm": 0.9846721887588501, |
|
"learning_rate": 0.000579126664602044, |
|
"loss": 2.6706, |
|
"step": 9350 |
|
}, |
|
{ |
|
"epoch": 2.897971822263508, |
|
"grad_norm": 0.9832435250282288, |
|
"learning_rate": 0.000579746051409105, |
|
"loss": 2.6725, |
|
"step": 9360 |
|
}, |
|
{ |
|
"epoch": 2.901068276823038, |
|
"grad_norm": 0.8981136083602905, |
|
"learning_rate": 0.000580365438216166, |
|
"loss": 2.6475, |
|
"step": 9370 |
|
}, |
|
{ |
|
"epoch": 2.9041647313825667, |
|
"grad_norm": 0.8961195349693298, |
|
"learning_rate": 0.000580984825023227, |
|
"loss": 2.6705, |
|
"step": 9380 |
|
}, |
|
{ |
|
"epoch": 2.9072611859420965, |
|
"grad_norm": 1.0543441772460938, |
|
"learning_rate": 0.000581604211830288, |
|
"loss": 2.6666, |
|
"step": 9390 |
|
}, |
|
{ |
|
"epoch": 2.9103576405016254, |
|
"grad_norm": 0.9041043519973755, |
|
"learning_rate": 0.0005822235986373491, |
|
"loss": 2.6608, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 2.913454095061155, |
|
"grad_norm": 0.9475833773612976, |
|
"learning_rate": 0.0005828429854444101, |
|
"loss": 2.6453, |
|
"step": 9410 |
|
}, |
|
{ |
|
"epoch": 2.916550549620684, |
|
"grad_norm": 0.9282538890838623, |
|
"learning_rate": 0.000583462372251471, |
|
"loss": 2.6531, |
|
"step": 9420 |
|
}, |
|
{ |
|
"epoch": 2.919647004180214, |
|
"grad_norm": 0.936406672000885, |
|
"learning_rate": 0.0005840817590585321, |
|
"loss": 2.6594, |
|
"step": 9430 |
|
}, |
|
{ |
|
"epoch": 2.9227434587397427, |
|
"grad_norm": 0.9766597747802734, |
|
"learning_rate": 0.0005847011458655931, |
|
"loss": 2.6698, |
|
"step": 9440 |
|
}, |
|
{ |
|
"epoch": 2.9258399132992725, |
|
"grad_norm": 0.9606243968009949, |
|
"learning_rate": 0.0005853205326726541, |
|
"loss": 2.6819, |
|
"step": 9450 |
|
}, |
|
{ |
|
"epoch": 2.9289363678588014, |
|
"grad_norm": 0.9478334784507751, |
|
"learning_rate": 0.0005859399194797152, |
|
"loss": 2.6589, |
|
"step": 9460 |
|
}, |
|
{ |
|
"epoch": 2.932032822418331, |
|
"grad_norm": 0.9398000836372375, |
|
"learning_rate": 0.0005865593062867762, |
|
"loss": 2.6414, |
|
"step": 9470 |
|
}, |
|
{ |
|
"epoch": 2.93512927697786, |
|
"grad_norm": 0.8788222074508667, |
|
"learning_rate": 0.000587178693093837, |
|
"loss": 2.6327, |
|
"step": 9480 |
|
}, |
|
{ |
|
"epoch": 2.93822573153739, |
|
"grad_norm": 0.945261538028717, |
|
"learning_rate": 0.0005877980799008981, |
|
"loss": 2.6642, |
|
"step": 9490 |
|
}, |
|
{ |
|
"epoch": 2.9413221860969188, |
|
"grad_norm": 0.9153859615325928, |
|
"learning_rate": 0.0005884174667079591, |
|
"loss": 2.6528, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 2.9444186406564485, |
|
"grad_norm": 1.6933245658874512, |
|
"learning_rate": 0.0005890368535150201, |
|
"loss": 2.6594, |
|
"step": 9510 |
|
}, |
|
{ |
|
"epoch": 2.9475150952159774, |
|
"grad_norm": 1.0047813653945923, |
|
"learning_rate": 0.0005896562403220812, |
|
"loss": 2.6867, |
|
"step": 9520 |
|
}, |
|
{ |
|
"epoch": 2.950611549775507, |
|
"grad_norm": 1.006410002708435, |
|
"learning_rate": 0.0005902756271291422, |
|
"loss": 2.6551, |
|
"step": 9530 |
|
}, |
|
{ |
|
"epoch": 2.953708004335036, |
|
"grad_norm": 0.987974226474762, |
|
"learning_rate": 0.0005908950139362031, |
|
"loss": 2.6563, |
|
"step": 9540 |
|
}, |
|
{ |
|
"epoch": 2.956804458894566, |
|
"grad_norm": 0.9611511826515198, |
|
"learning_rate": 0.0005915144007432642, |
|
"loss": 2.6677, |
|
"step": 9550 |
|
}, |
|
{ |
|
"epoch": 2.959900913454095, |
|
"grad_norm": 0.9569249153137207, |
|
"learning_rate": 0.0005921337875503252, |
|
"loss": 2.6368, |
|
"step": 9560 |
|
}, |
|
{ |
|
"epoch": 2.9629973680136246, |
|
"grad_norm": 0.909783124923706, |
|
"learning_rate": 0.0005927531743573862, |
|
"loss": 2.6353, |
|
"step": 9570 |
|
}, |
|
{ |
|
"epoch": 2.966093822573154, |
|
"grad_norm": 0.9167472720146179, |
|
"learning_rate": 0.0005933725611644472, |
|
"loss": 2.6469, |
|
"step": 9580 |
|
}, |
|
{ |
|
"epoch": 2.9691902771326832, |
|
"grad_norm": 0.9903345108032227, |
|
"learning_rate": 0.0005939919479715082, |
|
"loss": 2.6567, |
|
"step": 9590 |
|
}, |
|
{ |
|
"epoch": 2.9722867316922126, |
|
"grad_norm": 0.9372828006744385, |
|
"learning_rate": 0.0005946113347785692, |
|
"loss": 2.6597, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 2.975383186251742, |
|
"grad_norm": 1.0080912113189697, |
|
"learning_rate": 0.0005952307215856302, |
|
"loss": 2.6425, |
|
"step": 9610 |
|
}, |
|
{ |
|
"epoch": 2.9784796408112713, |
|
"grad_norm": 0.9167620539665222, |
|
"learning_rate": 0.0005958501083926913, |
|
"loss": 2.666, |
|
"step": 9620 |
|
}, |
|
{ |
|
"epoch": 2.9815760953708006, |
|
"grad_norm": 0.9428613781929016, |
|
"learning_rate": 0.0005964694951997523, |
|
"loss": 2.6486, |
|
"step": 9630 |
|
}, |
|
{ |
|
"epoch": 2.98467254993033, |
|
"grad_norm": 1.0144000053405762, |
|
"learning_rate": 0.0005970888820068132, |
|
"loss": 2.6382, |
|
"step": 9640 |
|
}, |
|
{ |
|
"epoch": 2.9877690044898593, |
|
"grad_norm": 0.8944305777549744, |
|
"learning_rate": 0.0005977082688138743, |
|
"loss": 2.6406, |
|
"step": 9650 |
|
}, |
|
{ |
|
"epoch": 2.9908654590493886, |
|
"grad_norm": 0.9113066792488098, |
|
"learning_rate": 0.0005983276556209353, |
|
"loss": 2.6514, |
|
"step": 9660 |
|
}, |
|
{ |
|
"epoch": 2.993961913608918, |
|
"grad_norm": 0.9131670594215393, |
|
"learning_rate": 0.0005989470424279963, |
|
"loss": 2.6314, |
|
"step": 9670 |
|
}, |
|
{ |
|
"epoch": 2.9970583681684473, |
|
"grad_norm": 0.9719523787498474, |
|
"learning_rate": 0.0005995664292350574, |
|
"loss": 2.6578, |
|
"step": 9680 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 0.8123937845230103, |
|
"learning_rate": 0.0006001858160421183, |
|
"loss": 2.5215, |
|
"step": 9690 |
|
}, |
|
{ |
|
"epoch": 3.0030964545595293, |
|
"grad_norm": 1.055759310722351, |
|
"learning_rate": 0.0006008052028491792, |
|
"loss": 2.6481, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 3.0061929091190587, |
|
"grad_norm": 0.9894253611564636, |
|
"learning_rate": 0.0006014245896562403, |
|
"loss": 2.6389, |
|
"step": 9710 |
|
}, |
|
{ |
|
"epoch": 3.009289363678588, |
|
"grad_norm": 0.9278469085693359, |
|
"learning_rate": 0.0006020439764633013, |
|
"loss": 2.6382, |
|
"step": 9720 |
|
}, |
|
{ |
|
"epoch": 3.0123858182381174, |
|
"grad_norm": 0.9690927267074585, |
|
"learning_rate": 0.0006026633632703623, |
|
"loss": 2.6225, |
|
"step": 9730 |
|
}, |
|
{ |
|
"epoch": 3.0154822727976467, |
|
"grad_norm": 0.8948525190353394, |
|
"learning_rate": 0.0006032827500774234, |
|
"loss": 2.6266, |
|
"step": 9740 |
|
}, |
|
{ |
|
"epoch": 3.018578727357176, |
|
"grad_norm": 0.9562525749206543, |
|
"learning_rate": 0.0006039021368844843, |
|
"loss": 2.6251, |
|
"step": 9750 |
|
}, |
|
{ |
|
"epoch": 3.0216751819167054, |
|
"grad_norm": 0.9463378190994263, |
|
"learning_rate": 0.0006045215236915453, |
|
"loss": 2.6405, |
|
"step": 9760 |
|
}, |
|
{ |
|
"epoch": 3.0247716364762347, |
|
"grad_norm": 0.9799174070358276, |
|
"learning_rate": 0.0006051409104986064, |
|
"loss": 2.6381, |
|
"step": 9770 |
|
}, |
|
{ |
|
"epoch": 3.027868091035764, |
|
"grad_norm": 0.9874619841575623, |
|
"learning_rate": 0.0006057602973056674, |
|
"loss": 2.6143, |
|
"step": 9780 |
|
}, |
|
{ |
|
"epoch": 3.0309645455952934, |
|
"grad_norm": 1.083337426185608, |
|
"learning_rate": 0.0006063796841127284, |
|
"loss": 2.6153, |
|
"step": 9790 |
|
}, |
|
{ |
|
"epoch": 3.0340610001548227, |
|
"grad_norm": 0.9509608745574951, |
|
"learning_rate": 0.0006069990709197895, |
|
"loss": 2.6379, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 3.037157454714352, |
|
"grad_norm": 0.9036940336227417, |
|
"learning_rate": 0.0006076184577268504, |
|
"loss": 2.6415, |
|
"step": 9810 |
|
}, |
|
{ |
|
"epoch": 3.0402539092738814, |
|
"grad_norm": 0.9959449768066406, |
|
"learning_rate": 0.0006082378445339114, |
|
"loss": 2.6394, |
|
"step": 9820 |
|
}, |
|
{ |
|
"epoch": 3.0433503638334107, |
|
"grad_norm": 0.9509766101837158, |
|
"learning_rate": 0.0006088572313409725, |
|
"loss": 2.6287, |
|
"step": 9830 |
|
}, |
|
{ |
|
"epoch": 3.04644681839294, |
|
"grad_norm": 0.9667684435844421, |
|
"learning_rate": 0.0006094766181480335, |
|
"loss": 2.6518, |
|
"step": 9840 |
|
}, |
|
{ |
|
"epoch": 3.0495432729524694, |
|
"grad_norm": 0.8897145986557007, |
|
"learning_rate": 0.0006100960049550945, |
|
"loss": 2.6333, |
|
"step": 9850 |
|
}, |
|
{ |
|
"epoch": 3.0526397275119987, |
|
"grad_norm": 1.0284274816513062, |
|
"learning_rate": 0.0006107153917621555, |
|
"loss": 2.6348, |
|
"step": 9860 |
|
}, |
|
{ |
|
"epoch": 3.055736182071528, |
|
"grad_norm": 0.9442754983901978, |
|
"learning_rate": 0.0006113347785692165, |
|
"loss": 2.6345, |
|
"step": 9870 |
|
}, |
|
{ |
|
"epoch": 3.0588326366310574, |
|
"grad_norm": 0.9227479696273804, |
|
"learning_rate": 0.0006119541653762775, |
|
"loss": 2.6346, |
|
"step": 9880 |
|
}, |
|
{ |
|
"epoch": 3.0619290911905868, |
|
"grad_norm": 0.9678612351417542, |
|
"learning_rate": 0.0006125735521833386, |
|
"loss": 2.6267, |
|
"step": 9890 |
|
}, |
|
{ |
|
"epoch": 3.065025545750116, |
|
"grad_norm": 0.9622678160667419, |
|
"learning_rate": 0.0006131929389903995, |
|
"loss": 2.6205, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 3.0681220003096454, |
|
"grad_norm": 0.9785904288291931, |
|
"learning_rate": 0.0006138123257974604, |
|
"loss": 2.6316, |
|
"step": 9910 |
|
}, |
|
{ |
|
"epoch": 3.0712184548691748, |
|
"grad_norm": 0.9019646644592285, |
|
"learning_rate": 0.0006144317126045215, |
|
"loss": 2.6322, |
|
"step": 9920 |
|
}, |
|
{ |
|
"epoch": 3.074314909428704, |
|
"grad_norm": 0.9511599540710449, |
|
"learning_rate": 0.0006150510994115825, |
|
"loss": 2.6404, |
|
"step": 9930 |
|
}, |
|
{ |
|
"epoch": 3.0774113639882335, |
|
"grad_norm": 1.1197845935821533, |
|
"learning_rate": 0.0006156704862186435, |
|
"loss": 2.6334, |
|
"step": 9940 |
|
}, |
|
{ |
|
"epoch": 3.080507818547763, |
|
"grad_norm": 1.0321228504180908, |
|
"learning_rate": 0.0006162898730257046, |
|
"loss": 2.6578, |
|
"step": 9950 |
|
}, |
|
{ |
|
"epoch": 3.083604273107292, |
|
"grad_norm": 0.933640718460083, |
|
"learning_rate": 0.0006169092598327656, |
|
"loss": 2.6498, |
|
"step": 9960 |
|
}, |
|
{ |
|
"epoch": 3.0867007276668215, |
|
"grad_norm": 0.9308697581291199, |
|
"learning_rate": 0.0006175286466398265, |
|
"loss": 2.6403, |
|
"step": 9970 |
|
}, |
|
{ |
|
"epoch": 3.089797182226351, |
|
"grad_norm": 1.0035881996154785, |
|
"learning_rate": 0.0006181480334468876, |
|
"loss": 2.6369, |
|
"step": 9980 |
|
}, |
|
{ |
|
"epoch": 3.09289363678588, |
|
"grad_norm": 0.9733856916427612, |
|
"learning_rate": 0.0006187674202539486, |
|
"loss": 2.6434, |
|
"step": 9990 |
|
}, |
|
{ |
|
"epoch": 3.0959900913454095, |
|
"grad_norm": 0.9512896537780762, |
|
"learning_rate": 0.0006193868070610096, |
|
"loss": 2.6433, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 3.099086545904939, |
|
"grad_norm": 1.1366065740585327, |
|
"learning_rate": 0.0006200061938680707, |
|
"loss": 2.6278, |
|
"step": 10010 |
|
}, |
|
{ |
|
"epoch": 3.102183000464468, |
|
"grad_norm": 1.0089902877807617, |
|
"learning_rate": 0.0006206255806751317, |
|
"loss": 2.6198, |
|
"step": 10020 |
|
}, |
|
{ |
|
"epoch": 3.1052794550239975, |
|
"grad_norm": 0.9710060358047485, |
|
"learning_rate": 0.0006212449674821926, |
|
"loss": 2.6299, |
|
"step": 10030 |
|
}, |
|
{ |
|
"epoch": 3.108375909583527, |
|
"grad_norm": 1.0112597942352295, |
|
"learning_rate": 0.0006218643542892537, |
|
"loss": 2.629, |
|
"step": 10040 |
|
}, |
|
{ |
|
"epoch": 3.111472364143056, |
|
"grad_norm": 0.8979578614234924, |
|
"learning_rate": 0.0006224837410963147, |
|
"loss": 2.6306, |
|
"step": 10050 |
|
}, |
|
{ |
|
"epoch": 3.1145688187025855, |
|
"grad_norm": 0.985578715801239, |
|
"learning_rate": 0.0006231031279033757, |
|
"loss": 2.6214, |
|
"step": 10060 |
|
}, |
|
{ |
|
"epoch": 3.117665273262115, |
|
"grad_norm": 1.0180467367172241, |
|
"learning_rate": 0.0006237225147104368, |
|
"loss": 2.6698, |
|
"step": 10070 |
|
}, |
|
{ |
|
"epoch": 3.120761727821644, |
|
"grad_norm": 0.9561509490013123, |
|
"learning_rate": 0.0006243419015174977, |
|
"loss": 2.6295, |
|
"step": 10080 |
|
}, |
|
{ |
|
"epoch": 3.1238581823811735, |
|
"grad_norm": 0.9035720229148865, |
|
"learning_rate": 0.0006249612883245587, |
|
"loss": 2.6356, |
|
"step": 10090 |
|
}, |
|
{ |
|
"epoch": 3.126954636940703, |
|
"grad_norm": 0.9758944511413574, |
|
"learning_rate": 0.0006255806751316198, |
|
"loss": 2.6373, |
|
"step": 10100 |
|
}, |
|
{ |
|
"epoch": 3.130051091500232, |
|
"grad_norm": 0.9201127290725708, |
|
"learning_rate": 0.0006262000619386807, |
|
"loss": 2.6354, |
|
"step": 10110 |
|
}, |
|
{ |
|
"epoch": 3.1331475460597615, |
|
"grad_norm": 0.9586511850357056, |
|
"learning_rate": 0.0006268194487457417, |
|
"loss": 2.6286, |
|
"step": 10120 |
|
}, |
|
{ |
|
"epoch": 3.136244000619291, |
|
"grad_norm": 1.3197758197784424, |
|
"learning_rate": 0.0006274388355528027, |
|
"loss": 2.6503, |
|
"step": 10130 |
|
}, |
|
{ |
|
"epoch": 3.13934045517882, |
|
"grad_norm": 1.4489221572875977, |
|
"learning_rate": 0.0006280582223598637, |
|
"loss": 2.667, |
|
"step": 10140 |
|
}, |
|
{ |
|
"epoch": 3.1424369097383495, |
|
"grad_norm": 1.1435356140136719, |
|
"learning_rate": 0.0006286776091669247, |
|
"loss": 2.6803, |
|
"step": 10150 |
|
}, |
|
{ |
|
"epoch": 3.145533364297879, |
|
"grad_norm": 5.218364238739014, |
|
"learning_rate": 0.0006292969959739858, |
|
"loss": 2.7482, |
|
"step": 10160 |
|
}, |
|
{ |
|
"epoch": 3.1486298188574082, |
|
"grad_norm": 1.0673755407333374, |
|
"learning_rate": 0.0006299163827810468, |
|
"loss": 2.6814, |
|
"step": 10170 |
|
}, |
|
{ |
|
"epoch": 3.1517262734169376, |
|
"grad_norm": 0.9964536428451538, |
|
"learning_rate": 0.0006305357695881078, |
|
"loss": 2.6468, |
|
"step": 10180 |
|
}, |
|
{ |
|
"epoch": 3.154822727976467, |
|
"grad_norm": 1.0818805694580078, |
|
"learning_rate": 0.0006311551563951688, |
|
"loss": 2.6687, |
|
"step": 10190 |
|
}, |
|
{ |
|
"epoch": 3.1579191825359962, |
|
"grad_norm": 1.0229182243347168, |
|
"learning_rate": 0.0006317745432022298, |
|
"loss": 2.632, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 3.1610156370955256, |
|
"grad_norm": 0.9602491855621338, |
|
"learning_rate": 0.0006323939300092908, |
|
"loss": 2.6209, |
|
"step": 10210 |
|
}, |
|
{ |
|
"epoch": 3.164112091655055, |
|
"grad_norm": 1.0441064834594727, |
|
"learning_rate": 0.0006330133168163518, |
|
"loss": 2.6421, |
|
"step": 10220 |
|
}, |
|
{ |
|
"epoch": 3.1672085462145843, |
|
"grad_norm": 19.606216430664062, |
|
"learning_rate": 0.0006336327036234129, |
|
"loss": 2.6372, |
|
"step": 10230 |
|
}, |
|
{ |
|
"epoch": 3.1703050007741136, |
|
"grad_norm": 1.115622878074646, |
|
"learning_rate": 0.0006342520904304738, |
|
"loss": 2.6775, |
|
"step": 10240 |
|
}, |
|
{ |
|
"epoch": 3.173401455333643, |
|
"grad_norm": 1.1430797576904297, |
|
"learning_rate": 0.0006348714772375348, |
|
"loss": 2.6415, |
|
"step": 10250 |
|
}, |
|
{ |
|
"epoch": 3.1764979098931723, |
|
"grad_norm": 7.035722255706787, |
|
"learning_rate": 0.0006354908640445959, |
|
"loss": 2.6995, |
|
"step": 10260 |
|
}, |
|
{ |
|
"epoch": 3.1795943644527016, |
|
"grad_norm": 1.2375656366348267, |
|
"learning_rate": 0.0006361102508516569, |
|
"loss": 2.7278, |
|
"step": 10270 |
|
}, |
|
{ |
|
"epoch": 3.182690819012231, |
|
"grad_norm": 1.0868054628372192, |
|
"learning_rate": 0.0006367296376587179, |
|
"loss": 2.6475, |
|
"step": 10280 |
|
}, |
|
{ |
|
"epoch": 3.1857872735717603, |
|
"grad_norm": 1.0047295093536377, |
|
"learning_rate": 0.000637349024465779, |
|
"loss": 2.6195, |
|
"step": 10290 |
|
}, |
|
{ |
|
"epoch": 3.1888837281312896, |
|
"grad_norm": 0.9876299500465393, |
|
"learning_rate": 0.0006379684112728399, |
|
"loss": 2.6392, |
|
"step": 10300 |
|
}, |
|
{ |
|
"epoch": 3.191980182690819, |
|
"grad_norm": 1.021812081336975, |
|
"learning_rate": 0.0006385877980799008, |
|
"loss": 2.6468, |
|
"step": 10310 |
|
}, |
|
{ |
|
"epoch": 3.1950766372503483, |
|
"grad_norm": 0.954329788684845, |
|
"learning_rate": 0.0006392071848869619, |
|
"loss": 2.6368, |
|
"step": 10320 |
|
}, |
|
{ |
|
"epoch": 3.1981730918098776, |
|
"grad_norm": 0.9458587169647217, |
|
"learning_rate": 0.0006398265716940229, |
|
"loss": 2.6368, |
|
"step": 10330 |
|
}, |
|
{ |
|
"epoch": 3.201269546369407, |
|
"grad_norm": 1.0526219606399536, |
|
"learning_rate": 0.0006404459585010839, |
|
"loss": 2.6389, |
|
"step": 10340 |
|
}, |
|
{ |
|
"epoch": 3.2043660009289363, |
|
"grad_norm": 1.1330630779266357, |
|
"learning_rate": 0.0006410653453081449, |
|
"loss": 2.6666, |
|
"step": 10350 |
|
}, |
|
{ |
|
"epoch": 3.2074624554884656, |
|
"grad_norm": 1.0522410869598389, |
|
"learning_rate": 0.0006416847321152059, |
|
"loss": 2.6405, |
|
"step": 10360 |
|
}, |
|
{ |
|
"epoch": 3.210558910047995, |
|
"grad_norm": 0.973717451095581, |
|
"learning_rate": 0.0006423041189222669, |
|
"loss": 2.6198, |
|
"step": 10370 |
|
}, |
|
{ |
|
"epoch": 3.2136553646075243, |
|
"grad_norm": 0.9188945889472961, |
|
"learning_rate": 0.000642923505729328, |
|
"loss": 2.6478, |
|
"step": 10380 |
|
}, |
|
{ |
|
"epoch": 3.2167518191670537, |
|
"grad_norm": 0.9480977654457092, |
|
"learning_rate": 0.000643542892536389, |
|
"loss": 2.635, |
|
"step": 10390 |
|
}, |
|
{ |
|
"epoch": 3.219848273726583, |
|
"grad_norm": 0.9224624633789062, |
|
"learning_rate": 0.0006441622793434499, |
|
"loss": 2.6362, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 3.2229447282861123, |
|
"grad_norm": 0.9448727965354919, |
|
"learning_rate": 0.000644781666150511, |
|
"loss": 2.6215, |
|
"step": 10410 |
|
}, |
|
{ |
|
"epoch": 3.2260411828456417, |
|
"grad_norm": 0.9381209015846252, |
|
"learning_rate": 0.000645401052957572, |
|
"loss": 2.6305, |
|
"step": 10420 |
|
}, |
|
{ |
|
"epoch": 3.229137637405171, |
|
"grad_norm": 1.0034310817718506, |
|
"learning_rate": 0.000646020439764633, |
|
"loss": 2.6344, |
|
"step": 10430 |
|
}, |
|
{ |
|
"epoch": 3.2322340919647004, |
|
"grad_norm": 0.9512182474136353, |
|
"learning_rate": 0.0006466398265716941, |
|
"loss": 2.6397, |
|
"step": 10440 |
|
}, |
|
{ |
|
"epoch": 3.2353305465242297, |
|
"grad_norm": 0.9563096761703491, |
|
"learning_rate": 0.0006472592133787551, |
|
"loss": 2.6354, |
|
"step": 10450 |
|
}, |
|
{ |
|
"epoch": 3.238427001083759, |
|
"grad_norm": 1.0199220180511475, |
|
"learning_rate": 0.000647878600185816, |
|
"loss": 2.6379, |
|
"step": 10460 |
|
}, |
|
{ |
|
"epoch": 3.2415234556432884, |
|
"grad_norm": 0.9473974108695984, |
|
"learning_rate": 0.0006484979869928771, |
|
"loss": 2.6262, |
|
"step": 10470 |
|
}, |
|
{ |
|
"epoch": 3.2446199102028177, |
|
"grad_norm": 0.9869408011436462, |
|
"learning_rate": 0.0006491173737999381, |
|
"loss": 2.6495, |
|
"step": 10480 |
|
}, |
|
{ |
|
"epoch": 3.247716364762347, |
|
"grad_norm": 0.9925758242607117, |
|
"learning_rate": 0.0006497367606069991, |
|
"loss": 2.631, |
|
"step": 10490 |
|
}, |
|
{ |
|
"epoch": 3.2508128193218764, |
|
"grad_norm": 1.048644781112671, |
|
"learning_rate": 0.0006503561474140602, |
|
"loss": 2.6145, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 3.2539092738814057, |
|
"grad_norm": 0.9119939804077148, |
|
"learning_rate": 0.0006509755342211212, |
|
"loss": 2.6249, |
|
"step": 10510 |
|
}, |
|
{ |
|
"epoch": 3.257005728440935, |
|
"grad_norm": 1.0462340116500854, |
|
"learning_rate": 0.000651594921028182, |
|
"loss": 2.6343, |
|
"step": 10520 |
|
}, |
|
{ |
|
"epoch": 3.2601021830004644, |
|
"grad_norm": 0.9970148801803589, |
|
"learning_rate": 0.0006522143078352431, |
|
"loss": 2.6353, |
|
"step": 10530 |
|
}, |
|
{ |
|
"epoch": 3.2631986375599937, |
|
"grad_norm": 0.9585279822349548, |
|
"learning_rate": 0.0006528336946423041, |
|
"loss": 2.6223, |
|
"step": 10540 |
|
}, |
|
{ |
|
"epoch": 3.266295092119523, |
|
"grad_norm": 1.0489411354064941, |
|
"learning_rate": 0.0006534530814493651, |
|
"loss": 2.6246, |
|
"step": 10550 |
|
}, |
|
{ |
|
"epoch": 3.2693915466790524, |
|
"grad_norm": 0.9942703247070312, |
|
"learning_rate": 0.0006540724682564262, |
|
"loss": 2.6163, |
|
"step": 10560 |
|
}, |
|
{ |
|
"epoch": 3.2724880012385817, |
|
"grad_norm": 1.0939925909042358, |
|
"learning_rate": 0.0006546918550634871, |
|
"loss": 2.6485, |
|
"step": 10570 |
|
}, |
|
{ |
|
"epoch": 3.275584455798111, |
|
"grad_norm": 0.9639611840248108, |
|
"learning_rate": 0.0006553112418705481, |
|
"loss": 2.6369, |
|
"step": 10580 |
|
}, |
|
{ |
|
"epoch": 3.2786809103576404, |
|
"grad_norm": 1.003915786743164, |
|
"learning_rate": 0.0006559306286776092, |
|
"loss": 2.6324, |
|
"step": 10590 |
|
}, |
|
{ |
|
"epoch": 3.2817773649171698, |
|
"grad_norm": 1.07323157787323, |
|
"learning_rate": 0.0006565500154846702, |
|
"loss": 2.6239, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 3.284873819476699, |
|
"grad_norm": 0.9782385230064392, |
|
"learning_rate": 0.0006571694022917312, |
|
"loss": 2.6276, |
|
"step": 10610 |
|
}, |
|
{ |
|
"epoch": 3.2879702740362284, |
|
"grad_norm": 0.9947441816329956, |
|
"learning_rate": 0.0006577887890987922, |
|
"loss": 2.6258, |
|
"step": 10620 |
|
}, |
|
{ |
|
"epoch": 3.2910667285957578, |
|
"grad_norm": 0.9401261806488037, |
|
"learning_rate": 0.0006584081759058532, |
|
"loss": 2.6054, |
|
"step": 10630 |
|
}, |
|
{ |
|
"epoch": 3.294163183155287, |
|
"grad_norm": 0.9426921606063843, |
|
"learning_rate": 0.0006590275627129142, |
|
"loss": 2.634, |
|
"step": 10640 |
|
}, |
|
{ |
|
"epoch": 3.2972596377148164, |
|
"grad_norm": 0.9457327127456665, |
|
"learning_rate": 0.0006596469495199753, |
|
"loss": 2.6319, |
|
"step": 10650 |
|
}, |
|
{ |
|
"epoch": 3.300356092274346, |
|
"grad_norm": 1.1993708610534668, |
|
"learning_rate": 0.0006602663363270363, |
|
"loss": 2.6365, |
|
"step": 10660 |
|
}, |
|
{ |
|
"epoch": 3.303452546833875, |
|
"grad_norm": 0.9889876842498779, |
|
"learning_rate": 0.0006608857231340973, |
|
"loss": 2.6503, |
|
"step": 10670 |
|
}, |
|
{ |
|
"epoch": 3.3065490013934045, |
|
"grad_norm": 0.9788354635238647, |
|
"learning_rate": 0.0006615051099411583, |
|
"loss": 2.641, |
|
"step": 10680 |
|
}, |
|
{ |
|
"epoch": 3.309645455952934, |
|
"grad_norm": 0.9262669682502747, |
|
"learning_rate": 0.0006621244967482193, |
|
"loss": 2.6473, |
|
"step": 10690 |
|
}, |
|
{ |
|
"epoch": 3.312741910512463, |
|
"grad_norm": 0.9675087332725525, |
|
"learning_rate": 0.0006627438835552803, |
|
"loss": 2.6425, |
|
"step": 10700 |
|
}, |
|
{ |
|
"epoch": 3.3158383650719925, |
|
"grad_norm": 0.9308109879493713, |
|
"learning_rate": 0.0006633632703623414, |
|
"loss": 2.6425, |
|
"step": 10710 |
|
}, |
|
{ |
|
"epoch": 3.318934819631522, |
|
"grad_norm": 0.9837930202484131, |
|
"learning_rate": 0.0006639826571694024, |
|
"loss": 2.6309, |
|
"step": 10720 |
|
}, |
|
{ |
|
"epoch": 3.322031274191051, |
|
"grad_norm": 0.9883390069007874, |
|
"learning_rate": 0.0006646020439764632, |
|
"loss": 2.5976, |
|
"step": 10730 |
|
}, |
|
{ |
|
"epoch": 3.3251277287505805, |
|
"grad_norm": 0.9393827319145203, |
|
"learning_rate": 0.0006652214307835243, |
|
"loss": 2.6229, |
|
"step": 10740 |
|
}, |
|
{ |
|
"epoch": 3.32822418331011, |
|
"grad_norm": 0.9329293370246887, |
|
"learning_rate": 0.0006658408175905853, |
|
"loss": 2.6102, |
|
"step": 10750 |
|
}, |
|
{ |
|
"epoch": 3.331320637869639, |
|
"grad_norm": 0.8954689502716064, |
|
"learning_rate": 0.0006664602043976463, |
|
"loss": 2.6578, |
|
"step": 10760 |
|
}, |
|
{ |
|
"epoch": 3.3344170924291685, |
|
"grad_norm": 0.92784583568573, |
|
"learning_rate": 0.0006670795912047074, |
|
"loss": 2.6127, |
|
"step": 10770 |
|
}, |
|
{ |
|
"epoch": 3.337513546988698, |
|
"grad_norm": 0.9678082466125488, |
|
"learning_rate": 0.0006676989780117683, |
|
"loss": 2.6097, |
|
"step": 10780 |
|
}, |
|
{ |
|
"epoch": 3.340610001548227, |
|
"grad_norm": 0.9594787955284119, |
|
"learning_rate": 0.0006683183648188293, |
|
"loss": 2.6068, |
|
"step": 10790 |
|
}, |
|
{ |
|
"epoch": 3.3437064561077565, |
|
"grad_norm": 0.9914245009422302, |
|
"learning_rate": 0.0006689377516258904, |
|
"loss": 2.6173, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 3.346802910667286, |
|
"grad_norm": 0.9944581985473633, |
|
"learning_rate": 0.0006695571384329514, |
|
"loss": 2.6229, |
|
"step": 10810 |
|
}, |
|
{ |
|
"epoch": 3.349899365226815, |
|
"grad_norm": 1.0383622646331787, |
|
"learning_rate": 0.0006701765252400124, |
|
"loss": 2.6341, |
|
"step": 10820 |
|
}, |
|
{ |
|
"epoch": 3.3529958197863445, |
|
"grad_norm": 1.034728765487671, |
|
"learning_rate": 0.0006707959120470735, |
|
"loss": 2.637, |
|
"step": 10830 |
|
}, |
|
{ |
|
"epoch": 3.356092274345874, |
|
"grad_norm": 1.0271577835083008, |
|
"learning_rate": 0.0006714152988541344, |
|
"loss": 2.6387, |
|
"step": 10840 |
|
}, |
|
{ |
|
"epoch": 3.359188728905403, |
|
"grad_norm": 0.9783453345298767, |
|
"learning_rate": 0.0006720346856611954, |
|
"loss": 2.5978, |
|
"step": 10850 |
|
}, |
|
{ |
|
"epoch": 3.3622851834649325, |
|
"grad_norm": 0.9792416095733643, |
|
"learning_rate": 0.0006726540724682565, |
|
"loss": 2.6349, |
|
"step": 10860 |
|
}, |
|
{ |
|
"epoch": 3.365381638024462, |
|
"grad_norm": 0.9500912427902222, |
|
"learning_rate": 0.0006732734592753175, |
|
"loss": 2.6087, |
|
"step": 10870 |
|
}, |
|
{ |
|
"epoch": 3.3684780925839912, |
|
"grad_norm": 0.9641538262367249, |
|
"learning_rate": 0.0006738928460823785, |
|
"loss": 2.613, |
|
"step": 10880 |
|
}, |
|
{ |
|
"epoch": 3.3715745471435206, |
|
"grad_norm": 0.957671582698822, |
|
"learning_rate": 0.0006745122328894394, |
|
"loss": 2.6215, |
|
"step": 10890 |
|
}, |
|
{ |
|
"epoch": 3.37467100170305, |
|
"grad_norm": 1.0475072860717773, |
|
"learning_rate": 0.0006751316196965005, |
|
"loss": 2.6092, |
|
"step": 10900 |
|
}, |
|
{ |
|
"epoch": 3.3777674562625792, |
|
"grad_norm": 0.96811842918396, |
|
"learning_rate": 0.0006757510065035615, |
|
"loss": 2.6134, |
|
"step": 10910 |
|
}, |
|
{ |
|
"epoch": 3.3808639108221086, |
|
"grad_norm": 1.0156564712524414, |
|
"learning_rate": 0.0006763703933106225, |
|
"loss": 2.6112, |
|
"step": 10920 |
|
}, |
|
{ |
|
"epoch": 3.383960365381638, |
|
"grad_norm": 1.0434483289718628, |
|
"learning_rate": 0.0006769897801176836, |
|
"loss": 2.6183, |
|
"step": 10930 |
|
}, |
|
{ |
|
"epoch": 3.3870568199411673, |
|
"grad_norm": 0.9763379096984863, |
|
"learning_rate": 0.0006776091669247445, |
|
"loss": 2.6063, |
|
"step": 10940 |
|
}, |
|
{ |
|
"epoch": 3.3901532745006966, |
|
"grad_norm": 1.0185160636901855, |
|
"learning_rate": 0.0006782285537318054, |
|
"loss": 2.6491, |
|
"step": 10950 |
|
}, |
|
{ |
|
"epoch": 3.393249729060226, |
|
"grad_norm": 0.9660173058509827, |
|
"learning_rate": 0.0006788479405388665, |
|
"loss": 2.6164, |
|
"step": 10960 |
|
}, |
|
{ |
|
"epoch": 3.3963461836197553, |
|
"grad_norm": 0.9487093091011047, |
|
"learning_rate": 0.0006794673273459275, |
|
"loss": 2.614, |
|
"step": 10970 |
|
}, |
|
{ |
|
"epoch": 3.3994426381792846, |
|
"grad_norm": 0.9912219643592834, |
|
"learning_rate": 0.0006800867141529885, |
|
"loss": 2.6369, |
|
"step": 10980 |
|
}, |
|
{ |
|
"epoch": 3.402539092738814, |
|
"grad_norm": 0.9763176441192627, |
|
"learning_rate": 0.0006807061009600496, |
|
"loss": 2.6003, |
|
"step": 10990 |
|
}, |
|
{ |
|
"epoch": 3.4056355472983433, |
|
"grad_norm": 1.007444977760315, |
|
"learning_rate": 0.0006813254877671105, |
|
"loss": 2.6294, |
|
"step": 11000 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 161450, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 50, |
|
"save_steps": 1000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.3128782178793882e+18, |
|
"train_batch_size": 24, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|