|
{ |
|
"best_global_step": null, |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.04671990572961244, |
|
"eval_steps": 500, |
|
"global_step": 18000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.00012977751591559012, |
|
"grad_norm": 29.529769897460938, |
|
"learning_rate": 2.1196980511755674e-09, |
|
"loss": 2.2098, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.00025955503183118023, |
|
"grad_norm": 30.7341251373291, |
|
"learning_rate": 4.282655246252677e-09, |
|
"loss": 2.2367, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.00038933254774677035, |
|
"grad_norm": 29.822181701660156, |
|
"learning_rate": 6.4456124413297865e-09, |
|
"loss": 2.198, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.0005191100636623605, |
|
"grad_norm": 28.564294815063477, |
|
"learning_rate": 8.608569636406895e-09, |
|
"loss": 2.2167, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.0006488875795779505, |
|
"grad_norm": 28.514026641845703, |
|
"learning_rate": 1.0771526831484006e-08, |
|
"loss": 2.2001, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.0007786650954935407, |
|
"grad_norm": 29.033863067626953, |
|
"learning_rate": 1.2934484026561114e-08, |
|
"loss": 2.1908, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.0009084426114091308, |
|
"grad_norm": 4.49394416809082, |
|
"learning_rate": 1.5097441221638225e-08, |
|
"loss": 2.1841, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.001038220127324721, |
|
"grad_norm": 2.1610796451568604, |
|
"learning_rate": 1.7260398416715337e-08, |
|
"loss": 2.1702, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.0011679976432403109, |
|
"grad_norm": 1.9879209995269775, |
|
"learning_rate": 1.9423355611792444e-08, |
|
"loss": 2.1613, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.001297775159155901, |
|
"grad_norm": 1.880299687385559, |
|
"learning_rate": 2.1586312806869556e-08, |
|
"loss": 2.1529, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.0014275526750714912, |
|
"grad_norm": 1.7597101926803589, |
|
"learning_rate": 2.3749270001946664e-08, |
|
"loss": 2.1641, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.0015573301909870814, |
|
"grad_norm": 1.5660640001296997, |
|
"learning_rate": 2.591222719702377e-08, |
|
"loss": 2.1501, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.0016871077069026714, |
|
"grad_norm": 1.5481892824172974, |
|
"learning_rate": 2.8075184392100883e-08, |
|
"loss": 2.1583, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.0018168852228182615, |
|
"grad_norm": 1.525515079498291, |
|
"learning_rate": 3.0238141587177994e-08, |
|
"loss": 2.1547, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.0019466627387338517, |
|
"grad_norm": 1.4705111980438232, |
|
"learning_rate": 3.24010987822551e-08, |
|
"loss": 2.1365, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.002076440254649442, |
|
"grad_norm": 1.3471802473068237, |
|
"learning_rate": 3.4564055977332216e-08, |
|
"loss": 2.1777, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.002206217770565032, |
|
"grad_norm": 1.3963252305984497, |
|
"learning_rate": 3.6727013172409324e-08, |
|
"loss": 2.1356, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.0023359952864806218, |
|
"grad_norm": 1.2665836811065674, |
|
"learning_rate": 3.888997036748643e-08, |
|
"loss": 2.1565, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.002465772802396212, |
|
"grad_norm": 1.4085878133773804, |
|
"learning_rate": 4.105292756256354e-08, |
|
"loss": 2.1497, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.002595550318311802, |
|
"grad_norm": 1.4020620584487915, |
|
"learning_rate": 4.3215884757640654e-08, |
|
"loss": 2.1549, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.002725327834227392, |
|
"grad_norm": 1.5391197204589844, |
|
"learning_rate": 4.5378841952717755e-08, |
|
"loss": 2.1519, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.0028551053501429825, |
|
"grad_norm": 1.2532999515533447, |
|
"learning_rate": 4.754179914779487e-08, |
|
"loss": 2.1411, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.0029848828660585724, |
|
"grad_norm": 1.2143096923828125, |
|
"learning_rate": 4.970475634287198e-08, |
|
"loss": 2.1435, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.003114660381974163, |
|
"grad_norm": 1.4227972030639648, |
|
"learning_rate": 5.1867713537949086e-08, |
|
"loss": 2.142, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.0032444378978897528, |
|
"grad_norm": 1.0749537944793701, |
|
"learning_rate": 5.40306707330262e-08, |
|
"loss": 2.1134, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.0033742154138053427, |
|
"grad_norm": 1.169149398803711, |
|
"learning_rate": 5.619362792810331e-08, |
|
"loss": 2.1215, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.003503992929720933, |
|
"grad_norm": 1.3452564477920532, |
|
"learning_rate": 5.8356585123180416e-08, |
|
"loss": 2.1524, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.003633770445636523, |
|
"grad_norm": 1.3610825538635254, |
|
"learning_rate": 6.051954231825752e-08, |
|
"loss": 2.1389, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.003763547961552113, |
|
"grad_norm": 1.2172240018844604, |
|
"learning_rate": 6.268249951333464e-08, |
|
"loss": 2.1428, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.0038933254774677034, |
|
"grad_norm": 1.1681610345840454, |
|
"learning_rate": 6.484545670841174e-08, |
|
"loss": 2.1253, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.004023102993383294, |
|
"grad_norm": 1.0709235668182373, |
|
"learning_rate": 6.700841390348886e-08, |
|
"loss": 2.1533, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.004152880509298884, |
|
"grad_norm": 1.2001404762268066, |
|
"learning_rate": 6.917137109856597e-08, |
|
"loss": 2.1371, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.004282658025214474, |
|
"grad_norm": 1.2642266750335693, |
|
"learning_rate": 7.133432829364308e-08, |
|
"loss": 2.129, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.004412435541130064, |
|
"grad_norm": 1.030280590057373, |
|
"learning_rate": 7.349728548872018e-08, |
|
"loss": 2.128, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.004542213057045654, |
|
"grad_norm": 1.4037690162658691, |
|
"learning_rate": 7.566024268379729e-08, |
|
"loss": 2.1383, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.0046719905729612436, |
|
"grad_norm": 1.1600065231323242, |
|
"learning_rate": 7.78231998788744e-08, |
|
"loss": 2.1193, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.004801768088876834, |
|
"grad_norm": 1.2075531482696533, |
|
"learning_rate": 7.998615707395151e-08, |
|
"loss": 2.1126, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.004931545604792424, |
|
"grad_norm": 1.1088111400604248, |
|
"learning_rate": 8.214911426902863e-08, |
|
"loss": 2.1394, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.005061323120708014, |
|
"grad_norm": 1.195092797279358, |
|
"learning_rate": 8.431207146410574e-08, |
|
"loss": 2.1359, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.005191100636623604, |
|
"grad_norm": 1.1595982313156128, |
|
"learning_rate": 8.647502865918283e-08, |
|
"loss": 2.1366, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.005320878152539194, |
|
"grad_norm": 1.1727768182754517, |
|
"learning_rate": 8.863798585425995e-08, |
|
"loss": 2.1106, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.005450655668454784, |
|
"grad_norm": 1.2424023151397705, |
|
"learning_rate": 9.080094304933706e-08, |
|
"loss": 2.1339, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.005580433184370375, |
|
"grad_norm": 1.2954424619674683, |
|
"learning_rate": 9.296390024441417e-08, |
|
"loss": 2.1107, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.005710210700285965, |
|
"grad_norm": 1.0388058423995972, |
|
"learning_rate": 9.512685743949129e-08, |
|
"loss": 2.118, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.005839988216201555, |
|
"grad_norm": 4.136488914489746, |
|
"learning_rate": 9.728981463456838e-08, |
|
"loss": 2.1159, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.005969765732117145, |
|
"grad_norm": 1.0597412586212158, |
|
"learning_rate": 9.945277182964549e-08, |
|
"loss": 2.1303, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.006099543248032735, |
|
"grad_norm": 1.0780799388885498, |
|
"learning_rate": 1.0161572902472261e-07, |
|
"loss": 2.1095, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 0.006229320763948326, |
|
"grad_norm": 1.2739732265472412, |
|
"learning_rate": 1.0377868621979972e-07, |
|
"loss": 2.1264, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.0063590982798639156, |
|
"grad_norm": 3.3836748600006104, |
|
"learning_rate": 1.0594164341487683e-07, |
|
"loss": 2.1183, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 0.0064888757957795055, |
|
"grad_norm": 1.269440770149231, |
|
"learning_rate": 1.0810460060995395e-07, |
|
"loss": 2.1243, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.0066186533116950955, |
|
"grad_norm": 1.1834193468093872, |
|
"learning_rate": 1.1026755780503104e-07, |
|
"loss": 2.1207, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 0.006748430827610685, |
|
"grad_norm": 1.2411448955535889, |
|
"learning_rate": 1.1243051500010815e-07, |
|
"loss": 2.1201, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.006878208343526275, |
|
"grad_norm": 1.1330574750900269, |
|
"learning_rate": 1.1459347219518527e-07, |
|
"loss": 2.1125, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 0.007007985859441866, |
|
"grad_norm": 1.199055790901184, |
|
"learning_rate": 1.1675642939026238e-07, |
|
"loss": 2.13, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.007137763375357456, |
|
"grad_norm": 1.2272251844406128, |
|
"learning_rate": 1.1891938658533949e-07, |
|
"loss": 2.1239, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 0.007267540891273046, |
|
"grad_norm": 1.1257809400558472, |
|
"learning_rate": 1.2108234378041658e-07, |
|
"loss": 2.12, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.007397318407188636, |
|
"grad_norm": 1.1028845310211182, |
|
"learning_rate": 1.2324530097549372e-07, |
|
"loss": 2.1227, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 0.007527095923104226, |
|
"grad_norm": 1.2196507453918457, |
|
"learning_rate": 1.254082581705708e-07, |
|
"loss": 2.1241, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.007656873439019816, |
|
"grad_norm": 1.0342847108840942, |
|
"learning_rate": 1.275712153656479e-07, |
|
"loss": 2.1148, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 0.007786650954935407, |
|
"grad_norm": 1.1587927341461182, |
|
"learning_rate": 1.2973417256072504e-07, |
|
"loss": 2.1293, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.007916428470850997, |
|
"grad_norm": 1.1751174926757812, |
|
"learning_rate": 1.3189712975580215e-07, |
|
"loss": 2.1084, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 0.008046205986766588, |
|
"grad_norm": 12.056378364562988, |
|
"learning_rate": 1.3406008695087926e-07, |
|
"loss": 2.103, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.008175983502682177, |
|
"grad_norm": 1.0078307390213013, |
|
"learning_rate": 1.3622304414595637e-07, |
|
"loss": 2.1345, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 0.008305761018597767, |
|
"grad_norm": 1.0840246677398682, |
|
"learning_rate": 1.3838600134103347e-07, |
|
"loss": 2.1151, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.008435538534513357, |
|
"grad_norm": 1.213310956954956, |
|
"learning_rate": 1.4054895853611058e-07, |
|
"loss": 2.1275, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 0.008565316050428947, |
|
"grad_norm": 1.2903615236282349, |
|
"learning_rate": 1.427119157311877e-07, |
|
"loss": 2.1105, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.008695093566344536, |
|
"grad_norm": 1.1746351718902588, |
|
"learning_rate": 1.448748729262648e-07, |
|
"loss": 2.1344, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 0.008824871082260127, |
|
"grad_norm": 1.1928184032440186, |
|
"learning_rate": 1.470378301213419e-07, |
|
"loss": 2.1278, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.008954648598175718, |
|
"grad_norm": 1.1132676601409912, |
|
"learning_rate": 1.4920078731641904e-07, |
|
"loss": 2.1134, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 0.009084426114091307, |
|
"grad_norm": 1.4539573192596436, |
|
"learning_rate": 1.5136374451149612e-07, |
|
"loss": 2.0943, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.009214203630006898, |
|
"grad_norm": 1.176128625869751, |
|
"learning_rate": 1.5352670170657323e-07, |
|
"loss": 2.1025, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 0.009343981145922487, |
|
"grad_norm": 1.0798020362854004, |
|
"learning_rate": 1.5568965890165036e-07, |
|
"loss": 2.0946, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.009473758661838078, |
|
"grad_norm": 1.1570450067520142, |
|
"learning_rate": 1.5785261609672747e-07, |
|
"loss": 2.1119, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 0.009603536177753669, |
|
"grad_norm": 0.9933484792709351, |
|
"learning_rate": 1.6001557329180458e-07, |
|
"loss": 2.1171, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.009733313693669258, |
|
"grad_norm": 1.1642405986785889, |
|
"learning_rate": 1.6217853048688166e-07, |
|
"loss": 2.1047, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 0.009863091209584849, |
|
"grad_norm": 1.266423225402832, |
|
"learning_rate": 1.643414876819588e-07, |
|
"loss": 2.0984, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.009992868725500438, |
|
"grad_norm": 1.0179153680801392, |
|
"learning_rate": 1.665044448770359e-07, |
|
"loss": 2.0977, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 0.010122646241416029, |
|
"grad_norm": 1.0938276052474976, |
|
"learning_rate": 1.68667402072113e-07, |
|
"loss": 2.1249, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.01025242375733162, |
|
"grad_norm": 1.1981333494186401, |
|
"learning_rate": 1.7083035926719012e-07, |
|
"loss": 2.1186, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 0.010382201273247208, |
|
"grad_norm": 1.238035798072815, |
|
"learning_rate": 1.7299331646226725e-07, |
|
"loss": 2.1254, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.0105119787891628, |
|
"grad_norm": 1.0942214727401733, |
|
"learning_rate": 1.7515627365734433e-07, |
|
"loss": 2.0972, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 0.010641756305078388, |
|
"grad_norm": 1.1418002843856812, |
|
"learning_rate": 1.7731923085242144e-07, |
|
"loss": 2.1286, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 0.01077153382099398, |
|
"grad_norm": 2.164984703063965, |
|
"learning_rate": 1.7948218804749858e-07, |
|
"loss": 2.1195, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 0.010901311336909568, |
|
"grad_norm": 1.119779109954834, |
|
"learning_rate": 1.8164514524257566e-07, |
|
"loss": 2.1242, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.011031088852825159, |
|
"grad_norm": 1.0193780660629272, |
|
"learning_rate": 1.8380810243765277e-07, |
|
"loss": 2.0951, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 0.01116086636874075, |
|
"grad_norm": 1.1878997087478638, |
|
"learning_rate": 1.859710596327299e-07, |
|
"loss": 2.1067, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 0.011290643884656339, |
|
"grad_norm": 1.048115849494934, |
|
"learning_rate": 1.8813401682780698e-07, |
|
"loss": 2.0861, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 0.01142042140057193, |
|
"grad_norm": 1.1059479713439941, |
|
"learning_rate": 1.9029697402288412e-07, |
|
"loss": 2.1204, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 0.011550198916487519, |
|
"grad_norm": 0.942563533782959, |
|
"learning_rate": 1.9245993121796122e-07, |
|
"loss": 2.1256, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 0.01167997643240311, |
|
"grad_norm": 3.292470932006836, |
|
"learning_rate": 1.946228884130383e-07, |
|
"loss": 2.0964, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.0118097539483187, |
|
"grad_norm": 1.131181001663208, |
|
"learning_rate": 1.9678584560811544e-07, |
|
"loss": 2.1079, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 0.01193953146423429, |
|
"grad_norm": 1.1684244871139526, |
|
"learning_rate": 1.9894880280319255e-07, |
|
"loss": 2.0918, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 0.01206930898014988, |
|
"grad_norm": 1.094470500946045, |
|
"learning_rate": 2.0111175999826965e-07, |
|
"loss": 2.1129, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 0.01219908649606547, |
|
"grad_norm": 1.1580520868301392, |
|
"learning_rate": 2.0327471719334676e-07, |
|
"loss": 2.1175, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 0.01232886401198106, |
|
"grad_norm": 1.110461711883545, |
|
"learning_rate": 2.054376743884239e-07, |
|
"loss": 2.1188, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 0.012458641527896651, |
|
"grad_norm": 1.1533987522125244, |
|
"learning_rate": 2.0760063158350098e-07, |
|
"loss": 2.1072, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.01258841904381224, |
|
"grad_norm": 1.3358995914459229, |
|
"learning_rate": 2.0976358877857809e-07, |
|
"loss": 2.1031, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 0.012718196559727831, |
|
"grad_norm": 1.100576639175415, |
|
"learning_rate": 2.119265459736552e-07, |
|
"loss": 2.0903, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 0.01284797407564342, |
|
"grad_norm": 1.1505376100540161, |
|
"learning_rate": 2.140895031687323e-07, |
|
"loss": 2.123, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 0.012977751591559011, |
|
"grad_norm": 1.063899278640747, |
|
"learning_rate": 2.1625246036380944e-07, |
|
"loss": 2.0802, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.0131075291074746, |
|
"grad_norm": 0.9469916224479675, |
|
"learning_rate": 2.1841541755888652e-07, |
|
"loss": 2.0917, |
|
"step": 5050 |
|
}, |
|
{ |
|
"epoch": 0.013237306623390191, |
|
"grad_norm": 1.1475921869277954, |
|
"learning_rate": 2.2057837475396363e-07, |
|
"loss": 2.116, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 0.013367084139305782, |
|
"grad_norm": 1.5712428092956543, |
|
"learning_rate": 2.2274133194904076e-07, |
|
"loss": 2.1019, |
|
"step": 5150 |
|
}, |
|
{ |
|
"epoch": 0.01349686165522137, |
|
"grad_norm": 1.0301058292388916, |
|
"learning_rate": 2.2490428914411784e-07, |
|
"loss": 2.0985, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 0.013626639171136962, |
|
"grad_norm": 1.1751989126205444, |
|
"learning_rate": 2.2706724633919495e-07, |
|
"loss": 2.1023, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 0.01375641668705255, |
|
"grad_norm": 2.097857713699341, |
|
"learning_rate": 2.2923020353427208e-07, |
|
"loss": 2.1226, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 0.013886194202968142, |
|
"grad_norm": 1.0331202745437622, |
|
"learning_rate": 2.3139316072934916e-07, |
|
"loss": 2.1205, |
|
"step": 5350 |
|
}, |
|
{ |
|
"epoch": 0.014015971718883732, |
|
"grad_norm": 1.0770936012268066, |
|
"learning_rate": 2.335561179244263e-07, |
|
"loss": 2.1037, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 0.014145749234799321, |
|
"grad_norm": 1.1082065105438232, |
|
"learning_rate": 2.357190751195034e-07, |
|
"loss": 2.1027, |
|
"step": 5450 |
|
}, |
|
{ |
|
"epoch": 0.014275526750714912, |
|
"grad_norm": 1.2101250886917114, |
|
"learning_rate": 2.378820323145805e-07, |
|
"loss": 2.1103, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.014405304266630501, |
|
"grad_norm": 1.163560152053833, |
|
"learning_rate": 2.400449895096576e-07, |
|
"loss": 2.1035, |
|
"step": 5550 |
|
}, |
|
{ |
|
"epoch": 0.014535081782546092, |
|
"grad_norm": 1.1837356090545654, |
|
"learning_rate": 2.4220794670473476e-07, |
|
"loss": 2.0891, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 0.014664859298461683, |
|
"grad_norm": 1.2113701105117798, |
|
"learning_rate": 2.4437090389981184e-07, |
|
"loss": 2.1096, |
|
"step": 5650 |
|
}, |
|
{ |
|
"epoch": 0.014794636814377272, |
|
"grad_norm": 1.1284654140472412, |
|
"learning_rate": 2.4653386109488897e-07, |
|
"loss": 2.0948, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 0.014924414330292863, |
|
"grad_norm": 1.4470899105072021, |
|
"learning_rate": 2.4869681828996605e-07, |
|
"loss": 2.0954, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 0.015054191846208452, |
|
"grad_norm": 1.0791606903076172, |
|
"learning_rate": 2.5085977548504314e-07, |
|
"loss": 2.1022, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 0.015183969362124043, |
|
"grad_norm": 1.1813191175460815, |
|
"learning_rate": 2.5302273268012027e-07, |
|
"loss": 2.1102, |
|
"step": 5850 |
|
}, |
|
{ |
|
"epoch": 0.015313746878039632, |
|
"grad_norm": 1.1993714570999146, |
|
"learning_rate": 2.551856898751974e-07, |
|
"loss": 2.0967, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 0.015443524393955223, |
|
"grad_norm": 1.1809765100479126, |
|
"learning_rate": 2.573486470702745e-07, |
|
"loss": 2.1054, |
|
"step": 5950 |
|
}, |
|
{ |
|
"epoch": 0.015573301909870814, |
|
"grad_norm": 1.0799180269241333, |
|
"learning_rate": 2.595116042653516e-07, |
|
"loss": 2.1144, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.015703079425786404, |
|
"grad_norm": 1.0349640846252441, |
|
"learning_rate": 2.6167456146042875e-07, |
|
"loss": 2.0978, |
|
"step": 6050 |
|
}, |
|
{ |
|
"epoch": 0.015832856941701993, |
|
"grad_norm": 0.9997969269752502, |
|
"learning_rate": 2.6383751865550584e-07, |
|
"loss": 2.1274, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 0.015962634457617583, |
|
"grad_norm": 1.3014293909072876, |
|
"learning_rate": 2.660004758505829e-07, |
|
"loss": 2.0857, |
|
"step": 6150 |
|
}, |
|
{ |
|
"epoch": 0.016092411973533175, |
|
"grad_norm": 1.1863785982131958, |
|
"learning_rate": 2.6816343304566005e-07, |
|
"loss": 2.1051, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 0.016222189489448764, |
|
"grad_norm": 1.6769137382507324, |
|
"learning_rate": 2.7032639024073713e-07, |
|
"loss": 2.0934, |
|
"step": 6250 |
|
}, |
|
{ |
|
"epoch": 0.016351967005364353, |
|
"grad_norm": 1.0514180660247803, |
|
"learning_rate": 2.7248934743581427e-07, |
|
"loss": 2.0948, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 0.016481744521279942, |
|
"grad_norm": 1.0475189685821533, |
|
"learning_rate": 2.746523046308914e-07, |
|
"loss": 2.088, |
|
"step": 6350 |
|
}, |
|
{ |
|
"epoch": 0.016611522037195535, |
|
"grad_norm": 2.3959105014801025, |
|
"learning_rate": 2.768152618259685e-07, |
|
"loss": 2.1053, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 0.016741299553111124, |
|
"grad_norm": 1.291269302368164, |
|
"learning_rate": 2.789782190210456e-07, |
|
"loss": 2.0576, |
|
"step": 6450 |
|
}, |
|
{ |
|
"epoch": 0.016871077069026713, |
|
"grad_norm": 3.1083991527557373, |
|
"learning_rate": 2.811411762161227e-07, |
|
"loss": 2.103, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.017000854584942306, |
|
"grad_norm": 1.2403531074523926, |
|
"learning_rate": 2.833041334111998e-07, |
|
"loss": 2.1099, |
|
"step": 6550 |
|
}, |
|
{ |
|
"epoch": 0.017130632100857895, |
|
"grad_norm": 1.0552589893341064, |
|
"learning_rate": 2.854670906062769e-07, |
|
"loss": 2.1089, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 0.017260409616773484, |
|
"grad_norm": 1.15003502368927, |
|
"learning_rate": 2.8763004780135405e-07, |
|
"loss": 2.1039, |
|
"step": 6650 |
|
}, |
|
{ |
|
"epoch": 0.017390187132689073, |
|
"grad_norm": 1.0832091569900513, |
|
"learning_rate": 2.8979300499643113e-07, |
|
"loss": 2.1084, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 0.017519964648604665, |
|
"grad_norm": 1.3141324520111084, |
|
"learning_rate": 2.9195596219150826e-07, |
|
"loss": 2.1135, |
|
"step": 6750 |
|
}, |
|
{ |
|
"epoch": 0.017649742164520255, |
|
"grad_norm": 1.1030374765396118, |
|
"learning_rate": 2.941189193865854e-07, |
|
"loss": 2.1015, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 0.017779519680435844, |
|
"grad_norm": 1.1264283657073975, |
|
"learning_rate": 2.962818765816625e-07, |
|
"loss": 2.1046, |
|
"step": 6850 |
|
}, |
|
{ |
|
"epoch": 0.017909297196351436, |
|
"grad_norm": 1.0352332592010498, |
|
"learning_rate": 2.9844483377673956e-07, |
|
"loss": 2.1123, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 0.018039074712267025, |
|
"grad_norm": 1.6451269388198853, |
|
"learning_rate": 3.006077909718167e-07, |
|
"loss": 2.0875, |
|
"step": 6950 |
|
}, |
|
{ |
|
"epoch": 0.018168852228182614, |
|
"grad_norm": 1.0955110788345337, |
|
"learning_rate": 3.027707481668938e-07, |
|
"loss": 2.0838, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.018298629744098207, |
|
"grad_norm": 1.128531813621521, |
|
"learning_rate": 3.049337053619709e-07, |
|
"loss": 2.1029, |
|
"step": 7050 |
|
}, |
|
{ |
|
"epoch": 0.018428407260013796, |
|
"grad_norm": 1.0489044189453125, |
|
"learning_rate": 3.0709666255704805e-07, |
|
"loss": 2.0908, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 0.018558184775929385, |
|
"grad_norm": 1.0876027345657349, |
|
"learning_rate": 3.0925961975212513e-07, |
|
"loss": 2.0817, |
|
"step": 7150 |
|
}, |
|
{ |
|
"epoch": 0.018687962291844974, |
|
"grad_norm": 1.025060772895813, |
|
"learning_rate": 3.1142257694720226e-07, |
|
"loss": 2.0924, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 0.018817739807760567, |
|
"grad_norm": 1.3133209943771362, |
|
"learning_rate": 3.1358553414227934e-07, |
|
"loss": 2.0848, |
|
"step": 7250 |
|
}, |
|
{ |
|
"epoch": 0.018947517323676156, |
|
"grad_norm": 1.159995436668396, |
|
"learning_rate": 3.157484913373565e-07, |
|
"loss": 2.0938, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 0.019077294839591745, |
|
"grad_norm": 1.151329755783081, |
|
"learning_rate": 3.1791144853243356e-07, |
|
"loss": 2.1002, |
|
"step": 7350 |
|
}, |
|
{ |
|
"epoch": 0.019207072355507337, |
|
"grad_norm": 1.123695731163025, |
|
"learning_rate": 3.200744057275107e-07, |
|
"loss": 2.0951, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 0.019336849871422927, |
|
"grad_norm": 1.143547534942627, |
|
"learning_rate": 3.222373629225878e-07, |
|
"loss": 2.1034, |
|
"step": 7450 |
|
}, |
|
{ |
|
"epoch": 0.019466627387338516, |
|
"grad_norm": 1.093329906463623, |
|
"learning_rate": 3.2440032011766486e-07, |
|
"loss": 2.0921, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.019596404903254105, |
|
"grad_norm": 1.3572251796722412, |
|
"learning_rate": 3.2656327731274204e-07, |
|
"loss": 2.0902, |
|
"step": 7550 |
|
}, |
|
{ |
|
"epoch": 0.019726182419169697, |
|
"grad_norm": 1.146531343460083, |
|
"learning_rate": 3.287262345078191e-07, |
|
"loss": 2.0899, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 0.019855959935085286, |
|
"grad_norm": 1.0585743188858032, |
|
"learning_rate": 3.308891917028962e-07, |
|
"loss": 2.108, |
|
"step": 7650 |
|
}, |
|
{ |
|
"epoch": 0.019985737451000875, |
|
"grad_norm": 1.1923290491104126, |
|
"learning_rate": 3.3305214889797334e-07, |
|
"loss": 2.098, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 0.020115514966916468, |
|
"grad_norm": 1.1357568502426147, |
|
"learning_rate": 3.352151060930504e-07, |
|
"loss": 2.1021, |
|
"step": 7750 |
|
}, |
|
{ |
|
"epoch": 0.020245292482832057, |
|
"grad_norm": 1.2182716131210327, |
|
"learning_rate": 3.373780632881275e-07, |
|
"loss": 2.0816, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 0.020375069998747646, |
|
"grad_norm": 1.1091363430023193, |
|
"learning_rate": 3.395410204832047e-07, |
|
"loss": 2.0832, |
|
"step": 7850 |
|
}, |
|
{ |
|
"epoch": 0.02050484751466324, |
|
"grad_norm": 1.1325336694717407, |
|
"learning_rate": 3.4170397767828177e-07, |
|
"loss": 2.0872, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 0.020634625030578828, |
|
"grad_norm": 1.020922064781189, |
|
"learning_rate": 3.4386693487335885e-07, |
|
"loss": 2.0798, |
|
"step": 7950 |
|
}, |
|
{ |
|
"epoch": 0.020764402546494417, |
|
"grad_norm": 1.1414934396743774, |
|
"learning_rate": 3.46029892068436e-07, |
|
"loss": 2.0745, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.020894180062410006, |
|
"grad_norm": 1.1155861616134644, |
|
"learning_rate": 3.481928492635131e-07, |
|
"loss": 2.0975, |
|
"step": 8050 |
|
}, |
|
{ |
|
"epoch": 0.0210239575783256, |
|
"grad_norm": 0.9747071266174316, |
|
"learning_rate": 3.503558064585902e-07, |
|
"loss": 2.0796, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 0.021153735094241188, |
|
"grad_norm": 1.2895739078521729, |
|
"learning_rate": 3.5251876365366734e-07, |
|
"loss": 2.0951, |
|
"step": 8150 |
|
}, |
|
{ |
|
"epoch": 0.021283512610156777, |
|
"grad_norm": 1.147414207458496, |
|
"learning_rate": 3.546817208487444e-07, |
|
"loss": 2.0955, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 0.02141329012607237, |
|
"grad_norm": 1.1811184883117676, |
|
"learning_rate": 3.568446780438215e-07, |
|
"loss": 2.0895, |
|
"step": 8250 |
|
}, |
|
{ |
|
"epoch": 0.02154306764198796, |
|
"grad_norm": 1.2630963325500488, |
|
"learning_rate": 3.590076352388987e-07, |
|
"loss": 2.0668, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 0.021672845157903547, |
|
"grad_norm": 1.0883618593215942, |
|
"learning_rate": 3.6117059243397577e-07, |
|
"loss": 2.0981, |
|
"step": 8350 |
|
}, |
|
{ |
|
"epoch": 0.021802622673819137, |
|
"grad_norm": 1.2500261068344116, |
|
"learning_rate": 3.6333354962905285e-07, |
|
"loss": 2.0911, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 0.02193240018973473, |
|
"grad_norm": 1.133091926574707, |
|
"learning_rate": 3.6549650682413e-07, |
|
"loss": 2.082, |
|
"step": 8450 |
|
}, |
|
{ |
|
"epoch": 0.022062177705650318, |
|
"grad_norm": 1.1500440835952759, |
|
"learning_rate": 3.6765946401920707e-07, |
|
"loss": 2.091, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.022191955221565907, |
|
"grad_norm": 1.1573790311813354, |
|
"learning_rate": 3.6982242121428415e-07, |
|
"loss": 2.091, |
|
"step": 8550 |
|
}, |
|
{ |
|
"epoch": 0.0223217327374815, |
|
"grad_norm": 1.0022162199020386, |
|
"learning_rate": 3.7198537840936134e-07, |
|
"loss": 2.0734, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 0.02245151025339709, |
|
"grad_norm": 1.2101961374282837, |
|
"learning_rate": 3.741483356044384e-07, |
|
"loss": 2.0976, |
|
"step": 8650 |
|
}, |
|
{ |
|
"epoch": 0.022581287769312678, |
|
"grad_norm": 1.1183929443359375, |
|
"learning_rate": 3.763112927995155e-07, |
|
"loss": 2.0742, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 0.02271106528522827, |
|
"grad_norm": 1.1698428392410278, |
|
"learning_rate": 3.784742499945927e-07, |
|
"loss": 2.1189, |
|
"step": 8750 |
|
}, |
|
{ |
|
"epoch": 0.02284084280114386, |
|
"grad_norm": 1.238348126411438, |
|
"learning_rate": 3.8063720718966977e-07, |
|
"loss": 2.0864, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 0.02297062031705945, |
|
"grad_norm": 1.0891568660736084, |
|
"learning_rate": 3.8280016438474685e-07, |
|
"loss": 2.071, |
|
"step": 8850 |
|
}, |
|
{ |
|
"epoch": 0.023100397832975038, |
|
"grad_norm": 1.0950003862380981, |
|
"learning_rate": 3.84963121579824e-07, |
|
"loss": 2.0944, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 0.02323017534889063, |
|
"grad_norm": 1.0031663179397583, |
|
"learning_rate": 3.8712607877490106e-07, |
|
"loss": 2.0688, |
|
"step": 8950 |
|
}, |
|
{ |
|
"epoch": 0.02335995286480622, |
|
"grad_norm": 1.1025946140289307, |
|
"learning_rate": 3.8928903596997815e-07, |
|
"loss": 2.0853, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.02348973038072181, |
|
"grad_norm": 1.0795261859893799, |
|
"learning_rate": 3.9145199316505533e-07, |
|
"loss": 2.0813, |
|
"step": 9050 |
|
}, |
|
{ |
|
"epoch": 0.0236195078966374, |
|
"grad_norm": 1.1669412851333618, |
|
"learning_rate": 3.936149503601324e-07, |
|
"loss": 2.0802, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 0.02374928541255299, |
|
"grad_norm": 1.186626672744751, |
|
"learning_rate": 3.957779075552095e-07, |
|
"loss": 2.1015, |
|
"step": 9150 |
|
}, |
|
{ |
|
"epoch": 0.02387906292846858, |
|
"grad_norm": 1.053902506828308, |
|
"learning_rate": 3.9794086475028663e-07, |
|
"loss": 2.1003, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 0.02400884044438417, |
|
"grad_norm": 1.1948777437210083, |
|
"learning_rate": 4.001038219453637e-07, |
|
"loss": 2.0914, |
|
"step": 9250 |
|
}, |
|
{ |
|
"epoch": 0.02413861796029976, |
|
"grad_norm": 1.0830193758010864, |
|
"learning_rate": 4.0226677914044085e-07, |
|
"loss": 2.0892, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 0.02426839547621535, |
|
"grad_norm": 1.0737528800964355, |
|
"learning_rate": 4.04429736335518e-07, |
|
"loss": 2.0949, |
|
"step": 9350 |
|
}, |
|
{ |
|
"epoch": 0.02439817299213094, |
|
"grad_norm": 1.2443790435791016, |
|
"learning_rate": 4.0659269353059506e-07, |
|
"loss": 2.0725, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 0.02452795050804653, |
|
"grad_norm": 0.9910159111022949, |
|
"learning_rate": 4.0875565072567214e-07, |
|
"loss": 2.092, |
|
"step": 9450 |
|
}, |
|
{ |
|
"epoch": 0.02465772802396212, |
|
"grad_norm": 1.1105308532714844, |
|
"learning_rate": 4.1091860792074933e-07, |
|
"loss": 2.0894, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.02478750553987771, |
|
"grad_norm": 1.3401215076446533, |
|
"learning_rate": 4.130815651158264e-07, |
|
"loss": 2.091, |
|
"step": 9550 |
|
}, |
|
{ |
|
"epoch": 0.024917283055793302, |
|
"grad_norm": 1.1136138439178467, |
|
"learning_rate": 4.152445223109035e-07, |
|
"loss": 2.1022, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 0.02504706057170889, |
|
"grad_norm": 1.1129764318466187, |
|
"learning_rate": 4.1740747950598063e-07, |
|
"loss": 2.0841, |
|
"step": 9650 |
|
}, |
|
{ |
|
"epoch": 0.02517683808762448, |
|
"grad_norm": 1.1361297369003296, |
|
"learning_rate": 4.195704367010577e-07, |
|
"loss": 2.0987, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 0.02530661560354007, |
|
"grad_norm": 1.2290136814117432, |
|
"learning_rate": 4.217333938961348e-07, |
|
"loss": 2.0967, |
|
"step": 9750 |
|
}, |
|
{ |
|
"epoch": 0.025436393119455662, |
|
"grad_norm": 1.1932119131088257, |
|
"learning_rate": 4.23896351091212e-07, |
|
"loss": 2.1018, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 0.02556617063537125, |
|
"grad_norm": 1.1398112773895264, |
|
"learning_rate": 4.2605930828628906e-07, |
|
"loss": 2.076, |
|
"step": 9850 |
|
}, |
|
{ |
|
"epoch": 0.02569594815128684, |
|
"grad_norm": 1.255175232887268, |
|
"learning_rate": 4.2822226548136614e-07, |
|
"loss": 2.0979, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 0.025825725667202433, |
|
"grad_norm": 1.063835620880127, |
|
"learning_rate": 4.303852226764433e-07, |
|
"loss": 2.0982, |
|
"step": 9950 |
|
}, |
|
{ |
|
"epoch": 0.025955503183118022, |
|
"grad_norm": 1.0199131965637207, |
|
"learning_rate": 4.3254817987152036e-07, |
|
"loss": 2.077, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.02608528069903361, |
|
"grad_norm": 1.2938398122787476, |
|
"learning_rate": 4.347111370665975e-07, |
|
"loss": 2.0966, |
|
"step": 10050 |
|
}, |
|
{ |
|
"epoch": 0.0262150582149492, |
|
"grad_norm": 1.2516087293624878, |
|
"learning_rate": 4.368740942616746e-07, |
|
"loss": 2.0996, |
|
"step": 10100 |
|
}, |
|
{ |
|
"epoch": 0.026344835730864793, |
|
"grad_norm": 1.140458345413208, |
|
"learning_rate": 4.390370514567517e-07, |
|
"loss": 2.0862, |
|
"step": 10150 |
|
}, |
|
{ |
|
"epoch": 0.026474613246780382, |
|
"grad_norm": 1.245771884918213, |
|
"learning_rate": 4.412000086518288e-07, |
|
"loss": 2.095, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 0.02660439076269597, |
|
"grad_norm": 1.1775400638580322, |
|
"learning_rate": 4.43362965846906e-07, |
|
"loss": 2.0818, |
|
"step": 10250 |
|
}, |
|
{ |
|
"epoch": 0.026734168278611564, |
|
"grad_norm": 1.043639898300171, |
|
"learning_rate": 4.4552592304198306e-07, |
|
"loss": 2.075, |
|
"step": 10300 |
|
}, |
|
{ |
|
"epoch": 0.026863945794527153, |
|
"grad_norm": 1.0813723802566528, |
|
"learning_rate": 4.4768888023706014e-07, |
|
"loss": 2.0525, |
|
"step": 10350 |
|
}, |
|
{ |
|
"epoch": 0.02699372331044274, |
|
"grad_norm": 1.0008471012115479, |
|
"learning_rate": 4.4985183743213727e-07, |
|
"loss": 2.0835, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 0.027123500826358334, |
|
"grad_norm": 1.2055691480636597, |
|
"learning_rate": 4.5201479462721435e-07, |
|
"loss": 2.0877, |
|
"step": 10450 |
|
}, |
|
{ |
|
"epoch": 0.027253278342273923, |
|
"grad_norm": 1.2838592529296875, |
|
"learning_rate": 4.5417775182229143e-07, |
|
"loss": 2.0844, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 0.027383055858189512, |
|
"grad_norm": 1.2694274187088013, |
|
"learning_rate": 4.563407090173686e-07, |
|
"loss": 2.0791, |
|
"step": 10550 |
|
}, |
|
{ |
|
"epoch": 0.0275128333741051, |
|
"grad_norm": 1.1208597421646118, |
|
"learning_rate": 4.585036662124457e-07, |
|
"loss": 2.0846, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 0.027642610890020694, |
|
"grad_norm": 1.0968207120895386, |
|
"learning_rate": 4.606666234075228e-07, |
|
"loss": 2.0834, |
|
"step": 10650 |
|
}, |
|
{ |
|
"epoch": 0.027772388405936283, |
|
"grad_norm": 0.9584913849830627, |
|
"learning_rate": 4.628295806025999e-07, |
|
"loss": 2.0792, |
|
"step": 10700 |
|
}, |
|
{ |
|
"epoch": 0.027902165921851872, |
|
"grad_norm": 1.19157874584198, |
|
"learning_rate": 4.6499253779767705e-07, |
|
"loss": 2.0895, |
|
"step": 10750 |
|
}, |
|
{ |
|
"epoch": 0.028031943437767465, |
|
"grad_norm": 1.1074448823928833, |
|
"learning_rate": 4.6715549499275413e-07, |
|
"loss": 2.0725, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 0.028161720953683054, |
|
"grad_norm": 1.2112274169921875, |
|
"learning_rate": 4.6931845218783127e-07, |
|
"loss": 2.097, |
|
"step": 10850 |
|
}, |
|
{ |
|
"epoch": 0.028291498469598643, |
|
"grad_norm": 1.1916898488998413, |
|
"learning_rate": 4.7148140938290835e-07, |
|
"loss": 2.0703, |
|
"step": 10900 |
|
}, |
|
{ |
|
"epoch": 0.028421275985514232, |
|
"grad_norm": 1.0195530652999878, |
|
"learning_rate": 4.7364436657798543e-07, |
|
"loss": 2.1164, |
|
"step": 10950 |
|
}, |
|
{ |
|
"epoch": 0.028551053501429825, |
|
"grad_norm": 1.1741176843643188, |
|
"learning_rate": 4.758073237730626e-07, |
|
"loss": 2.0762, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.028680831017345414, |
|
"grad_norm": 1.100441336631775, |
|
"learning_rate": 4.779702809681397e-07, |
|
"loss": 2.0843, |
|
"step": 11050 |
|
}, |
|
{ |
|
"epoch": 0.028810608533261003, |
|
"grad_norm": 1.2614903450012207, |
|
"learning_rate": 4.801332381632167e-07, |
|
"loss": 2.0912, |
|
"step": 11100 |
|
}, |
|
{ |
|
"epoch": 0.028940386049176595, |
|
"grad_norm": 1.2177016735076904, |
|
"learning_rate": 4.82296195358294e-07, |
|
"loss": 2.0872, |
|
"step": 11150 |
|
}, |
|
{ |
|
"epoch": 0.029070163565092184, |
|
"grad_norm": 1.0506807565689087, |
|
"learning_rate": 4.84459152553371e-07, |
|
"loss": 2.0469, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 0.029199941081007773, |
|
"grad_norm": 1.0414236783981323, |
|
"learning_rate": 4.866221097484481e-07, |
|
"loss": 2.0683, |
|
"step": 11250 |
|
}, |
|
{ |
|
"epoch": 0.029329718596923366, |
|
"grad_norm": 1.0935665369033813, |
|
"learning_rate": 4.887850669435253e-07, |
|
"loss": 2.0809, |
|
"step": 11300 |
|
}, |
|
{ |
|
"epoch": 0.029459496112838955, |
|
"grad_norm": 1.0495206117630005, |
|
"learning_rate": 4.909480241386023e-07, |
|
"loss": 2.0788, |
|
"step": 11350 |
|
}, |
|
{ |
|
"epoch": 0.029589273628754544, |
|
"grad_norm": 1.3227081298828125, |
|
"learning_rate": 4.931109813336794e-07, |
|
"loss": 2.0858, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 0.029719051144670133, |
|
"grad_norm": 1.1815470457077026, |
|
"learning_rate": 4.952739385287566e-07, |
|
"loss": 2.0832, |
|
"step": 11450 |
|
}, |
|
{ |
|
"epoch": 0.029848828660585726, |
|
"grad_norm": 1.1781071424484253, |
|
"learning_rate": 4.974368957238337e-07, |
|
"loss": 2.0796, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 0.029978606176501315, |
|
"grad_norm": 1.2186251878738403, |
|
"learning_rate": 4.995998529189107e-07, |
|
"loss": 2.0742, |
|
"step": 11550 |
|
}, |
|
{ |
|
"epoch": 0.030108383692416904, |
|
"grad_norm": 1.1277467012405396, |
|
"learning_rate": 5.01762810113988e-07, |
|
"loss": 2.0745, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 0.030238161208332497, |
|
"grad_norm": 1.1841695308685303, |
|
"learning_rate": 5.03925767309065e-07, |
|
"loss": 2.0911, |
|
"step": 11650 |
|
}, |
|
{ |
|
"epoch": 0.030367938724248086, |
|
"grad_norm": 1.2147952318191528, |
|
"learning_rate": 5.060887245041421e-07, |
|
"loss": 2.0719, |
|
"step": 11700 |
|
}, |
|
{ |
|
"epoch": 0.030497716240163675, |
|
"grad_norm": 1.1648039817810059, |
|
"learning_rate": 5.082516816992193e-07, |
|
"loss": 2.0884, |
|
"step": 11750 |
|
}, |
|
{ |
|
"epoch": 0.030627493756079264, |
|
"grad_norm": 1.2530500888824463, |
|
"learning_rate": 5.104146388942963e-07, |
|
"loss": 2.0804, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 0.030757271271994856, |
|
"grad_norm": 1.3491883277893066, |
|
"learning_rate": 5.125775960893734e-07, |
|
"loss": 2.0872, |
|
"step": 11850 |
|
}, |
|
{ |
|
"epoch": 0.030887048787910446, |
|
"grad_norm": 1.2144231796264648, |
|
"learning_rate": 5.147405532844506e-07, |
|
"loss": 2.0628, |
|
"step": 11900 |
|
}, |
|
{ |
|
"epoch": 0.031016826303826035, |
|
"grad_norm": 1.2478595972061157, |
|
"learning_rate": 5.169035104795277e-07, |
|
"loss": 2.0756, |
|
"step": 11950 |
|
}, |
|
{ |
|
"epoch": 0.031146603819741627, |
|
"grad_norm": 1.0973178148269653, |
|
"learning_rate": 5.190664676746047e-07, |
|
"loss": 2.0966, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.031276381335657216, |
|
"grad_norm": 1.1409740447998047, |
|
"learning_rate": 5.212294248696819e-07, |
|
"loss": 2.0812, |
|
"step": 12050 |
|
}, |
|
{ |
|
"epoch": 0.03140615885157281, |
|
"grad_norm": 1.206933617591858, |
|
"learning_rate": 5.23392382064759e-07, |
|
"loss": 2.1006, |
|
"step": 12100 |
|
}, |
|
{ |
|
"epoch": 0.031535936367488394, |
|
"grad_norm": 1.085492491722107, |
|
"learning_rate": 5.255553392598361e-07, |
|
"loss": 2.1002, |
|
"step": 12150 |
|
}, |
|
{ |
|
"epoch": 0.03166571388340399, |
|
"grad_norm": 1.0168064832687378, |
|
"learning_rate": 5.277182964549132e-07, |
|
"loss": 2.066, |
|
"step": 12200 |
|
}, |
|
{ |
|
"epoch": 0.03179549139931958, |
|
"grad_norm": 1.0635970830917358, |
|
"learning_rate": 5.298812536499903e-07, |
|
"loss": 2.0697, |
|
"step": 12250 |
|
}, |
|
{ |
|
"epoch": 0.031925268915235165, |
|
"grad_norm": 1.1196577548980713, |
|
"learning_rate": 5.320442108450674e-07, |
|
"loss": 2.0862, |
|
"step": 12300 |
|
}, |
|
{ |
|
"epoch": 0.03205504643115076, |
|
"grad_norm": 1.4039437770843506, |
|
"learning_rate": 5.342071680401445e-07, |
|
"loss": 2.0736, |
|
"step": 12350 |
|
}, |
|
{ |
|
"epoch": 0.03218482394706635, |
|
"grad_norm": 1.1052871942520142, |
|
"learning_rate": 5.363701252352217e-07, |
|
"loss": 2.0877, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 0.032314601462981936, |
|
"grad_norm": 0.966698408126831, |
|
"learning_rate": 5.385330824302987e-07, |
|
"loss": 2.0866, |
|
"step": 12450 |
|
}, |
|
{ |
|
"epoch": 0.03244437897889753, |
|
"grad_norm": 0.9846018552780151, |
|
"learning_rate": 5.406960396253759e-07, |
|
"loss": 2.0717, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 0.032574156494813114, |
|
"grad_norm": 1.1824718713760376, |
|
"learning_rate": 5.42858996820453e-07, |
|
"loss": 2.0651, |
|
"step": 12550 |
|
}, |
|
{ |
|
"epoch": 0.03270393401072871, |
|
"grad_norm": 1.1409893035888672, |
|
"learning_rate": 5.4502195401553e-07, |
|
"loss": 2.0904, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 0.0328337115266443, |
|
"grad_norm": 1.079150676727295, |
|
"learning_rate": 5.471849112106072e-07, |
|
"loss": 2.0842, |
|
"step": 12650 |
|
}, |
|
{ |
|
"epoch": 0.032963489042559885, |
|
"grad_norm": 1.0430059432983398, |
|
"learning_rate": 5.493478684056843e-07, |
|
"loss": 2.0869, |
|
"step": 12700 |
|
}, |
|
{ |
|
"epoch": 0.03309326655847548, |
|
"grad_norm": 1.085353136062622, |
|
"learning_rate": 5.515108256007614e-07, |
|
"loss": 2.0703, |
|
"step": 12750 |
|
}, |
|
{ |
|
"epoch": 0.03322304407439107, |
|
"grad_norm": 1.1028053760528564, |
|
"learning_rate": 5.536737827958384e-07, |
|
"loss": 2.0804, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 0.033352821590306655, |
|
"grad_norm": 1.4301245212554932, |
|
"learning_rate": 5.558367399909157e-07, |
|
"loss": 2.0934, |
|
"step": 12850 |
|
}, |
|
{ |
|
"epoch": 0.03348259910622225, |
|
"grad_norm": 1.1223058700561523, |
|
"learning_rate": 5.579996971859927e-07, |
|
"loss": 2.0927, |
|
"step": 12900 |
|
}, |
|
{ |
|
"epoch": 0.03361237662213784, |
|
"grad_norm": 1.0447497367858887, |
|
"learning_rate": 5.601626543810699e-07, |
|
"loss": 2.0792, |
|
"step": 12950 |
|
}, |
|
{ |
|
"epoch": 0.033742154138053426, |
|
"grad_norm": 1.1141220331192017, |
|
"learning_rate": 5.62325611576147e-07, |
|
"loss": 2.0661, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.03387193165396902, |
|
"grad_norm": 1.146681547164917, |
|
"learning_rate": 5.64488568771224e-07, |
|
"loss": 2.0906, |
|
"step": 13050 |
|
}, |
|
{ |
|
"epoch": 0.03400170916988461, |
|
"grad_norm": 1.0003130435943604, |
|
"learning_rate": 5.666515259663012e-07, |
|
"loss": 2.0913, |
|
"step": 13100 |
|
}, |
|
{ |
|
"epoch": 0.0341314866858002, |
|
"grad_norm": 1.2144840955734253, |
|
"learning_rate": 5.688144831613783e-07, |
|
"loss": 2.0802, |
|
"step": 13150 |
|
}, |
|
{ |
|
"epoch": 0.03426126420171579, |
|
"grad_norm": 1.11890709400177, |
|
"learning_rate": 5.709774403564554e-07, |
|
"loss": 2.0709, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 0.03439104171763138, |
|
"grad_norm": 1.1874761581420898, |
|
"learning_rate": 5.731403975515324e-07, |
|
"loss": 2.0885, |
|
"step": 13250 |
|
}, |
|
{ |
|
"epoch": 0.03452081923354697, |
|
"grad_norm": 1.155839443206787, |
|
"learning_rate": 5.753033547466096e-07, |
|
"loss": 2.0676, |
|
"step": 13300 |
|
}, |
|
{ |
|
"epoch": 0.03465059674946256, |
|
"grad_norm": 1.1086000204086304, |
|
"learning_rate": 5.774663119416867e-07, |
|
"loss": 2.0721, |
|
"step": 13350 |
|
}, |
|
{ |
|
"epoch": 0.034780374265378146, |
|
"grad_norm": 1.3329235315322876, |
|
"learning_rate": 5.796292691367639e-07, |
|
"loss": 2.0794, |
|
"step": 13400 |
|
}, |
|
{ |
|
"epoch": 0.03491015178129374, |
|
"grad_norm": 1.0079444646835327, |
|
"learning_rate": 5.81792226331841e-07, |
|
"loss": 2.0946, |
|
"step": 13450 |
|
}, |
|
{ |
|
"epoch": 0.03503992929720933, |
|
"grad_norm": 1.1033422946929932, |
|
"learning_rate": 5.83955183526918e-07, |
|
"loss": 2.0648, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 0.03516970681312492, |
|
"grad_norm": 1.1726226806640625, |
|
"learning_rate": 5.861181407219951e-07, |
|
"loss": 2.0772, |
|
"step": 13550 |
|
}, |
|
{ |
|
"epoch": 0.03529948432904051, |
|
"grad_norm": 1.2747085094451904, |
|
"learning_rate": 5.882810979170723e-07, |
|
"loss": 2.0655, |
|
"step": 13600 |
|
}, |
|
{ |
|
"epoch": 0.0354292618449561, |
|
"grad_norm": 1.3106458187103271, |
|
"learning_rate": 5.904440551121494e-07, |
|
"loss": 2.0703, |
|
"step": 13650 |
|
}, |
|
{ |
|
"epoch": 0.03555903936087169, |
|
"grad_norm": 1.2663166522979736, |
|
"learning_rate": 5.926070123072264e-07, |
|
"loss": 2.0841, |
|
"step": 13700 |
|
}, |
|
{ |
|
"epoch": 0.03568881687678728, |
|
"grad_norm": 1.0045231580734253, |
|
"learning_rate": 5.947699695023036e-07, |
|
"loss": 2.0815, |
|
"step": 13750 |
|
}, |
|
{ |
|
"epoch": 0.03581859439270287, |
|
"grad_norm": 1.0900031328201294, |
|
"learning_rate": 5.969329266973807e-07, |
|
"loss": 2.0818, |
|
"step": 13800 |
|
}, |
|
{ |
|
"epoch": 0.03594837190861846, |
|
"grad_norm": 1.065185308456421, |
|
"learning_rate": 5.990958838924577e-07, |
|
"loss": 2.0734, |
|
"step": 13850 |
|
}, |
|
{ |
|
"epoch": 0.03607814942453405, |
|
"grad_norm": 1.117867112159729, |
|
"learning_rate": 6.01258841087535e-07, |
|
"loss": 2.065, |
|
"step": 13900 |
|
}, |
|
{ |
|
"epoch": 0.03620792694044964, |
|
"grad_norm": 1.092624306678772, |
|
"learning_rate": 6.03421798282612e-07, |
|
"loss": 2.0847, |
|
"step": 13950 |
|
}, |
|
{ |
|
"epoch": 0.03633770445636523, |
|
"grad_norm": 1.2159847021102905, |
|
"learning_rate": 6.055847554776891e-07, |
|
"loss": 2.0553, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.03646748197228082, |
|
"grad_norm": 1.0683104991912842, |
|
"learning_rate": 6.077477126727663e-07, |
|
"loss": 2.0834, |
|
"step": 14050 |
|
}, |
|
{ |
|
"epoch": 0.036597259488196414, |
|
"grad_norm": 1.0531790256500244, |
|
"learning_rate": 6.099106698678434e-07, |
|
"loss": 2.0854, |
|
"step": 14100 |
|
}, |
|
{ |
|
"epoch": 0.036727037004112, |
|
"grad_norm": 1.2325146198272705, |
|
"learning_rate": 6.120736270629204e-07, |
|
"loss": 2.0813, |
|
"step": 14150 |
|
}, |
|
{ |
|
"epoch": 0.03685681452002759, |
|
"grad_norm": 1.091143012046814, |
|
"learning_rate": 6.142365842579976e-07, |
|
"loss": 2.0767, |
|
"step": 14200 |
|
}, |
|
{ |
|
"epoch": 0.03698659203594318, |
|
"grad_norm": 1.235277533531189, |
|
"learning_rate": 6.163995414530747e-07, |
|
"loss": 2.1023, |
|
"step": 14250 |
|
}, |
|
{ |
|
"epoch": 0.03711636955185877, |
|
"grad_norm": 1.065708041191101, |
|
"learning_rate": 6.185624986481517e-07, |
|
"loss": 2.0525, |
|
"step": 14300 |
|
}, |
|
{ |
|
"epoch": 0.03724614706777436, |
|
"grad_norm": 1.121060848236084, |
|
"learning_rate": 6.20725455843229e-07, |
|
"loss": 2.086, |
|
"step": 14350 |
|
}, |
|
{ |
|
"epoch": 0.03737592458368995, |
|
"grad_norm": 1.087768316268921, |
|
"learning_rate": 6.22888413038306e-07, |
|
"loss": 2.0788, |
|
"step": 14400 |
|
}, |
|
{ |
|
"epoch": 0.03750570209960554, |
|
"grad_norm": 1.1760495901107788, |
|
"learning_rate": 6.250513702333831e-07, |
|
"loss": 2.0599, |
|
"step": 14450 |
|
}, |
|
{ |
|
"epoch": 0.037635479615521134, |
|
"grad_norm": 1.1613560914993286, |
|
"learning_rate": 6.272143274284602e-07, |
|
"loss": 2.0784, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 0.03776525713143672, |
|
"grad_norm": 1.0611546039581299, |
|
"learning_rate": 6.293772846235373e-07, |
|
"loss": 2.0622, |
|
"step": 14550 |
|
}, |
|
{ |
|
"epoch": 0.03789503464735231, |
|
"grad_norm": 1.23395836353302, |
|
"learning_rate": 6.315402418186145e-07, |
|
"loss": 2.0801, |
|
"step": 14600 |
|
}, |
|
{ |
|
"epoch": 0.038024812163267904, |
|
"grad_norm": 1.3746421337127686, |
|
"learning_rate": 6.337031990136916e-07, |
|
"loss": 2.0865, |
|
"step": 14650 |
|
}, |
|
{ |
|
"epoch": 0.03815458967918349, |
|
"grad_norm": 1.2074872255325317, |
|
"learning_rate": 6.358661562087687e-07, |
|
"loss": 2.0814, |
|
"step": 14700 |
|
}, |
|
{ |
|
"epoch": 0.03828436719509908, |
|
"grad_norm": 1.3351000547409058, |
|
"learning_rate": 6.380291134038457e-07, |
|
"loss": 2.072, |
|
"step": 14750 |
|
}, |
|
{ |
|
"epoch": 0.038414144711014675, |
|
"grad_norm": 1.2587641477584839, |
|
"learning_rate": 6.401920705989229e-07, |
|
"loss": 2.0674, |
|
"step": 14800 |
|
}, |
|
{ |
|
"epoch": 0.03854392222693026, |
|
"grad_norm": 1.3082313537597656, |
|
"learning_rate": 6.423550277939999e-07, |
|
"loss": 2.0591, |
|
"step": 14850 |
|
}, |
|
{ |
|
"epoch": 0.03867369974284585, |
|
"grad_norm": 1.0227408409118652, |
|
"learning_rate": 6.445179849890771e-07, |
|
"loss": 2.0622, |
|
"step": 14900 |
|
}, |
|
{ |
|
"epoch": 0.038803477258761446, |
|
"grad_norm": 1.067315697669983, |
|
"learning_rate": 6.466809421841543e-07, |
|
"loss": 2.0742, |
|
"step": 14950 |
|
}, |
|
{ |
|
"epoch": 0.03893325477467703, |
|
"grad_norm": 1.0379241704940796, |
|
"learning_rate": 6.488438993792313e-07, |
|
"loss": 2.0816, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.039063032290592624, |
|
"grad_norm": 1.2002571821212769, |
|
"learning_rate": 6.510068565743084e-07, |
|
"loss": 2.089, |
|
"step": 15050 |
|
}, |
|
{ |
|
"epoch": 0.03919280980650821, |
|
"grad_norm": 0.9409751296043396, |
|
"learning_rate": 6.531698137693855e-07, |
|
"loss": 2.0937, |
|
"step": 15100 |
|
}, |
|
{ |
|
"epoch": 0.0393225873224238, |
|
"grad_norm": 1.1032634973526, |
|
"learning_rate": 6.553327709644626e-07, |
|
"loss": 2.0728, |
|
"step": 15150 |
|
}, |
|
{ |
|
"epoch": 0.039452364838339395, |
|
"grad_norm": 1.1253653764724731, |
|
"learning_rate": 6.574957281595398e-07, |
|
"loss": 2.0654, |
|
"step": 15200 |
|
}, |
|
{ |
|
"epoch": 0.03958214235425498, |
|
"grad_norm": 1.1984907388687134, |
|
"learning_rate": 6.596586853546169e-07, |
|
"loss": 2.0635, |
|
"step": 15250 |
|
}, |
|
{ |
|
"epoch": 0.03971191987017057, |
|
"grad_norm": 1.8919013738632202, |
|
"learning_rate": 6.61821642549694e-07, |
|
"loss": 2.0908, |
|
"step": 15300 |
|
}, |
|
{ |
|
"epoch": 0.039841697386086165, |
|
"grad_norm": 1.084962248802185, |
|
"learning_rate": 6.639845997447711e-07, |
|
"loss": 2.0776, |
|
"step": 15350 |
|
}, |
|
{ |
|
"epoch": 0.03997147490200175, |
|
"grad_norm": 1.2666141986846924, |
|
"learning_rate": 6.661475569398482e-07, |
|
"loss": 2.0572, |
|
"step": 15400 |
|
}, |
|
{ |
|
"epoch": 0.040101252417917344, |
|
"grad_norm": 0.9767414331436157, |
|
"learning_rate": 6.683105141349253e-07, |
|
"loss": 2.0601, |
|
"step": 15450 |
|
}, |
|
{ |
|
"epoch": 0.040231029933832936, |
|
"grad_norm": 1.0183193683624268, |
|
"learning_rate": 6.704734713300024e-07, |
|
"loss": 2.0603, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 0.04036080744974852, |
|
"grad_norm": 1.166793704032898, |
|
"learning_rate": 6.726364285250796e-07, |
|
"loss": 2.067, |
|
"step": 15550 |
|
}, |
|
{ |
|
"epoch": 0.040490584965664114, |
|
"grad_norm": 1.0793973207473755, |
|
"learning_rate": 6.747993857201567e-07, |
|
"loss": 2.0573, |
|
"step": 15600 |
|
}, |
|
{ |
|
"epoch": 0.04062036248157971, |
|
"grad_norm": 1.1123896837234497, |
|
"learning_rate": 6.769623429152337e-07, |
|
"loss": 2.0871, |
|
"step": 15650 |
|
}, |
|
{ |
|
"epoch": 0.04075013999749529, |
|
"grad_norm": 1.1629297733306885, |
|
"learning_rate": 6.791253001103109e-07, |
|
"loss": 2.084, |
|
"step": 15700 |
|
}, |
|
{ |
|
"epoch": 0.040879917513410885, |
|
"grad_norm": 1.1267614364624023, |
|
"learning_rate": 6.812882573053879e-07, |
|
"loss": 2.078, |
|
"step": 15750 |
|
}, |
|
{ |
|
"epoch": 0.04100969502932648, |
|
"grad_norm": 1.084740161895752, |
|
"learning_rate": 6.834512145004651e-07, |
|
"loss": 2.0553, |
|
"step": 15800 |
|
}, |
|
{ |
|
"epoch": 0.04113947254524206, |
|
"grad_norm": 1.520933747291565, |
|
"learning_rate": 6.856141716955423e-07, |
|
"loss": 2.0453, |
|
"step": 15850 |
|
}, |
|
{ |
|
"epoch": 0.041269250061157656, |
|
"grad_norm": 1.133069396018982, |
|
"learning_rate": 6.877771288906193e-07, |
|
"loss": 2.0711, |
|
"step": 15900 |
|
}, |
|
{ |
|
"epoch": 0.04139902757707324, |
|
"grad_norm": 1.0531269311904907, |
|
"learning_rate": 6.899400860856964e-07, |
|
"loss": 2.0538, |
|
"step": 15950 |
|
}, |
|
{ |
|
"epoch": 0.041528805092988834, |
|
"grad_norm": 1.2274492979049683, |
|
"learning_rate": 6.921030432807735e-07, |
|
"loss": 2.0566, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.041658582608904426, |
|
"grad_norm": 1.103072166442871, |
|
"learning_rate": 6.942660004758506e-07, |
|
"loss": 2.0643, |
|
"step": 16050 |
|
}, |
|
{ |
|
"epoch": 0.04178836012482001, |
|
"grad_norm": 1.1152135133743286, |
|
"learning_rate": 6.964289576709278e-07, |
|
"loss": 2.0796, |
|
"step": 16100 |
|
}, |
|
{ |
|
"epoch": 0.041918137640735605, |
|
"grad_norm": 0.9736570715904236, |
|
"learning_rate": 6.985919148660049e-07, |
|
"loss": 2.068, |
|
"step": 16150 |
|
}, |
|
{ |
|
"epoch": 0.0420479151566512, |
|
"grad_norm": 1.0044413805007935, |
|
"learning_rate": 7.00754872061082e-07, |
|
"loss": 2.0544, |
|
"step": 16200 |
|
}, |
|
{ |
|
"epoch": 0.04217769267256678, |
|
"grad_norm": 1.1356353759765625, |
|
"learning_rate": 7.02917829256159e-07, |
|
"loss": 2.0686, |
|
"step": 16250 |
|
}, |
|
{ |
|
"epoch": 0.042307470188482375, |
|
"grad_norm": 1.0584495067596436, |
|
"learning_rate": 7.050807864512362e-07, |
|
"loss": 2.0539, |
|
"step": 16300 |
|
}, |
|
{ |
|
"epoch": 0.04243724770439797, |
|
"grad_norm": 1.0496474504470825, |
|
"learning_rate": 7.072437436463132e-07, |
|
"loss": 2.0859, |
|
"step": 16350 |
|
}, |
|
{ |
|
"epoch": 0.042567025220313554, |
|
"grad_norm": 1.0543708801269531, |
|
"learning_rate": 7.094067008413904e-07, |
|
"loss": 2.0569, |
|
"step": 16400 |
|
}, |
|
{ |
|
"epoch": 0.042696802736229146, |
|
"grad_norm": 1.141650676727295, |
|
"learning_rate": 7.115696580364676e-07, |
|
"loss": 2.0698, |
|
"step": 16450 |
|
}, |
|
{ |
|
"epoch": 0.04282658025214474, |
|
"grad_norm": 1.1720919609069824, |
|
"learning_rate": 7.137326152315446e-07, |
|
"loss": 2.0777, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 0.042956357768060324, |
|
"grad_norm": 1.0720762014389038, |
|
"learning_rate": 7.158955724266217e-07, |
|
"loss": 2.0814, |
|
"step": 16550 |
|
}, |
|
{ |
|
"epoch": 0.04308613528397592, |
|
"grad_norm": 1.0790390968322754, |
|
"learning_rate": 7.180585296216989e-07, |
|
"loss": 2.086, |
|
"step": 16600 |
|
}, |
|
{ |
|
"epoch": 0.04321591279989151, |
|
"grad_norm": 1.218596339225769, |
|
"learning_rate": 7.202214868167759e-07, |
|
"loss": 2.0786, |
|
"step": 16650 |
|
}, |
|
{ |
|
"epoch": 0.043345690315807095, |
|
"grad_norm": 1.053062081336975, |
|
"learning_rate": 7.223844440118531e-07, |
|
"loss": 2.07, |
|
"step": 16700 |
|
}, |
|
{ |
|
"epoch": 0.04347546783172269, |
|
"grad_norm": 1.214247226715088, |
|
"learning_rate": 7.245474012069302e-07, |
|
"loss": 2.0486, |
|
"step": 16750 |
|
}, |
|
{ |
|
"epoch": 0.04360524534763827, |
|
"grad_norm": 1.1494215726852417, |
|
"learning_rate": 7.267103584020073e-07, |
|
"loss": 2.0772, |
|
"step": 16800 |
|
}, |
|
{ |
|
"epoch": 0.043735022863553866, |
|
"grad_norm": 1.0832849740982056, |
|
"learning_rate": 7.288733155970844e-07, |
|
"loss": 2.0711, |
|
"step": 16850 |
|
}, |
|
{ |
|
"epoch": 0.04386480037946946, |
|
"grad_norm": 1.3245981931686401, |
|
"learning_rate": 7.310362727921615e-07, |
|
"loss": 2.0627, |
|
"step": 16900 |
|
}, |
|
{ |
|
"epoch": 0.043994577895385044, |
|
"grad_norm": 1.017421841621399, |
|
"learning_rate": 7.331992299872386e-07, |
|
"loss": 2.0425, |
|
"step": 16950 |
|
}, |
|
{ |
|
"epoch": 0.044124355411300636, |
|
"grad_norm": 1.180729627609253, |
|
"learning_rate": 7.353621871823158e-07, |
|
"loss": 2.0655, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 0.04425413292721623, |
|
"grad_norm": 1.1626049280166626, |
|
"learning_rate": 7.375251443773929e-07, |
|
"loss": 2.0698, |
|
"step": 17050 |
|
}, |
|
{ |
|
"epoch": 0.044383910443131815, |
|
"grad_norm": 1.1861342191696167, |
|
"learning_rate": 7.3968810157247e-07, |
|
"loss": 2.0708, |
|
"step": 17100 |
|
}, |
|
{ |
|
"epoch": 0.04451368795904741, |
|
"grad_norm": 1.0834026336669922, |
|
"learning_rate": 7.41851058767547e-07, |
|
"loss": 2.0672, |
|
"step": 17150 |
|
}, |
|
{ |
|
"epoch": 0.044643465474963, |
|
"grad_norm": 1.1176856756210327, |
|
"learning_rate": 7.440140159626242e-07, |
|
"loss": 2.0727, |
|
"step": 17200 |
|
}, |
|
{ |
|
"epoch": 0.044773242990878585, |
|
"grad_norm": 1.0166140794754028, |
|
"learning_rate": 7.461769731577012e-07, |
|
"loss": 2.0884, |
|
"step": 17250 |
|
}, |
|
{ |
|
"epoch": 0.04490302050679418, |
|
"grad_norm": 1.2397652864456177, |
|
"learning_rate": 7.483399303527784e-07, |
|
"loss": 2.0616, |
|
"step": 17300 |
|
}, |
|
{ |
|
"epoch": 0.04503279802270977, |
|
"grad_norm": 1.3083019256591797, |
|
"learning_rate": 7.505028875478556e-07, |
|
"loss": 2.0749, |
|
"step": 17350 |
|
}, |
|
{ |
|
"epoch": 0.045162575538625356, |
|
"grad_norm": 1.0846015214920044, |
|
"learning_rate": 7.526658447429326e-07, |
|
"loss": 2.0552, |
|
"step": 17400 |
|
}, |
|
{ |
|
"epoch": 0.04529235305454095, |
|
"grad_norm": 1.1936324834823608, |
|
"learning_rate": 7.548288019380097e-07, |
|
"loss": 2.064, |
|
"step": 17450 |
|
}, |
|
{ |
|
"epoch": 0.04542213057045654, |
|
"grad_norm": 1.2460789680480957, |
|
"learning_rate": 7.569917591330867e-07, |
|
"loss": 2.0637, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 0.04555190808637213, |
|
"grad_norm": 1.018153190612793, |
|
"learning_rate": 7.591547163281639e-07, |
|
"loss": 2.0386, |
|
"step": 17550 |
|
}, |
|
{ |
|
"epoch": 0.04568168560228772, |
|
"grad_norm": 1.000977873802185, |
|
"learning_rate": 7.613176735232411e-07, |
|
"loss": 2.0399, |
|
"step": 17600 |
|
}, |
|
{ |
|
"epoch": 0.045811463118203305, |
|
"grad_norm": 1.059472918510437, |
|
"learning_rate": 7.634806307183182e-07, |
|
"loss": 2.0672, |
|
"step": 17650 |
|
}, |
|
{ |
|
"epoch": 0.0459412406341189, |
|
"grad_norm": 1.1349507570266724, |
|
"learning_rate": 7.656435879133953e-07, |
|
"loss": 2.0571, |
|
"step": 17700 |
|
}, |
|
{ |
|
"epoch": 0.04607101815003449, |
|
"grad_norm": 1.0396134853363037, |
|
"learning_rate": 7.678065451084723e-07, |
|
"loss": 2.0768, |
|
"step": 17750 |
|
}, |
|
{ |
|
"epoch": 0.046200795665950076, |
|
"grad_norm": 1.0826431512832642, |
|
"learning_rate": 7.699695023035494e-07, |
|
"loss": 2.0425, |
|
"step": 17800 |
|
}, |
|
{ |
|
"epoch": 0.04633057318186567, |
|
"grad_norm": 1.0823532342910767, |
|
"learning_rate": 7.721324594986265e-07, |
|
"loss": 2.0574, |
|
"step": 17850 |
|
}, |
|
{ |
|
"epoch": 0.04646035069778126, |
|
"grad_norm": 1.040619134902954, |
|
"learning_rate": 7.742954166937037e-07, |
|
"loss": 2.054, |
|
"step": 17900 |
|
}, |
|
{ |
|
"epoch": 0.046590128213696846, |
|
"grad_norm": 1.0001822710037231, |
|
"learning_rate": 7.764583738887809e-07, |
|
"loss": 2.0685, |
|
"step": 17950 |
|
}, |
|
{ |
|
"epoch": 0.04671990572961244, |
|
"grad_norm": 1.1200745105743408, |
|
"learning_rate": 7.786213310838579e-07, |
|
"loss": 2.0691, |
|
"step": 18000 |
|
} |
|
], |
|
"logging_steps": 50, |
|
"max_steps": 1155822, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 1000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 9.879259894241034e+18, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
}