|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 10.0, |
|
"eval_steps": 500, |
|
"global_step": 11250, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.008888888888888889, |
|
"grad_norm": 5.835433483123779, |
|
"learning_rate": 1.7761989342806394e-06, |
|
"loss": 1.0096, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.017777777777777778, |
|
"grad_norm": 6.898040294647217, |
|
"learning_rate": 3.5523978685612787e-06, |
|
"loss": 1.0634, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.02666666666666667, |
|
"grad_norm": 7.464753150939941, |
|
"learning_rate": 5.328596802841918e-06, |
|
"loss": 0.9025, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.035555555555555556, |
|
"grad_norm": 3.1078720092773438, |
|
"learning_rate": 7.1047957371225574e-06, |
|
"loss": 0.6682, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.044444444444444446, |
|
"grad_norm": 1.5094455480575562, |
|
"learning_rate": 8.880994671403197e-06, |
|
"loss": 0.4396, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.05333333333333334, |
|
"grad_norm": 1.504876971244812, |
|
"learning_rate": 1.0657193605683836e-05, |
|
"loss": 0.3367, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.06222222222222222, |
|
"grad_norm": 1.5651991367340088, |
|
"learning_rate": 1.2433392539964476e-05, |
|
"loss": 0.279, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.07111111111111111, |
|
"grad_norm": 1.1466246843338013, |
|
"learning_rate": 1.4209591474245115e-05, |
|
"loss": 0.2508, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 1.6467901468276978, |
|
"learning_rate": 1.5985790408525757e-05, |
|
"loss": 0.2371, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.08888888888888889, |
|
"grad_norm": 1.2528489828109741, |
|
"learning_rate": 1.7761989342806394e-05, |
|
"loss": 0.2042, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.09777777777777778, |
|
"grad_norm": 0.828504204750061, |
|
"learning_rate": 1.9538188277087034e-05, |
|
"loss": 0.196, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.10666666666666667, |
|
"grad_norm": 0.7577255964279175, |
|
"learning_rate": 2.131438721136767e-05, |
|
"loss": 0.192, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.11555555555555555, |
|
"grad_norm": 0.8037460446357727, |
|
"learning_rate": 2.3090586145648312e-05, |
|
"loss": 0.1773, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.12444444444444444, |
|
"grad_norm": 1.2785838842391968, |
|
"learning_rate": 2.4866785079928952e-05, |
|
"loss": 0.1729, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.13333333333333333, |
|
"grad_norm": 0.8146741390228271, |
|
"learning_rate": 2.6642984014209593e-05, |
|
"loss": 0.1719, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.14222222222222222, |
|
"grad_norm": 0.9373874068260193, |
|
"learning_rate": 2.841918294849023e-05, |
|
"loss": 0.1604, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.1511111111111111, |
|
"grad_norm": 0.7069199681282043, |
|
"learning_rate": 3.0195381882770874e-05, |
|
"loss": 0.1468, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 0.754027783870697, |
|
"learning_rate": 3.1971580817051514e-05, |
|
"loss": 0.1504, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.1688888888888889, |
|
"grad_norm": 0.9207645058631897, |
|
"learning_rate": 3.374777975133215e-05, |
|
"loss": 0.1365, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.17777777777777778, |
|
"grad_norm": 0.8368526101112366, |
|
"learning_rate": 3.552397868561279e-05, |
|
"loss": 0.1263, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.18666666666666668, |
|
"grad_norm": 1.1070560216903687, |
|
"learning_rate": 3.730017761989343e-05, |
|
"loss": 0.1306, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.19555555555555557, |
|
"grad_norm": 1.1743391752243042, |
|
"learning_rate": 3.907637655417407e-05, |
|
"loss": 0.1175, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.20444444444444446, |
|
"grad_norm": 0.7877116799354553, |
|
"learning_rate": 4.085257548845471e-05, |
|
"loss": 0.1187, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.21333333333333335, |
|
"grad_norm": 1.2333160638809204, |
|
"learning_rate": 4.262877442273534e-05, |
|
"loss": 0.1037, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.2222222222222222, |
|
"grad_norm": 0.6903652548789978, |
|
"learning_rate": 4.440497335701599e-05, |
|
"loss": 0.1134, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.2311111111111111, |
|
"grad_norm": 0.7636107802391052, |
|
"learning_rate": 4.6181172291296624e-05, |
|
"loss": 0.1035, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 0.6047253012657166, |
|
"learning_rate": 4.795737122557727e-05, |
|
"loss": 0.0969, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.24888888888888888, |
|
"grad_norm": 0.6038394570350647, |
|
"learning_rate": 4.9733570159857905e-05, |
|
"loss": 0.0897, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.2577777777777778, |
|
"grad_norm": 0.8348813652992249, |
|
"learning_rate": 5.150976909413855e-05, |
|
"loss": 0.0899, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.26666666666666666, |
|
"grad_norm": 0.47054576873779297, |
|
"learning_rate": 5.3285968028419185e-05, |
|
"loss": 0.0759, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.27555555555555555, |
|
"grad_norm": 0.8130301237106323, |
|
"learning_rate": 5.5062166962699826e-05, |
|
"loss": 0.0819, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.28444444444444444, |
|
"grad_norm": 0.8308233022689819, |
|
"learning_rate": 5.683836589698046e-05, |
|
"loss": 0.0983, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.29333333333333333, |
|
"grad_norm": 0.6173644065856934, |
|
"learning_rate": 5.861456483126111e-05, |
|
"loss": 0.0869, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.3022222222222222, |
|
"grad_norm": 0.7818973660469055, |
|
"learning_rate": 6.039076376554175e-05, |
|
"loss": 0.0896, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.3111111111111111, |
|
"grad_norm": 0.8493295311927795, |
|
"learning_rate": 6.216696269982238e-05, |
|
"loss": 0.0876, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 0.7423477172851562, |
|
"learning_rate": 6.394316163410303e-05, |
|
"loss": 0.0873, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.3288888888888889, |
|
"grad_norm": 0.7845220565795898, |
|
"learning_rate": 6.571936056838366e-05, |
|
"loss": 0.0817, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.3377777777777778, |
|
"grad_norm": 0.5911993384361267, |
|
"learning_rate": 6.74955595026643e-05, |
|
"loss": 0.0793, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.3466666666666667, |
|
"grad_norm": 0.7266479730606079, |
|
"learning_rate": 6.927175843694494e-05, |
|
"loss": 0.0756, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.35555555555555557, |
|
"grad_norm": 0.5606906414031982, |
|
"learning_rate": 7.104795737122558e-05, |
|
"loss": 0.0697, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.36444444444444446, |
|
"grad_norm": 0.5985639095306396, |
|
"learning_rate": 7.282415630550622e-05, |
|
"loss": 0.0775, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.37333333333333335, |
|
"grad_norm": 0.6423097848892212, |
|
"learning_rate": 7.460035523978686e-05, |
|
"loss": 0.0781, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.38222222222222224, |
|
"grad_norm": 0.7394803762435913, |
|
"learning_rate": 7.637655417406749e-05, |
|
"loss": 0.0725, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.39111111111111113, |
|
"grad_norm": 0.6536367535591125, |
|
"learning_rate": 7.815275310834814e-05, |
|
"loss": 0.0754, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 0.640508770942688, |
|
"learning_rate": 7.992895204262878e-05, |
|
"loss": 0.0799, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.4088888888888889, |
|
"grad_norm": 0.5902549028396606, |
|
"learning_rate": 8.170515097690942e-05, |
|
"loss": 0.0772, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.4177777777777778, |
|
"grad_norm": 0.5078624486923218, |
|
"learning_rate": 8.348134991119005e-05, |
|
"loss": 0.0614, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.4266666666666667, |
|
"grad_norm": 0.649238646030426, |
|
"learning_rate": 8.525754884547069e-05, |
|
"loss": 0.0634, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.43555555555555553, |
|
"grad_norm": 0.5236638188362122, |
|
"learning_rate": 8.703374777975135e-05, |
|
"loss": 0.083, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.4444444444444444, |
|
"grad_norm": 0.5363730192184448, |
|
"learning_rate": 8.880994671403198e-05, |
|
"loss": 0.0667, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.4533333333333333, |
|
"grad_norm": 0.47589802742004395, |
|
"learning_rate": 9.058614564831261e-05, |
|
"loss": 0.0704, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.4622222222222222, |
|
"grad_norm": 0.5703943967819214, |
|
"learning_rate": 9.236234458259325e-05, |
|
"loss": 0.0691, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.4711111111111111, |
|
"grad_norm": 0.4499058127403259, |
|
"learning_rate": 9.41385435168739e-05, |
|
"loss": 0.0664, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 0.750669002532959, |
|
"learning_rate": 9.591474245115454e-05, |
|
"loss": 0.068, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.4888888888888889, |
|
"grad_norm": 0.7603127360343933, |
|
"learning_rate": 9.769094138543518e-05, |
|
"loss": 0.0804, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.49777777777777776, |
|
"grad_norm": 0.48980197310447693, |
|
"learning_rate": 9.946714031971581e-05, |
|
"loss": 0.0658, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.5066666666666667, |
|
"grad_norm": 0.798550009727478, |
|
"learning_rate": 9.999989414190798e-05, |
|
"loss": 0.0718, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.5155555555555555, |
|
"grad_norm": 0.7420524954795837, |
|
"learning_rate": 9.999937565437301e-05, |
|
"loss": 0.0595, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.5244444444444445, |
|
"grad_norm": 0.4677121043205261, |
|
"learning_rate": 9.9998425098547e-05, |
|
"loss": 0.0581, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.5333333333333333, |
|
"grad_norm": 0.4046495854854584, |
|
"learning_rate": 9.99970424826441e-05, |
|
"loss": 0.0668, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.5422222222222223, |
|
"grad_norm": 0.44588854908943176, |
|
"learning_rate": 9.99952278186122e-05, |
|
"loss": 0.0625, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.5511111111111111, |
|
"grad_norm": 0.47325143218040466, |
|
"learning_rate": 9.999298112213265e-05, |
|
"loss": 0.061, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 0.5829877257347107, |
|
"learning_rate": 9.999030241262021e-05, |
|
"loss": 0.0687, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.5688888888888889, |
|
"grad_norm": 0.4549849331378937, |
|
"learning_rate": 9.998719171322288e-05, |
|
"loss": 0.0579, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.5777777777777777, |
|
"grad_norm": 0.41104447841644287, |
|
"learning_rate": 9.998364905082171e-05, |
|
"loss": 0.0621, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.5866666666666667, |
|
"grad_norm": 0.4671890139579773, |
|
"learning_rate": 9.997967445603051e-05, |
|
"loss": 0.0516, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.5955555555555555, |
|
"grad_norm": 0.5537863373756409, |
|
"learning_rate": 9.997526796319562e-05, |
|
"loss": 0.055, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.6044444444444445, |
|
"grad_norm": 0.5803991556167603, |
|
"learning_rate": 9.997042961039565e-05, |
|
"loss": 0.0598, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.6133333333333333, |
|
"grad_norm": 0.5602235794067383, |
|
"learning_rate": 9.99651594394411e-05, |
|
"loss": 0.062, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.6222222222222222, |
|
"grad_norm": 0.3967384397983551, |
|
"learning_rate": 9.995945749587401e-05, |
|
"loss": 0.0625, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.6311111111111111, |
|
"grad_norm": 0.5022227168083191, |
|
"learning_rate": 9.995332382896757e-05, |
|
"loss": 0.0652, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 0.47698643803596497, |
|
"learning_rate": 9.99467584917257e-05, |
|
"loss": 0.0649, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.6488888888888888, |
|
"grad_norm": 0.48257672786712646, |
|
"learning_rate": 9.993976154088261e-05, |
|
"loss": 0.0609, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.6577777777777778, |
|
"grad_norm": 0.3622733950614929, |
|
"learning_rate": 9.993233303690225e-05, |
|
"loss": 0.0546, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.6666666666666666, |
|
"grad_norm": 0.3327910900115967, |
|
"learning_rate": 9.992447304397782e-05, |
|
"loss": 0.0483, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.6755555555555556, |
|
"grad_norm": 0.6955730319023132, |
|
"learning_rate": 9.99161816300313e-05, |
|
"loss": 0.0583, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.6844444444444444, |
|
"grad_norm": 0.5221269726753235, |
|
"learning_rate": 9.990745886671268e-05, |
|
"loss": 0.0631, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.6933333333333334, |
|
"grad_norm": 0.4846705496311188, |
|
"learning_rate": 9.98983048293995e-05, |
|
"loss": 0.0576, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.7022222222222222, |
|
"grad_norm": 0.617433488368988, |
|
"learning_rate": 9.988871959719615e-05, |
|
"loss": 0.0677, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.7111111111111111, |
|
"grad_norm": 0.37552621960639954, |
|
"learning_rate": 9.987870325293311e-05, |
|
"loss": 0.0647, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 0.4279898405075073, |
|
"learning_rate": 9.986825588316642e-05, |
|
"loss": 0.0652, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.7288888888888889, |
|
"grad_norm": 0.5149549841880798, |
|
"learning_rate": 9.985737757817672e-05, |
|
"loss": 0.0579, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.7377777777777778, |
|
"grad_norm": 0.3736940026283264, |
|
"learning_rate": 9.984606843196862e-05, |
|
"loss": 0.0599, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.7466666666666667, |
|
"grad_norm": 0.5054724812507629, |
|
"learning_rate": 9.983432854226977e-05, |
|
"loss": 0.0632, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.7555555555555555, |
|
"grad_norm": 0.4900479018688202, |
|
"learning_rate": 9.982215801053014e-05, |
|
"loss": 0.057, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.7644444444444445, |
|
"grad_norm": 0.3823259770870209, |
|
"learning_rate": 9.980955694192107e-05, |
|
"loss": 0.0551, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.7733333333333333, |
|
"grad_norm": 0.2877548038959503, |
|
"learning_rate": 9.979652544533434e-05, |
|
"loss": 0.0521, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.7822222222222223, |
|
"grad_norm": 0.5414068698883057, |
|
"learning_rate": 9.97830636333813e-05, |
|
"loss": 0.0479, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.7911111111111111, |
|
"grad_norm": 0.47189751267433167, |
|
"learning_rate": 9.976917162239185e-05, |
|
"loss": 0.0548, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 0.42453688383102417, |
|
"learning_rate": 9.975484953241343e-05, |
|
"loss": 0.0502, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.8088888888888889, |
|
"grad_norm": 0.4671352803707123, |
|
"learning_rate": 9.974009748721e-05, |
|
"loss": 0.0523, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.8177777777777778, |
|
"grad_norm": 0.6095179915428162, |
|
"learning_rate": 9.972491561426099e-05, |
|
"loss": 0.0595, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.8266666666666667, |
|
"grad_norm": 0.4958152770996094, |
|
"learning_rate": 9.970930404476013e-05, |
|
"loss": 0.0496, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.8355555555555556, |
|
"grad_norm": 0.49585989117622375, |
|
"learning_rate": 9.96932629136144e-05, |
|
"loss": 0.0559, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.8444444444444444, |
|
"grad_norm": 0.39392414689064026, |
|
"learning_rate": 9.967679235944281e-05, |
|
"loss": 0.0547, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.8533333333333334, |
|
"grad_norm": 0.33945757150650024, |
|
"learning_rate": 9.965989252457523e-05, |
|
"loss": 0.0615, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.8622222222222222, |
|
"grad_norm": 0.40421101450920105, |
|
"learning_rate": 9.964256355505116e-05, |
|
"loss": 0.0597, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.8711111111111111, |
|
"grad_norm": 0.4000270366668701, |
|
"learning_rate": 9.962480560061837e-05, |
|
"loss": 0.0553, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 0.543621301651001, |
|
"learning_rate": 9.96066188147318e-05, |
|
"loss": 0.0583, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.8888888888888888, |
|
"grad_norm": 0.5387834310531616, |
|
"learning_rate": 9.958800335455208e-05, |
|
"loss": 0.0534, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.8977777777777778, |
|
"grad_norm": 0.45324257016181946, |
|
"learning_rate": 9.956895938094423e-05, |
|
"loss": 0.0596, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.9066666666666666, |
|
"grad_norm": 0.3962705731391907, |
|
"learning_rate": 9.954948705847622e-05, |
|
"loss": 0.0556, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.9155555555555556, |
|
"grad_norm": 0.5762264132499695, |
|
"learning_rate": 9.952958655541764e-05, |
|
"loss": 0.0572, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.9244444444444444, |
|
"grad_norm": 0.8296062350273132, |
|
"learning_rate": 9.950925804373814e-05, |
|
"loss": 0.0497, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.9333333333333333, |
|
"grad_norm": 0.42008882761001587, |
|
"learning_rate": 9.948850169910604e-05, |
|
"loss": 0.0484, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.9422222222222222, |
|
"grad_norm": 0.24791453778743744, |
|
"learning_rate": 9.946731770088676e-05, |
|
"loss": 0.0513, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.9511111111111111, |
|
"grad_norm": 0.3856937289237976, |
|
"learning_rate": 9.94457062321412e-05, |
|
"loss": 0.0511, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 0.36315223574638367, |
|
"learning_rate": 9.942366747962437e-05, |
|
"loss": 0.0439, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.9688888888888889, |
|
"grad_norm": 0.4662598967552185, |
|
"learning_rate": 9.940120163378354e-05, |
|
"loss": 0.0576, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.9777777777777777, |
|
"grad_norm": 0.5072160959243774, |
|
"learning_rate": 9.937830888875672e-05, |
|
"loss": 0.0484, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.9866666666666667, |
|
"grad_norm": 0.5154268145561218, |
|
"learning_rate": 9.935498944237098e-05, |
|
"loss": 0.0485, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.9955555555555555, |
|
"grad_norm": 0.46556252241134644, |
|
"learning_rate": 9.933124349614069e-05, |
|
"loss": 0.0498, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 1.0044444444444445, |
|
"grad_norm": 0.42616957426071167, |
|
"learning_rate": 9.930707125526584e-05, |
|
"loss": 0.0543, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 1.0133333333333334, |
|
"grad_norm": 0.4739038050174713, |
|
"learning_rate": 9.928247292863019e-05, |
|
"loss": 0.0494, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 1.0222222222222221, |
|
"grad_norm": 0.40663331747055054, |
|
"learning_rate": 9.925744872879956e-05, |
|
"loss": 0.046, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 1.031111111111111, |
|
"grad_norm": 0.40094658732414246, |
|
"learning_rate": 9.92319988720199e-05, |
|
"loss": 0.0529, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"grad_norm": 0.4923577308654785, |
|
"learning_rate": 9.920612357821548e-05, |
|
"loss": 0.0534, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 1.048888888888889, |
|
"grad_norm": 0.41850945353507996, |
|
"learning_rate": 9.917982307098697e-05, |
|
"loss": 0.0535, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 1.0577777777777777, |
|
"grad_norm": 0.4174967408180237, |
|
"learning_rate": 9.91530975776095e-05, |
|
"loss": 0.0495, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 1.0666666666666667, |
|
"grad_norm": 0.571321964263916, |
|
"learning_rate": 9.912594732903073e-05, |
|
"loss": 0.048, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.0755555555555556, |
|
"grad_norm": 0.36701643466949463, |
|
"learning_rate": 9.909837255986885e-05, |
|
"loss": 0.0497, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 1.0844444444444445, |
|
"grad_norm": 0.38277170062065125, |
|
"learning_rate": 9.907037350841045e-05, |
|
"loss": 0.0479, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 1.0933333333333333, |
|
"grad_norm": 0.35891497135162354, |
|
"learning_rate": 9.904195041660864e-05, |
|
"loss": 0.0529, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 1.1022222222222222, |
|
"grad_norm": 0.3245532214641571, |
|
"learning_rate": 9.90131035300808e-05, |
|
"loss": 0.0411, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 1.1111111111111112, |
|
"grad_norm": 0.40029340982437134, |
|
"learning_rate": 9.898383309810653e-05, |
|
"loss": 0.0471, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"grad_norm": 0.4895324409008026, |
|
"learning_rate": 9.895413937362553e-05, |
|
"loss": 0.0612, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 1.1288888888888888, |
|
"grad_norm": 0.4078107476234436, |
|
"learning_rate": 9.892402261323532e-05, |
|
"loss": 0.048, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 1.1377777777777778, |
|
"grad_norm": 0.4852098822593689, |
|
"learning_rate": 9.889348307718911e-05, |
|
"loss": 0.0471, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 1.1466666666666667, |
|
"grad_norm": 0.48769694566726685, |
|
"learning_rate": 9.886252102939347e-05, |
|
"loss": 0.0409, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 1.1555555555555554, |
|
"grad_norm": 0.37530139088630676, |
|
"learning_rate": 9.883113673740615e-05, |
|
"loss": 0.0438, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 1.1644444444444444, |
|
"grad_norm": 0.4452308714389801, |
|
"learning_rate": 9.879933047243367e-05, |
|
"loss": 0.0503, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 1.1733333333333333, |
|
"grad_norm": 0.4109489917755127, |
|
"learning_rate": 9.876710250932904e-05, |
|
"loss": 0.0487, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 1.1822222222222223, |
|
"grad_norm": 0.3781728744506836, |
|
"learning_rate": 9.873445312658936e-05, |
|
"loss": 0.0475, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 1.1911111111111112, |
|
"grad_norm": 0.39596325159072876, |
|
"learning_rate": 9.870138260635338e-05, |
|
"loss": 0.058, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"grad_norm": 0.5044455528259277, |
|
"learning_rate": 9.86678912343992e-05, |
|
"loss": 0.0485, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 1.208888888888889, |
|
"grad_norm": 0.5257819294929504, |
|
"learning_rate": 9.863397930014155e-05, |
|
"loss": 0.0542, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 1.2177777777777778, |
|
"grad_norm": 0.4690333604812622, |
|
"learning_rate": 9.859964709662957e-05, |
|
"loss": 0.0455, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 1.2266666666666666, |
|
"grad_norm": 0.3435376286506653, |
|
"learning_rate": 9.85648949205441e-05, |
|
"loss": 0.056, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 1.2355555555555555, |
|
"grad_norm": 0.3969825804233551, |
|
"learning_rate": 9.852972307219513e-05, |
|
"loss": 0.0514, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 1.2444444444444445, |
|
"grad_norm": 0.39472100138664246, |
|
"learning_rate": 9.849413185551926e-05, |
|
"loss": 0.0445, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 1.2533333333333334, |
|
"grad_norm": 0.4064704179763794, |
|
"learning_rate": 9.845812157807707e-05, |
|
"loss": 0.0461, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 1.2622222222222224, |
|
"grad_norm": 0.5617078542709351, |
|
"learning_rate": 9.842169255105043e-05, |
|
"loss": 0.0549, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 1.271111111111111, |
|
"grad_norm": 0.32629531621932983, |
|
"learning_rate": 9.838484508923982e-05, |
|
"loss": 0.0515, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"grad_norm": 0.4199066758155823, |
|
"learning_rate": 9.83475795110616e-05, |
|
"loss": 0.0464, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 1.2888888888888888, |
|
"grad_norm": 0.4101389944553375, |
|
"learning_rate": 9.830989613854528e-05, |
|
"loss": 0.0484, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 1.2977777777777777, |
|
"grad_norm": 0.31827065348625183, |
|
"learning_rate": 9.827179529733075e-05, |
|
"loss": 0.0482, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 1.3066666666666666, |
|
"grad_norm": 0.34162506461143494, |
|
"learning_rate": 9.823327731666543e-05, |
|
"loss": 0.0488, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 1.3155555555555556, |
|
"grad_norm": 0.42143964767456055, |
|
"learning_rate": 9.819434252940142e-05, |
|
"loss": 0.0488, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 1.3244444444444445, |
|
"grad_norm": 0.449290931224823, |
|
"learning_rate": 9.815499127199268e-05, |
|
"loss": 0.0513, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 1.3333333333333333, |
|
"grad_norm": 0.47996440529823303, |
|
"learning_rate": 9.811522388449206e-05, |
|
"loss": 0.0411, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.3422222222222222, |
|
"grad_norm": 0.445571631193161, |
|
"learning_rate": 9.807504071054839e-05, |
|
"loss": 0.0567, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 1.3511111111111112, |
|
"grad_norm": 0.34127169847488403, |
|
"learning_rate": 9.803444209740352e-05, |
|
"loss": 0.0544, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 1.3599999999999999, |
|
"grad_norm": 0.3782777786254883, |
|
"learning_rate": 9.799342839588929e-05, |
|
"loss": 0.0528, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 1.3688888888888888, |
|
"grad_norm": 0.4436071813106537, |
|
"learning_rate": 9.795199996042452e-05, |
|
"loss": 0.0483, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 1.3777777777777778, |
|
"grad_norm": 0.435794472694397, |
|
"learning_rate": 9.791015714901197e-05, |
|
"loss": 0.0428, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 1.3866666666666667, |
|
"grad_norm": 0.35971954464912415, |
|
"learning_rate": 9.786790032323516e-05, |
|
"loss": 0.0431, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 1.3955555555555557, |
|
"grad_norm": 0.48946893215179443, |
|
"learning_rate": 9.782522984825537e-05, |
|
"loss": 0.0487, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 1.4044444444444444, |
|
"grad_norm": 0.34978681802749634, |
|
"learning_rate": 9.778214609280838e-05, |
|
"loss": 0.0452, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 1.4133333333333333, |
|
"grad_norm": 0.23440729081630707, |
|
"learning_rate": 9.773864942920134e-05, |
|
"loss": 0.0399, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 1.4222222222222223, |
|
"grad_norm": 0.4934622645378113, |
|
"learning_rate": 9.76947402333095e-05, |
|
"loss": 0.0425, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 1.431111111111111, |
|
"grad_norm": 0.37194010615348816, |
|
"learning_rate": 9.765041888457304e-05, |
|
"loss": 0.0413, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"grad_norm": 0.45446038246154785, |
|
"learning_rate": 9.760568576599371e-05, |
|
"loss": 0.0408, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 1.448888888888889, |
|
"grad_norm": 0.3959934115409851, |
|
"learning_rate": 9.756054126413163e-05, |
|
"loss": 0.0451, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 1.4577777777777778, |
|
"grad_norm": 0.4348146915435791, |
|
"learning_rate": 9.751498576910178e-05, |
|
"loss": 0.0433, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 1.4666666666666668, |
|
"grad_norm": 0.5030315518379211, |
|
"learning_rate": 9.746901967457078e-05, |
|
"loss": 0.0434, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 1.4755555555555555, |
|
"grad_norm": 0.31760266423225403, |
|
"learning_rate": 9.742264337775345e-05, |
|
"loss": 0.0437, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 1.4844444444444445, |
|
"grad_norm": 0.411958247423172, |
|
"learning_rate": 9.73758572794093e-05, |
|
"loss": 0.0421, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 1.4933333333333334, |
|
"grad_norm": 0.28856176137924194, |
|
"learning_rate": 9.732866178383921e-05, |
|
"loss": 0.0467, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 1.5022222222222221, |
|
"grad_norm": 0.40374937653541565, |
|
"learning_rate": 9.728105729888179e-05, |
|
"loss": 0.0475, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 1.511111111111111, |
|
"grad_norm": 0.4501747488975525, |
|
"learning_rate": 9.723304423590997e-05, |
|
"loss": 0.0486, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"grad_norm": 0.4531670808792114, |
|
"learning_rate": 9.718462300982736e-05, |
|
"loss": 0.0461, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 1.528888888888889, |
|
"grad_norm": 0.27342939376831055, |
|
"learning_rate": 9.713579403906471e-05, |
|
"loss": 0.0428, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 1.537777777777778, |
|
"grad_norm": 0.3965563476085663, |
|
"learning_rate": 9.708655774557628e-05, |
|
"loss": 0.048, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 1.5466666666666666, |
|
"grad_norm": 0.46752646565437317, |
|
"learning_rate": 9.703691455483622e-05, |
|
"loss": 0.0431, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 1.5555555555555556, |
|
"grad_norm": 0.4319798946380615, |
|
"learning_rate": 9.698686489583487e-05, |
|
"loss": 0.0543, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 1.5644444444444443, |
|
"grad_norm": 0.5014967918395996, |
|
"learning_rate": 9.693640920107501e-05, |
|
"loss": 0.047, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 1.5733333333333333, |
|
"grad_norm": 0.4571453928947449, |
|
"learning_rate": 9.688554790656821e-05, |
|
"loss": 0.0416, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 1.5822222222222222, |
|
"grad_norm": 0.3979610204696655, |
|
"learning_rate": 9.683428145183103e-05, |
|
"loss": 0.0524, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 1.5911111111111111, |
|
"grad_norm": 0.44301512837409973, |
|
"learning_rate": 9.678261027988118e-05, |
|
"loss": 0.0458, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"grad_norm": 0.4120193421840668, |
|
"learning_rate": 9.673053483723375e-05, |
|
"loss": 0.0436, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 1.608888888888889, |
|
"grad_norm": 0.28014975786209106, |
|
"learning_rate": 9.667805557389726e-05, |
|
"loss": 0.049, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 1.6177777777777778, |
|
"grad_norm": 0.27927517890930176, |
|
"learning_rate": 9.662517294336994e-05, |
|
"loss": 0.0433, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 1.6266666666666667, |
|
"grad_norm": 0.29386866092681885, |
|
"learning_rate": 9.657188740263563e-05, |
|
"loss": 0.0552, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 1.6355555555555554, |
|
"grad_norm": 0.37175601720809937, |
|
"learning_rate": 9.651819941215995e-05, |
|
"loss": 0.0417, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 1.6444444444444444, |
|
"grad_norm": 0.4516772925853729, |
|
"learning_rate": 9.646410943588622e-05, |
|
"loss": 0.0412, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 1.6533333333333333, |
|
"grad_norm": 0.37636223435401917, |
|
"learning_rate": 9.640961794123158e-05, |
|
"loss": 0.0395, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 1.6622222222222223, |
|
"grad_norm": 0.43672800064086914, |
|
"learning_rate": 9.635472539908284e-05, |
|
"loss": 0.0415, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 1.6711111111111112, |
|
"grad_norm": 0.41261500120162964, |
|
"learning_rate": 9.629943228379246e-05, |
|
"loss": 0.0442, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 1.6800000000000002, |
|
"grad_norm": 0.2579341530799866, |
|
"learning_rate": 9.624373907317444e-05, |
|
"loss": 0.0425, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 1.6888888888888889, |
|
"grad_norm": 0.42779308557510376, |
|
"learning_rate": 9.618764624850018e-05, |
|
"loss": 0.0381, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 1.6977777777777778, |
|
"grad_norm": 0.30951082706451416, |
|
"learning_rate": 9.61311542944944e-05, |
|
"loss": 0.0416, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 1.7066666666666666, |
|
"grad_norm": 0.4475822150707245, |
|
"learning_rate": 9.607426369933079e-05, |
|
"loss": 0.0357, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 1.7155555555555555, |
|
"grad_norm": 0.2528528571128845, |
|
"learning_rate": 9.601697495462796e-05, |
|
"loss": 0.0356, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 1.7244444444444444, |
|
"grad_norm": 0.38093945384025574, |
|
"learning_rate": 9.595928855544508e-05, |
|
"loss": 0.0434, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 1.7333333333333334, |
|
"grad_norm": 0.329484760761261, |
|
"learning_rate": 9.590120500027765e-05, |
|
"loss": 0.0398, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 1.7422222222222223, |
|
"grad_norm": 0.456683486700058, |
|
"learning_rate": 9.584272479105319e-05, |
|
"loss": 0.046, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 1.751111111111111, |
|
"grad_norm": 0.40798524022102356, |
|
"learning_rate": 9.578384843312691e-05, |
|
"loss": 0.0351, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"grad_norm": 0.38196903467178345, |
|
"learning_rate": 9.572457643527728e-05, |
|
"loss": 0.0413, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 1.7688888888888887, |
|
"grad_norm": 0.28042274713516235, |
|
"learning_rate": 9.566490930970171e-05, |
|
"loss": 0.0473, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 1.7777777777777777, |
|
"grad_norm": 0.35825109481811523, |
|
"learning_rate": 9.560484757201213e-05, |
|
"loss": 0.0474, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.7866666666666666, |
|
"grad_norm": 0.35610640048980713, |
|
"learning_rate": 9.554439174123046e-05, |
|
"loss": 0.0408, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 1.7955555555555556, |
|
"grad_norm": 0.3943731486797333, |
|
"learning_rate": 9.548354233978415e-05, |
|
"loss": 0.0435, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 1.8044444444444445, |
|
"grad_norm": 0.33480551838874817, |
|
"learning_rate": 9.542229989350172e-05, |
|
"loss": 0.044, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 1.8133333333333335, |
|
"grad_norm": 0.31774142384529114, |
|
"learning_rate": 9.536066493160817e-05, |
|
"loss": 0.0434, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 1.8222222222222222, |
|
"grad_norm": 0.3610334098339081, |
|
"learning_rate": 9.529863798672039e-05, |
|
"loss": 0.0432, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 1.8311111111111111, |
|
"grad_norm": 0.27150750160217285, |
|
"learning_rate": 9.523621959484258e-05, |
|
"loss": 0.0398, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 1.8399999999999999, |
|
"grad_norm": 0.3129183351993561, |
|
"learning_rate": 9.517341029536167e-05, |
|
"loss": 0.0433, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 1.8488888888888888, |
|
"grad_norm": 0.3935423195362091, |
|
"learning_rate": 9.511021063104254e-05, |
|
"loss": 0.0427, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 1.8577777777777778, |
|
"grad_norm": 0.3993721902370453, |
|
"learning_rate": 9.504662114802344e-05, |
|
"loss": 0.0425, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 1.8666666666666667, |
|
"grad_norm": 0.4343392550945282, |
|
"learning_rate": 9.498264239581122e-05, |
|
"loss": 0.0447, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 1.8755555555555556, |
|
"grad_norm": 0.28636467456817627, |
|
"learning_rate": 9.491827492727658e-05, |
|
"loss": 0.0416, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 1.8844444444444446, |
|
"grad_norm": 0.44886764883995056, |
|
"learning_rate": 9.485351929864932e-05, |
|
"loss": 0.0469, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 1.8933333333333333, |
|
"grad_norm": 0.42084500193595886, |
|
"learning_rate": 9.47883760695135e-05, |
|
"loss": 0.0436, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 1.9022222222222223, |
|
"grad_norm": 0.5370213985443115, |
|
"learning_rate": 9.472284580280261e-05, |
|
"loss": 0.0439, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 1.911111111111111, |
|
"grad_norm": 0.2400428056716919, |
|
"learning_rate": 9.465692906479475e-05, |
|
"loss": 0.044, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"grad_norm": 0.34335649013519287, |
|
"learning_rate": 9.459062642510766e-05, |
|
"loss": 0.0362, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 1.9288888888888889, |
|
"grad_norm": 0.3093022406101227, |
|
"learning_rate": 9.452393845669385e-05, |
|
"loss": 0.0408, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 1.9377777777777778, |
|
"grad_norm": 0.3216879367828369, |
|
"learning_rate": 9.445686573583567e-05, |
|
"loss": 0.0392, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 1.9466666666666668, |
|
"grad_norm": 0.4455442726612091, |
|
"learning_rate": 9.438940884214027e-05, |
|
"loss": 0.0426, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 1.9555555555555557, |
|
"grad_norm": 0.4701933264732361, |
|
"learning_rate": 9.432156835853463e-05, |
|
"loss": 0.0439, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 1.9644444444444444, |
|
"grad_norm": 0.4961441457271576, |
|
"learning_rate": 9.425334487126049e-05, |
|
"loss": 0.0413, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 1.9733333333333334, |
|
"grad_norm": 0.33902812004089355, |
|
"learning_rate": 9.418473896986932e-05, |
|
"loss": 0.0447, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 1.982222222222222, |
|
"grad_norm": 0.29202884435653687, |
|
"learning_rate": 9.411575124721724e-05, |
|
"loss": 0.0328, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 1.991111111111111, |
|
"grad_norm": 0.284365713596344, |
|
"learning_rate": 9.404638229945983e-05, |
|
"loss": 0.0391, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 0.681056797504425, |
|
"learning_rate": 9.397663272604702e-05, |
|
"loss": 0.041, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 2.008888888888889, |
|
"grad_norm": 0.422743022441864, |
|
"learning_rate": 9.390650312971793e-05, |
|
"loss": 0.0438, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 2.017777777777778, |
|
"grad_norm": 0.41324707865715027, |
|
"learning_rate": 9.383599411649562e-05, |
|
"loss": 0.0394, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 2.026666666666667, |
|
"grad_norm": 0.2756149172782898, |
|
"learning_rate": 9.376510629568187e-05, |
|
"loss": 0.0395, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 2.0355555555555553, |
|
"grad_norm": 0.4113193154335022, |
|
"learning_rate": 9.369384027985191e-05, |
|
"loss": 0.0432, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 2.0444444444444443, |
|
"grad_norm": 0.41306623816490173, |
|
"learning_rate": 9.362219668484917e-05, |
|
"loss": 0.0419, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 2.0533333333333332, |
|
"grad_norm": 0.2843189835548401, |
|
"learning_rate": 9.355017612977988e-05, |
|
"loss": 0.0433, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 2.062222222222222, |
|
"grad_norm": 0.33685362339019775, |
|
"learning_rate": 9.347777923700778e-05, |
|
"loss": 0.0357, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 2.071111111111111, |
|
"grad_norm": 0.3132250905036926, |
|
"learning_rate": 9.34050066321487e-05, |
|
"loss": 0.04, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"grad_norm": 0.2785365581512451, |
|
"learning_rate": 9.333185894406523e-05, |
|
"loss": 0.0446, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 2.088888888888889, |
|
"grad_norm": 0.3503095507621765, |
|
"learning_rate": 9.325833680486116e-05, |
|
"loss": 0.0463, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 2.097777777777778, |
|
"grad_norm": 0.3490595817565918, |
|
"learning_rate": 9.318444084987612e-05, |
|
"loss": 0.0399, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 2.1066666666666665, |
|
"grad_norm": 0.24215000867843628, |
|
"learning_rate": 9.31101717176801e-05, |
|
"loss": 0.0375, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 2.1155555555555554, |
|
"grad_norm": 0.3222099244594574, |
|
"learning_rate": 9.303553005006782e-05, |
|
"loss": 0.0374, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 2.1244444444444444, |
|
"grad_norm": 0.2714426815509796, |
|
"learning_rate": 9.29605164920533e-05, |
|
"loss": 0.0417, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 2.1333333333333333, |
|
"grad_norm": 0.3506973683834076, |
|
"learning_rate": 9.288513169186423e-05, |
|
"loss": 0.0459, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 2.1422222222222222, |
|
"grad_norm": 0.3329187333583832, |
|
"learning_rate": 9.28093763009364e-05, |
|
"loss": 0.0406, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 2.151111111111111, |
|
"grad_norm": 0.367241233587265, |
|
"learning_rate": 9.2733250973908e-05, |
|
"loss": 0.0503, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"grad_norm": 0.3036479353904724, |
|
"learning_rate": 9.265675636861406e-05, |
|
"loss": 0.0362, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 2.168888888888889, |
|
"grad_norm": 0.2860916256904602, |
|
"learning_rate": 9.25798931460807e-05, |
|
"loss": 0.0342, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 2.1777777777777776, |
|
"grad_norm": 0.3841829001903534, |
|
"learning_rate": 9.250266197051945e-05, |
|
"loss": 0.0358, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 2.1866666666666665, |
|
"grad_norm": 0.46269235014915466, |
|
"learning_rate": 9.242506350932146e-05, |
|
"loss": 0.0359, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 2.1955555555555555, |
|
"grad_norm": 0.29752272367477417, |
|
"learning_rate": 9.23470984330518e-05, |
|
"loss": 0.0363, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 2.2044444444444444, |
|
"grad_norm": 0.30834946036338806, |
|
"learning_rate": 9.226876741544363e-05, |
|
"loss": 0.0409, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 2.2133333333333334, |
|
"grad_norm": 0.30017754435539246, |
|
"learning_rate": 9.21900711333924e-05, |
|
"loss": 0.041, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 2.2222222222222223, |
|
"grad_norm": 0.32493507862091064, |
|
"learning_rate": 9.211101026694994e-05, |
|
"loss": 0.0376, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 2.2311111111111113, |
|
"grad_norm": 0.30905264616012573, |
|
"learning_rate": 9.203158549931865e-05, |
|
"loss": 0.0377, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"grad_norm": 0.3972308039665222, |
|
"learning_rate": 9.19517975168456e-05, |
|
"loss": 0.0384, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 2.2488888888888887, |
|
"grad_norm": 0.3591013252735138, |
|
"learning_rate": 9.18716470090165e-05, |
|
"loss": 0.0366, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 2.2577777777777777, |
|
"grad_norm": 0.2968563139438629, |
|
"learning_rate": 9.179113466844991e-05, |
|
"loss": 0.0421, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 2.2666666666666666, |
|
"grad_norm": 0.3402132987976074, |
|
"learning_rate": 9.171026119089106e-05, |
|
"loss": 0.0391, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 2.2755555555555556, |
|
"grad_norm": 0.4288232922554016, |
|
"learning_rate": 9.162902727520599e-05, |
|
"loss": 0.0362, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 2.2844444444444445, |
|
"grad_norm": 0.3101450502872467, |
|
"learning_rate": 9.154743362337548e-05, |
|
"loss": 0.0419, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 2.2933333333333334, |
|
"grad_norm": 0.3307853639125824, |
|
"learning_rate": 9.14654809404889e-05, |
|
"loss": 0.0333, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 2.3022222222222224, |
|
"grad_norm": 0.3155650496482849, |
|
"learning_rate": 9.138316993473821e-05, |
|
"loss": 0.0396, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 2.311111111111111, |
|
"grad_norm": 0.27981093525886536, |
|
"learning_rate": 9.13005013174118e-05, |
|
"loss": 0.0376, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"grad_norm": 0.3052055537700653, |
|
"learning_rate": 9.121747580288836e-05, |
|
"loss": 0.0334, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 2.328888888888889, |
|
"grad_norm": 0.362469345331192, |
|
"learning_rate": 9.113409410863069e-05, |
|
"loss": 0.0316, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 2.3377777777777777, |
|
"grad_norm": 0.37619614601135254, |
|
"learning_rate": 9.105035695517954e-05, |
|
"loss": 0.0345, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 2.3466666666666667, |
|
"grad_norm": 0.34524768590927124, |
|
"learning_rate": 9.096626506614728e-05, |
|
"loss": 0.0388, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 2.3555555555555556, |
|
"grad_norm": 0.3361116945743561, |
|
"learning_rate": 9.088181916821175e-05, |
|
"loss": 0.0394, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 2.3644444444444446, |
|
"grad_norm": 0.35816025733947754, |
|
"learning_rate": 9.079701999111001e-05, |
|
"loss": 0.0295, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 2.3733333333333335, |
|
"grad_norm": 0.3454771041870117, |
|
"learning_rate": 9.07118682676319e-05, |
|
"loss": 0.0444, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 2.3822222222222225, |
|
"grad_norm": 0.3092334270477295, |
|
"learning_rate": 9.062636473361376e-05, |
|
"loss": 0.0389, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 2.391111111111111, |
|
"grad_norm": 0.3155195116996765, |
|
"learning_rate": 9.05405101279322e-05, |
|
"loss": 0.0344, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"grad_norm": 0.32855644822120667, |
|
"learning_rate": 9.045430519249749e-05, |
|
"loss": 0.0366, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 2.408888888888889, |
|
"grad_norm": 0.24201026558876038, |
|
"learning_rate": 9.036775067224734e-05, |
|
"loss": 0.0341, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 2.417777777777778, |
|
"grad_norm": 0.2772887349128723, |
|
"learning_rate": 9.028084731514034e-05, |
|
"loss": 0.0343, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 2.4266666666666667, |
|
"grad_norm": 0.2846852242946625, |
|
"learning_rate": 9.01935958721496e-05, |
|
"loss": 0.0339, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 2.4355555555555557, |
|
"grad_norm": 0.36860916018486023, |
|
"learning_rate": 9.010599709725615e-05, |
|
"loss": 0.0409, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 2.4444444444444446, |
|
"grad_norm": 0.3374043405056, |
|
"learning_rate": 9.001805174744252e-05, |
|
"loss": 0.0371, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 2.453333333333333, |
|
"grad_norm": 0.3596254289150238, |
|
"learning_rate": 8.992976058268608e-05, |
|
"loss": 0.0463, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 2.462222222222222, |
|
"grad_norm": 0.3271300494670868, |
|
"learning_rate": 8.984112436595269e-05, |
|
"loss": 0.043, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 2.471111111111111, |
|
"grad_norm": 0.23201748728752136, |
|
"learning_rate": 8.975214386318984e-05, |
|
"loss": 0.0391, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"grad_norm": 0.34638941287994385, |
|
"learning_rate": 8.966281984332024e-05, |
|
"loss": 0.0373, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 2.488888888888889, |
|
"grad_norm": 0.3061824440956116, |
|
"learning_rate": 8.957315307823502e-05, |
|
"loss": 0.0327, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 2.497777777777778, |
|
"grad_norm": 0.37074342370033264, |
|
"learning_rate": 8.948314434278719e-05, |
|
"loss": 0.0356, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 2.506666666666667, |
|
"grad_norm": 0.25076544284820557, |
|
"learning_rate": 8.939279441478489e-05, |
|
"loss": 0.0335, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 2.5155555555555553, |
|
"grad_norm": 0.3397284150123596, |
|
"learning_rate": 8.930210407498465e-05, |
|
"loss": 0.0398, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 2.5244444444444447, |
|
"grad_norm": 0.35481202602386475, |
|
"learning_rate": 8.921107410708464e-05, |
|
"loss": 0.0409, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 2.533333333333333, |
|
"grad_norm": 0.31643539667129517, |
|
"learning_rate": 8.911970529771793e-05, |
|
"loss": 0.0337, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 2.542222222222222, |
|
"grad_norm": 0.3066181242465973, |
|
"learning_rate": 8.902799843644572e-05, |
|
"loss": 0.0344, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 2.551111111111111, |
|
"grad_norm": 0.23725642263889313, |
|
"learning_rate": 8.89359543157504e-05, |
|
"loss": 0.0319, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"grad_norm": 0.21943092346191406, |
|
"learning_rate": 8.884357373102885e-05, |
|
"loss": 0.0305, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 2.568888888888889, |
|
"grad_norm": 0.3394530117511749, |
|
"learning_rate": 8.875085748058545e-05, |
|
"loss": 0.0368, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 2.5777777777777775, |
|
"grad_norm": 0.3907650411128998, |
|
"learning_rate": 8.865780636562525e-05, |
|
"loss": 0.0377, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 2.586666666666667, |
|
"grad_norm": 0.33532536029815674, |
|
"learning_rate": 8.856442119024701e-05, |
|
"loss": 0.0385, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 2.5955555555555554, |
|
"grad_norm": 0.3615197241306305, |
|
"learning_rate": 8.847070276143627e-05, |
|
"loss": 0.0427, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 2.6044444444444443, |
|
"grad_norm": 0.31558263301849365, |
|
"learning_rate": 8.83766518890584e-05, |
|
"loss": 0.0336, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 2.6133333333333333, |
|
"grad_norm": 0.32211971282958984, |
|
"learning_rate": 8.82822693858515e-05, |
|
"loss": 0.0334, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 2.6222222222222222, |
|
"grad_norm": 0.27533215284347534, |
|
"learning_rate": 8.81875560674195e-05, |
|
"loss": 0.0346, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 2.631111111111111, |
|
"grad_norm": 0.3535873591899872, |
|
"learning_rate": 8.809251275222505e-05, |
|
"loss": 0.0366, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"grad_norm": 0.3150239586830139, |
|
"learning_rate": 8.79971402615825e-05, |
|
"loss": 0.0384, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 2.648888888888889, |
|
"grad_norm": 0.3877773880958557, |
|
"learning_rate": 8.790143941965067e-05, |
|
"loss": 0.0417, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 2.6577777777777776, |
|
"grad_norm": 0.31549036502838135, |
|
"learning_rate": 8.780541105342592e-05, |
|
"loss": 0.0322, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 2.6666666666666665, |
|
"grad_norm": 0.34817999601364136, |
|
"learning_rate": 8.770905599273482e-05, |
|
"loss": 0.0321, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 2.6755555555555555, |
|
"grad_norm": 0.28988024592399597, |
|
"learning_rate": 8.761237507022709e-05, |
|
"loss": 0.0386, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 2.6844444444444444, |
|
"grad_norm": 0.31453558802604675, |
|
"learning_rate": 8.75153691213684e-05, |
|
"loss": 0.038, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 2.6933333333333334, |
|
"grad_norm": 0.271332710981369, |
|
"learning_rate": 8.741803898443312e-05, |
|
"loss": 0.0374, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 2.7022222222222223, |
|
"grad_norm": 0.22859227657318115, |
|
"learning_rate": 8.732038550049704e-05, |
|
"loss": 0.0327, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 2.7111111111111112, |
|
"grad_norm": 0.3633359372615814, |
|
"learning_rate": 8.72224095134302e-05, |
|
"loss": 0.0407, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 2.7199999999999998, |
|
"grad_norm": 0.3309880495071411, |
|
"learning_rate": 8.712411186988952e-05, |
|
"loss": 0.031, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 2.728888888888889, |
|
"grad_norm": 0.3379570543766022, |
|
"learning_rate": 8.702549341931146e-05, |
|
"loss": 0.0403, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 2.7377777777777776, |
|
"grad_norm": 0.2955557703971863, |
|
"learning_rate": 8.692655501390483e-05, |
|
"loss": 0.0323, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 2.7466666666666666, |
|
"grad_norm": 0.29133856296539307, |
|
"learning_rate": 8.682729750864322e-05, |
|
"loss": 0.0367, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 2.7555555555555555, |
|
"grad_norm": 0.2095378190279007, |
|
"learning_rate": 8.672772176125777e-05, |
|
"loss": 0.033, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 2.7644444444444445, |
|
"grad_norm": 0.23368041217327118, |
|
"learning_rate": 8.66278286322297e-05, |
|
"loss": 0.0339, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 2.7733333333333334, |
|
"grad_norm": 0.33772075176239014, |
|
"learning_rate": 8.652761898478282e-05, |
|
"loss": 0.0355, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 2.7822222222222224, |
|
"grad_norm": 0.2612389922142029, |
|
"learning_rate": 8.64270936848762e-05, |
|
"loss": 0.0279, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 2.7911111111111113, |
|
"grad_norm": 0.29112932085990906, |
|
"learning_rate": 8.632625360119656e-05, |
|
"loss": 0.0324, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"grad_norm": 0.26036763191223145, |
|
"learning_rate": 8.622509960515084e-05, |
|
"loss": 0.0446, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 2.8088888888888888, |
|
"grad_norm": 0.32259121537208557, |
|
"learning_rate": 8.612363257085865e-05, |
|
"loss": 0.038, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 2.8177777777777777, |
|
"grad_norm": 0.3676278293132782, |
|
"learning_rate": 8.602185337514467e-05, |
|
"loss": 0.0318, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 2.8266666666666667, |
|
"grad_norm": 0.23763470351696014, |
|
"learning_rate": 8.591976289753119e-05, |
|
"loss": 0.041, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 2.8355555555555556, |
|
"grad_norm": 0.3735649287700653, |
|
"learning_rate": 8.581736202023034e-05, |
|
"loss": 0.0376, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 2.8444444444444446, |
|
"grad_norm": 0.35430651903152466, |
|
"learning_rate": 8.571465162813665e-05, |
|
"loss": 0.0398, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 2.8533333333333335, |
|
"grad_norm": 0.2871086597442627, |
|
"learning_rate": 8.561163260881926e-05, |
|
"loss": 0.0349, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 2.862222222222222, |
|
"grad_norm": 0.2968667447566986, |
|
"learning_rate": 8.550830585251432e-05, |
|
"loss": 0.0352, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 2.871111111111111, |
|
"grad_norm": 0.38278257846832275, |
|
"learning_rate": 8.540467225211728e-05, |
|
"loss": 0.0421, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"grad_norm": 0.3006772994995117, |
|
"learning_rate": 8.530073270317516e-05, |
|
"loss": 0.0354, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 2.888888888888889, |
|
"grad_norm": 0.3274925947189331, |
|
"learning_rate": 8.519648810387888e-05, |
|
"loss": 0.0339, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 2.897777777777778, |
|
"grad_norm": 0.340465247631073, |
|
"learning_rate": 8.509193935505537e-05, |
|
"loss": 0.0337, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 2.9066666666666667, |
|
"grad_norm": 0.32372379302978516, |
|
"learning_rate": 8.498708736015991e-05, |
|
"loss": 0.0321, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 2.9155555555555557, |
|
"grad_norm": 0.2627938687801361, |
|
"learning_rate": 8.488193302526825e-05, |
|
"loss": 0.0329, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 2.924444444444444, |
|
"grad_norm": 0.3578833341598511, |
|
"learning_rate": 8.477647725906883e-05, |
|
"loss": 0.0295, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 2.9333333333333336, |
|
"grad_norm": 0.25700679421424866, |
|
"learning_rate": 8.467072097285486e-05, |
|
"loss": 0.0338, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 2.942222222222222, |
|
"grad_norm": 0.30343925952911377, |
|
"learning_rate": 8.456466508051655e-05, |
|
"loss": 0.03, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 2.951111111111111, |
|
"grad_norm": 0.23489569127559662, |
|
"learning_rate": 8.445831049853305e-05, |
|
"loss": 0.034, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"grad_norm": 0.28458407521247864, |
|
"learning_rate": 8.435165814596474e-05, |
|
"loss": 0.0365, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 2.968888888888889, |
|
"grad_norm": 0.2304985672235489, |
|
"learning_rate": 8.424470894444506e-05, |
|
"loss": 0.028, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 2.977777777777778, |
|
"grad_norm": 0.2949499487876892, |
|
"learning_rate": 8.413746381817278e-05, |
|
"loss": 0.0354, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 2.986666666666667, |
|
"grad_norm": 0.2686779499053955, |
|
"learning_rate": 8.402992369390384e-05, |
|
"loss": 0.0289, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 2.9955555555555557, |
|
"grad_norm": 0.3748651146888733, |
|
"learning_rate": 8.392208950094335e-05, |
|
"loss": 0.0321, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 3.0044444444444443, |
|
"grad_norm": 0.318801611661911, |
|
"learning_rate": 8.381396217113769e-05, |
|
"loss": 0.0366, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 3.013333333333333, |
|
"grad_norm": 0.3406936824321747, |
|
"learning_rate": 8.370554263886635e-05, |
|
"loss": 0.0322, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 3.022222222222222, |
|
"grad_norm": 0.26772964000701904, |
|
"learning_rate": 8.359683184103386e-05, |
|
"loss": 0.03, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 3.031111111111111, |
|
"grad_norm": 0.21105527877807617, |
|
"learning_rate": 8.34878307170617e-05, |
|
"loss": 0.0316, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 3.04, |
|
"grad_norm": 0.2714666724205017, |
|
"learning_rate": 8.337854020888025e-05, |
|
"loss": 0.0319, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 3.048888888888889, |
|
"grad_norm": 0.32699909806251526, |
|
"learning_rate": 8.326896126092057e-05, |
|
"loss": 0.0322, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 3.057777777777778, |
|
"grad_norm": 0.3508802652359009, |
|
"learning_rate": 8.315909482010622e-05, |
|
"loss": 0.0341, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 3.066666666666667, |
|
"grad_norm": 0.29050078988075256, |
|
"learning_rate": 8.304894183584519e-05, |
|
"loss": 0.0427, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 3.0755555555555554, |
|
"grad_norm": 0.20840810239315033, |
|
"learning_rate": 8.29385032600216e-05, |
|
"loss": 0.0297, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 3.0844444444444443, |
|
"grad_norm": 0.4056035578250885, |
|
"learning_rate": 8.282778004698748e-05, |
|
"loss": 0.0316, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 3.0933333333333333, |
|
"grad_norm": 0.30306631326675415, |
|
"learning_rate": 8.271677315355459e-05, |
|
"loss": 0.032, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 3.102222222222222, |
|
"grad_norm": 0.24960987269878387, |
|
"learning_rate": 8.260548353898607e-05, |
|
"loss": 0.0316, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 3.111111111111111, |
|
"grad_norm": 0.35323524475097656, |
|
"learning_rate": 8.249391216498822e-05, |
|
"loss": 0.0345, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 3.12, |
|
"grad_norm": 0.23032651841640472, |
|
"learning_rate": 8.238205999570212e-05, |
|
"loss": 0.0339, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 3.128888888888889, |
|
"grad_norm": 0.3570566773414612, |
|
"learning_rate": 8.226992799769532e-05, |
|
"loss": 0.0366, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 3.137777777777778, |
|
"grad_norm": 0.5721020698547363, |
|
"learning_rate": 8.215751713995361e-05, |
|
"loss": 0.0379, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 3.1466666666666665, |
|
"grad_norm": 0.35582879185676575, |
|
"learning_rate": 8.204482839387241e-05, |
|
"loss": 0.0316, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 3.1555555555555554, |
|
"grad_norm": 0.32101866602897644, |
|
"learning_rate": 8.193186273324858e-05, |
|
"loss": 0.0303, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 3.1644444444444444, |
|
"grad_norm": 0.2948249280452728, |
|
"learning_rate": 8.181862113427187e-05, |
|
"loss": 0.0375, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 3.1733333333333333, |
|
"grad_norm": 0.27720171213150024, |
|
"learning_rate": 8.170510457551664e-05, |
|
"loss": 0.0359, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 3.1822222222222223, |
|
"grad_norm": 0.2581128776073456, |
|
"learning_rate": 8.15913140379332e-05, |
|
"loss": 0.0378, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 3.1911111111111112, |
|
"grad_norm": 0.42939019203186035, |
|
"learning_rate": 8.147725050483953e-05, |
|
"loss": 0.0346, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"grad_norm": 0.23781177401542664, |
|
"learning_rate": 8.136291496191263e-05, |
|
"loss": 0.0297, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 3.2088888888888887, |
|
"grad_norm": 0.3163319230079651, |
|
"learning_rate": 8.12483083971801e-05, |
|
"loss": 0.0296, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 3.2177777777777776, |
|
"grad_norm": 0.25159966945648193, |
|
"learning_rate": 8.113343180101156e-05, |
|
"loss": 0.0384, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 3.2266666666666666, |
|
"grad_norm": 0.30123138427734375, |
|
"learning_rate": 8.101828616611008e-05, |
|
"loss": 0.0333, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 3.2355555555555555, |
|
"grad_norm": 0.34495824575424194, |
|
"learning_rate": 8.090287248750365e-05, |
|
"loss": 0.033, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 3.2444444444444445, |
|
"grad_norm": 0.25632116198539734, |
|
"learning_rate": 8.078719176253657e-05, |
|
"loss": 0.0286, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 3.2533333333333334, |
|
"grad_norm": 0.29117098450660706, |
|
"learning_rate": 8.067124499086074e-05, |
|
"loss": 0.033, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 3.2622222222222224, |
|
"grad_norm": 0.2838457524776459, |
|
"learning_rate": 8.055503317442716e-05, |
|
"loss": 0.034, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 3.2711111111111113, |
|
"grad_norm": 0.28109484910964966, |
|
"learning_rate": 8.043855731747718e-05, |
|
"loss": 0.0363, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 3.2800000000000002, |
|
"grad_norm": 0.2678098976612091, |
|
"learning_rate": 8.032181842653388e-05, |
|
"loss": 0.041, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 3.2888888888888888, |
|
"grad_norm": 0.2752769887447357, |
|
"learning_rate": 8.02048175103933e-05, |
|
"loss": 0.0311, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 3.2977777777777777, |
|
"grad_norm": 0.2947790026664734, |
|
"learning_rate": 8.008755558011577e-05, |
|
"loss": 0.0364, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 3.3066666666666666, |
|
"grad_norm": 0.2984977960586548, |
|
"learning_rate": 7.997003364901723e-05, |
|
"loss": 0.0328, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 3.3155555555555556, |
|
"grad_norm": 0.2523363530635834, |
|
"learning_rate": 7.98522527326603e-05, |
|
"loss": 0.0316, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 3.3244444444444445, |
|
"grad_norm": 0.29454678297042847, |
|
"learning_rate": 7.973421384884571e-05, |
|
"loss": 0.034, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 3.3333333333333335, |
|
"grad_norm": 0.28193044662475586, |
|
"learning_rate": 7.961591801760337e-05, |
|
"loss": 0.0294, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 3.3422222222222224, |
|
"grad_norm": 0.24379950761795044, |
|
"learning_rate": 7.949736626118359e-05, |
|
"loss": 0.0364, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 3.351111111111111, |
|
"grad_norm": 0.31770026683807373, |
|
"learning_rate": 7.937855960404825e-05, |
|
"loss": 0.032, |
|
"step": 3770 |
|
}, |
|
{ |
|
"epoch": 3.36, |
|
"grad_norm": 0.2881859242916107, |
|
"learning_rate": 7.925949907286197e-05, |
|
"loss": 0.031, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 3.368888888888889, |
|
"grad_norm": 0.3091081380844116, |
|
"learning_rate": 7.91401856964832e-05, |
|
"loss": 0.0413, |
|
"step": 3790 |
|
}, |
|
{ |
|
"epoch": 3.3777777777777778, |
|
"grad_norm": 0.2018543779850006, |
|
"learning_rate": 7.902062050595536e-05, |
|
"loss": 0.0377, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 3.3866666666666667, |
|
"grad_norm": 0.30731314420700073, |
|
"learning_rate": 7.890080453449788e-05, |
|
"loss": 0.0325, |
|
"step": 3810 |
|
}, |
|
{ |
|
"epoch": 3.3955555555555557, |
|
"grad_norm": 0.3453946113586426, |
|
"learning_rate": 7.878073881749732e-05, |
|
"loss": 0.0311, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 3.4044444444444446, |
|
"grad_norm": 0.3527245819568634, |
|
"learning_rate": 7.866042439249846e-05, |
|
"loss": 0.0333, |
|
"step": 3830 |
|
}, |
|
{ |
|
"epoch": 3.413333333333333, |
|
"grad_norm": 0.3628523349761963, |
|
"learning_rate": 7.853986229919521e-05, |
|
"loss": 0.0292, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 3.422222222222222, |
|
"grad_norm": 0.31253212690353394, |
|
"learning_rate": 7.841905357942174e-05, |
|
"loss": 0.0271, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 3.431111111111111, |
|
"grad_norm": 0.378937304019928, |
|
"learning_rate": 7.829799927714343e-05, |
|
"loss": 0.0389, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 3.44, |
|
"grad_norm": 0.2748241722583771, |
|
"learning_rate": 7.817670043844785e-05, |
|
"loss": 0.0307, |
|
"step": 3870 |
|
}, |
|
{ |
|
"epoch": 3.448888888888889, |
|
"grad_norm": 0.28134390711784363, |
|
"learning_rate": 7.805515811153574e-05, |
|
"loss": 0.0356, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 3.457777777777778, |
|
"grad_norm": 0.24201825261116028, |
|
"learning_rate": 7.793337334671189e-05, |
|
"loss": 0.0258, |
|
"step": 3890 |
|
}, |
|
{ |
|
"epoch": 3.466666666666667, |
|
"grad_norm": 0.2448202222585678, |
|
"learning_rate": 7.78113471963762e-05, |
|
"loss": 0.0335, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 3.4755555555555557, |
|
"grad_norm": 0.265367716550827, |
|
"learning_rate": 7.768908071501438e-05, |
|
"loss": 0.0288, |
|
"step": 3910 |
|
}, |
|
{ |
|
"epoch": 3.4844444444444447, |
|
"grad_norm": 0.28007182478904724, |
|
"learning_rate": 7.756657495918906e-05, |
|
"loss": 0.0241, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 3.493333333333333, |
|
"grad_norm": 0.2705933451652527, |
|
"learning_rate": 7.74438309875305e-05, |
|
"loss": 0.0328, |
|
"step": 3930 |
|
}, |
|
{ |
|
"epoch": 3.502222222222222, |
|
"grad_norm": 0.2906828820705414, |
|
"learning_rate": 7.732084986072751e-05, |
|
"loss": 0.0302, |
|
"step": 3940 |
|
}, |
|
{ |
|
"epoch": 3.511111111111111, |
|
"grad_norm": 0.3073039650917053, |
|
"learning_rate": 7.719763264151826e-05, |
|
"loss": 0.0343, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 3.52, |
|
"grad_norm": 0.35828664898872375, |
|
"learning_rate": 7.70741803946811e-05, |
|
"loss": 0.03, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 3.528888888888889, |
|
"grad_norm": 0.30561575293540955, |
|
"learning_rate": 7.695049418702541e-05, |
|
"loss": 0.0327, |
|
"step": 3970 |
|
}, |
|
{ |
|
"epoch": 3.537777777777778, |
|
"grad_norm": 0.24497006833553314, |
|
"learning_rate": 7.682657508738227e-05, |
|
"loss": 0.0347, |
|
"step": 3980 |
|
}, |
|
{ |
|
"epoch": 3.546666666666667, |
|
"grad_norm": 0.28651517629623413, |
|
"learning_rate": 7.670242416659535e-05, |
|
"loss": 0.04, |
|
"step": 3990 |
|
}, |
|
{ |
|
"epoch": 3.5555555555555554, |
|
"grad_norm": 0.2008180320262909, |
|
"learning_rate": 7.657804249751154e-05, |
|
"loss": 0.0361, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 3.5644444444444443, |
|
"grad_norm": 0.3066563308238983, |
|
"learning_rate": 7.645343115497179e-05, |
|
"loss": 0.0349, |
|
"step": 4010 |
|
}, |
|
{ |
|
"epoch": 3.5733333333333333, |
|
"grad_norm": 0.4057365357875824, |
|
"learning_rate": 7.632859121580174e-05, |
|
"loss": 0.0321, |
|
"step": 4020 |
|
}, |
|
{ |
|
"epoch": 3.582222222222222, |
|
"grad_norm": 0.413873553276062, |
|
"learning_rate": 7.620352375880243e-05, |
|
"loss": 0.0413, |
|
"step": 4030 |
|
}, |
|
{ |
|
"epoch": 3.591111111111111, |
|
"grad_norm": 0.3336070775985718, |
|
"learning_rate": 7.607822986474102e-05, |
|
"loss": 0.0319, |
|
"step": 4040 |
|
}, |
|
{ |
|
"epoch": 3.6, |
|
"grad_norm": 0.3071659207344055, |
|
"learning_rate": 7.59527106163414e-05, |
|
"loss": 0.0341, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 3.608888888888889, |
|
"grad_norm": 0.32620692253112793, |
|
"learning_rate": 7.582696709827486e-05, |
|
"loss": 0.0289, |
|
"step": 4060 |
|
}, |
|
{ |
|
"epoch": 3.6177777777777775, |
|
"grad_norm": 0.23675960302352905, |
|
"learning_rate": 7.57010003971507e-05, |
|
"loss": 0.0269, |
|
"step": 4070 |
|
}, |
|
{ |
|
"epoch": 3.626666666666667, |
|
"grad_norm": 0.29968374967575073, |
|
"learning_rate": 7.557481160150686e-05, |
|
"loss": 0.036, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 3.6355555555555554, |
|
"grad_norm": 0.19663968682289124, |
|
"learning_rate": 7.54484018018005e-05, |
|
"loss": 0.0285, |
|
"step": 4090 |
|
}, |
|
{ |
|
"epoch": 3.6444444444444444, |
|
"grad_norm": 0.26385265588760376, |
|
"learning_rate": 7.532177209039859e-05, |
|
"loss": 0.0339, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 3.6533333333333333, |
|
"grad_norm": 0.2986868619918823, |
|
"learning_rate": 7.519492356156845e-05, |
|
"loss": 0.0283, |
|
"step": 4110 |
|
}, |
|
{ |
|
"epoch": 3.6622222222222223, |
|
"grad_norm": 0.2307564616203308, |
|
"learning_rate": 7.506785731146831e-05, |
|
"loss": 0.0272, |
|
"step": 4120 |
|
}, |
|
{ |
|
"epoch": 3.671111111111111, |
|
"grad_norm": 0.28506651520729065, |
|
"learning_rate": 7.494057443813783e-05, |
|
"loss": 0.0277, |
|
"step": 4130 |
|
}, |
|
{ |
|
"epoch": 3.68, |
|
"grad_norm": 0.3333166837692261, |
|
"learning_rate": 7.481307604148862e-05, |
|
"loss": 0.0311, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 3.688888888888889, |
|
"grad_norm": 0.2694551348686218, |
|
"learning_rate": 7.468536322329471e-05, |
|
"loss": 0.0407, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 3.6977777777777776, |
|
"grad_norm": 0.25156208872795105, |
|
"learning_rate": 7.455743708718308e-05, |
|
"loss": 0.0332, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 3.7066666666666666, |
|
"grad_norm": 0.2539307773113251, |
|
"learning_rate": 7.442929873862406e-05, |
|
"loss": 0.0301, |
|
"step": 4170 |
|
}, |
|
{ |
|
"epoch": 3.7155555555555555, |
|
"grad_norm": 0.2578217089176178, |
|
"learning_rate": 7.430094928492185e-05, |
|
"loss": 0.0304, |
|
"step": 4180 |
|
}, |
|
{ |
|
"epoch": 3.7244444444444444, |
|
"grad_norm": 0.22169755399227142, |
|
"learning_rate": 7.417238983520484e-05, |
|
"loss": 0.0296, |
|
"step": 4190 |
|
}, |
|
{ |
|
"epoch": 3.7333333333333334, |
|
"grad_norm": 0.25318798422813416, |
|
"learning_rate": 7.404362150041618e-05, |
|
"loss": 0.0328, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 3.7422222222222223, |
|
"grad_norm": 0.3096628487110138, |
|
"learning_rate": 7.391464539330404e-05, |
|
"loss": 0.0308, |
|
"step": 4210 |
|
}, |
|
{ |
|
"epoch": 3.7511111111111113, |
|
"grad_norm": 0.2936866283416748, |
|
"learning_rate": 7.378546262841203e-05, |
|
"loss": 0.0338, |
|
"step": 4220 |
|
}, |
|
{ |
|
"epoch": 3.76, |
|
"grad_norm": 0.34558936953544617, |
|
"learning_rate": 7.365607432206966e-05, |
|
"loss": 0.0336, |
|
"step": 4230 |
|
}, |
|
{ |
|
"epoch": 3.7688888888888887, |
|
"grad_norm": 0.2456526756286621, |
|
"learning_rate": 7.352648159238255e-05, |
|
"loss": 0.0289, |
|
"step": 4240 |
|
}, |
|
{ |
|
"epoch": 3.7777777777777777, |
|
"grad_norm": 0.23498155176639557, |
|
"learning_rate": 7.339668555922288e-05, |
|
"loss": 0.0306, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 3.7866666666666666, |
|
"grad_norm": 0.28147223591804504, |
|
"learning_rate": 7.326668734421967e-05, |
|
"loss": 0.0302, |
|
"step": 4260 |
|
}, |
|
{ |
|
"epoch": 3.7955555555555556, |
|
"grad_norm": 0.3440922498703003, |
|
"learning_rate": 7.313648807074904e-05, |
|
"loss": 0.032, |
|
"step": 4270 |
|
}, |
|
{ |
|
"epoch": 3.8044444444444445, |
|
"grad_norm": 0.2751706838607788, |
|
"learning_rate": 7.300608886392465e-05, |
|
"loss": 0.0291, |
|
"step": 4280 |
|
}, |
|
{ |
|
"epoch": 3.8133333333333335, |
|
"grad_norm": 0.28021329641342163, |
|
"learning_rate": 7.287549085058779e-05, |
|
"loss": 0.0341, |
|
"step": 4290 |
|
}, |
|
{ |
|
"epoch": 3.822222222222222, |
|
"grad_norm": 0.21772396564483643, |
|
"learning_rate": 7.274469515929775e-05, |
|
"loss": 0.0317, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 3.8311111111111114, |
|
"grad_norm": 0.2639859914779663, |
|
"learning_rate": 7.261370292032208e-05, |
|
"loss": 0.0315, |
|
"step": 4310 |
|
}, |
|
{ |
|
"epoch": 3.84, |
|
"grad_norm": 0.3674484193325043, |
|
"learning_rate": 7.248251526562677e-05, |
|
"loss": 0.0255, |
|
"step": 4320 |
|
}, |
|
{ |
|
"epoch": 3.848888888888889, |
|
"grad_norm": 0.2838074266910553, |
|
"learning_rate": 7.235113332886647e-05, |
|
"loss": 0.0321, |
|
"step": 4330 |
|
}, |
|
{ |
|
"epoch": 3.8577777777777778, |
|
"grad_norm": 0.28545433282852173, |
|
"learning_rate": 7.221955824537475e-05, |
|
"loss": 0.0292, |
|
"step": 4340 |
|
}, |
|
{ |
|
"epoch": 3.8666666666666667, |
|
"grad_norm": 0.2666172385215759, |
|
"learning_rate": 7.208779115215425e-05, |
|
"loss": 0.0314, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 3.8755555555555556, |
|
"grad_norm": 0.4304070472717285, |
|
"learning_rate": 7.195583318786682e-05, |
|
"loss": 0.0305, |
|
"step": 4360 |
|
}, |
|
{ |
|
"epoch": 3.8844444444444446, |
|
"grad_norm": 0.29872727394104004, |
|
"learning_rate": 7.182368549282375e-05, |
|
"loss": 0.0325, |
|
"step": 4370 |
|
}, |
|
{ |
|
"epoch": 3.8933333333333335, |
|
"grad_norm": 0.23472265899181366, |
|
"learning_rate": 7.169134920897588e-05, |
|
"loss": 0.0294, |
|
"step": 4380 |
|
}, |
|
{ |
|
"epoch": 3.902222222222222, |
|
"grad_norm": 0.24736790359020233, |
|
"learning_rate": 7.155882547990373e-05, |
|
"loss": 0.0278, |
|
"step": 4390 |
|
}, |
|
{ |
|
"epoch": 3.911111111111111, |
|
"grad_norm": 0.3307061493396759, |
|
"learning_rate": 7.142611545080761e-05, |
|
"loss": 0.0379, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 3.92, |
|
"grad_norm": 0.23942095041275024, |
|
"learning_rate": 7.129322026849776e-05, |
|
"loss": 0.0339, |
|
"step": 4410 |
|
}, |
|
{ |
|
"epoch": 3.928888888888889, |
|
"grad_norm": 0.2947057783603668, |
|
"learning_rate": 7.116014108138441e-05, |
|
"loss": 0.0302, |
|
"step": 4420 |
|
}, |
|
{ |
|
"epoch": 3.937777777777778, |
|
"grad_norm": 0.15690277516841888, |
|
"learning_rate": 7.102687903946786e-05, |
|
"loss": 0.0291, |
|
"step": 4430 |
|
}, |
|
{ |
|
"epoch": 3.9466666666666668, |
|
"grad_norm": 0.2776757776737213, |
|
"learning_rate": 7.089343529432852e-05, |
|
"loss": 0.0355, |
|
"step": 4440 |
|
}, |
|
{ |
|
"epoch": 3.9555555555555557, |
|
"grad_norm": 0.23891988396644592, |
|
"learning_rate": 7.075981099911704e-05, |
|
"loss": 0.0301, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 3.964444444444444, |
|
"grad_norm": 0.24151529371738434, |
|
"learning_rate": 7.062600730854424e-05, |
|
"loss": 0.0315, |
|
"step": 4460 |
|
}, |
|
{ |
|
"epoch": 3.9733333333333336, |
|
"grad_norm": 0.2382468432188034, |
|
"learning_rate": 7.049202537887121e-05, |
|
"loss": 0.0384, |
|
"step": 4470 |
|
}, |
|
{ |
|
"epoch": 3.982222222222222, |
|
"grad_norm": 0.20705050230026245, |
|
"learning_rate": 7.035786636789923e-05, |
|
"loss": 0.0305, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 3.991111111111111, |
|
"grad_norm": 0.3636414110660553, |
|
"learning_rate": 7.022353143495993e-05, |
|
"loss": 0.0264, |
|
"step": 4490 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 0.9779302477836609, |
|
"learning_rate": 7.008902174090507e-05, |
|
"loss": 0.038, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 4.0088888888888885, |
|
"grad_norm": 0.28130674362182617, |
|
"learning_rate": 6.995433844809664e-05, |
|
"loss": 0.0318, |
|
"step": 4510 |
|
}, |
|
{ |
|
"epoch": 4.017777777777778, |
|
"grad_norm": 0.3121441602706909, |
|
"learning_rate": 6.981948272039678e-05, |
|
"loss": 0.0257, |
|
"step": 4520 |
|
}, |
|
{ |
|
"epoch": 4.026666666666666, |
|
"grad_norm": 0.1995745152235031, |
|
"learning_rate": 6.968445572315773e-05, |
|
"loss": 0.0345, |
|
"step": 4530 |
|
}, |
|
{ |
|
"epoch": 4.035555555555556, |
|
"grad_norm": 0.2579461932182312, |
|
"learning_rate": 6.954925862321171e-05, |
|
"loss": 0.0377, |
|
"step": 4540 |
|
}, |
|
{ |
|
"epoch": 4.044444444444444, |
|
"grad_norm": 0.23612239956855774, |
|
"learning_rate": 6.941389258886093e-05, |
|
"loss": 0.0227, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 4.053333333333334, |
|
"grad_norm": 0.34935837984085083, |
|
"learning_rate": 6.927835878986741e-05, |
|
"loss": 0.0317, |
|
"step": 4560 |
|
}, |
|
{ |
|
"epoch": 4.062222222222222, |
|
"grad_norm": 0.28258001804351807, |
|
"learning_rate": 6.914265839744291e-05, |
|
"loss": 0.029, |
|
"step": 4570 |
|
}, |
|
{ |
|
"epoch": 4.071111111111111, |
|
"grad_norm": 0.23054401576519012, |
|
"learning_rate": 6.900679258423882e-05, |
|
"loss": 0.039, |
|
"step": 4580 |
|
}, |
|
{ |
|
"epoch": 4.08, |
|
"grad_norm": 0.33513006567955017, |
|
"learning_rate": 6.8870762524336e-05, |
|
"loss": 0.0293, |
|
"step": 4590 |
|
}, |
|
{ |
|
"epoch": 4.088888888888889, |
|
"grad_norm": 0.260124146938324, |
|
"learning_rate": 6.87345693932346e-05, |
|
"loss": 0.0292, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 4.097777777777778, |
|
"grad_norm": 0.303379625082016, |
|
"learning_rate": 6.859821436784402e-05, |
|
"loss": 0.0256, |
|
"step": 4610 |
|
}, |
|
{ |
|
"epoch": 4.1066666666666665, |
|
"grad_norm": 0.26939424872398376, |
|
"learning_rate": 6.846169862647261e-05, |
|
"loss": 0.0369, |
|
"step": 4620 |
|
}, |
|
{ |
|
"epoch": 4.115555555555556, |
|
"grad_norm": 0.35925671458244324, |
|
"learning_rate": 6.832502334881758e-05, |
|
"loss": 0.0287, |
|
"step": 4630 |
|
}, |
|
{ |
|
"epoch": 4.124444444444444, |
|
"grad_norm": 0.2849293649196625, |
|
"learning_rate": 6.818818971595474e-05, |
|
"loss": 0.0288, |
|
"step": 4640 |
|
}, |
|
{ |
|
"epoch": 4.133333333333334, |
|
"grad_norm": 0.3612135350704193, |
|
"learning_rate": 6.805119891032834e-05, |
|
"loss": 0.0277, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 4.142222222222222, |
|
"grad_norm": 0.3013782501220703, |
|
"learning_rate": 6.791405211574083e-05, |
|
"loss": 0.0337, |
|
"step": 4660 |
|
}, |
|
{ |
|
"epoch": 4.151111111111111, |
|
"grad_norm": 0.33569642901420593, |
|
"learning_rate": 6.77767505173426e-05, |
|
"loss": 0.0321, |
|
"step": 4670 |
|
}, |
|
{ |
|
"epoch": 4.16, |
|
"grad_norm": 0.2754763662815094, |
|
"learning_rate": 6.763929530162185e-05, |
|
"loss": 0.035, |
|
"step": 4680 |
|
}, |
|
{ |
|
"epoch": 4.168888888888889, |
|
"grad_norm": 0.27353253960609436, |
|
"learning_rate": 6.750168765639419e-05, |
|
"loss": 0.0314, |
|
"step": 4690 |
|
}, |
|
{ |
|
"epoch": 4.177777777777778, |
|
"grad_norm": 0.27372586727142334, |
|
"learning_rate": 6.736392877079246e-05, |
|
"loss": 0.0305, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 4.1866666666666665, |
|
"grad_norm": 0.35082343220710754, |
|
"learning_rate": 6.72260198352565e-05, |
|
"loss": 0.0346, |
|
"step": 4710 |
|
}, |
|
{ |
|
"epoch": 4.195555555555556, |
|
"grad_norm": 0.3252710998058319, |
|
"learning_rate": 6.708796204152269e-05, |
|
"loss": 0.0281, |
|
"step": 4720 |
|
}, |
|
{ |
|
"epoch": 4.204444444444444, |
|
"grad_norm": 0.41605478525161743, |
|
"learning_rate": 6.694975658261387e-05, |
|
"loss": 0.0337, |
|
"step": 4730 |
|
}, |
|
{ |
|
"epoch": 4.213333333333333, |
|
"grad_norm": 0.3029637336730957, |
|
"learning_rate": 6.681140465282887e-05, |
|
"loss": 0.0309, |
|
"step": 4740 |
|
}, |
|
{ |
|
"epoch": 4.222222222222222, |
|
"grad_norm": 0.2987290918827057, |
|
"learning_rate": 6.667290744773226e-05, |
|
"loss": 0.0275, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 4.231111111111111, |
|
"grad_norm": 0.26917970180511475, |
|
"learning_rate": 6.653426616414397e-05, |
|
"loss": 0.0276, |
|
"step": 4760 |
|
}, |
|
{ |
|
"epoch": 4.24, |
|
"grad_norm": 0.32736000418663025, |
|
"learning_rate": 6.639548200012908e-05, |
|
"loss": 0.0284, |
|
"step": 4770 |
|
}, |
|
{ |
|
"epoch": 4.248888888888889, |
|
"grad_norm": 0.30274200439453125, |
|
"learning_rate": 6.62565561549872e-05, |
|
"loss": 0.0306, |
|
"step": 4780 |
|
}, |
|
{ |
|
"epoch": 4.257777777777778, |
|
"grad_norm": 0.39345672726631165, |
|
"learning_rate": 6.611748982924247e-05, |
|
"loss": 0.032, |
|
"step": 4790 |
|
}, |
|
{ |
|
"epoch": 4.266666666666667, |
|
"grad_norm": 0.2618786692619324, |
|
"learning_rate": 6.59782842246328e-05, |
|
"loss": 0.0264, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 4.275555555555556, |
|
"grad_norm": 0.2741163969039917, |
|
"learning_rate": 6.583894054409983e-05, |
|
"loss": 0.033, |
|
"step": 4810 |
|
}, |
|
{ |
|
"epoch": 4.2844444444444445, |
|
"grad_norm": 0.30024799704551697, |
|
"learning_rate": 6.569945999177828e-05, |
|
"loss": 0.0345, |
|
"step": 4820 |
|
}, |
|
{ |
|
"epoch": 4.293333333333333, |
|
"grad_norm": 0.23707608878612518, |
|
"learning_rate": 6.55598437729857e-05, |
|
"loss": 0.0308, |
|
"step": 4830 |
|
}, |
|
{ |
|
"epoch": 4.302222222222222, |
|
"grad_norm": 0.2317287176847458, |
|
"learning_rate": 6.542009309421195e-05, |
|
"loss": 0.0292, |
|
"step": 4840 |
|
}, |
|
{ |
|
"epoch": 4.311111111111111, |
|
"grad_norm": 0.23210635781288147, |
|
"learning_rate": 6.528020916310888e-05, |
|
"loss": 0.0254, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 4.32, |
|
"grad_norm": 0.24638275802135468, |
|
"learning_rate": 6.51401931884798e-05, |
|
"loss": 0.036, |
|
"step": 4860 |
|
}, |
|
{ |
|
"epoch": 4.328888888888889, |
|
"grad_norm": 0.24534523487091064, |
|
"learning_rate": 6.500004638026905e-05, |
|
"loss": 0.0323, |
|
"step": 4870 |
|
}, |
|
{ |
|
"epoch": 4.337777777777778, |
|
"grad_norm": 0.27173087000846863, |
|
"learning_rate": 6.485976994955161e-05, |
|
"loss": 0.0295, |
|
"step": 4880 |
|
}, |
|
{ |
|
"epoch": 4.346666666666667, |
|
"grad_norm": 0.3001212477684021, |
|
"learning_rate": 6.471936510852257e-05, |
|
"loss": 0.0294, |
|
"step": 4890 |
|
}, |
|
{ |
|
"epoch": 4.355555555555555, |
|
"grad_norm": 0.30450788140296936, |
|
"learning_rate": 6.457883307048665e-05, |
|
"loss": 0.0284, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 4.364444444444445, |
|
"grad_norm": 0.3031301200389862, |
|
"learning_rate": 6.443817504984782e-05, |
|
"loss": 0.031, |
|
"step": 4910 |
|
}, |
|
{ |
|
"epoch": 4.373333333333333, |
|
"grad_norm": 0.3478601574897766, |
|
"learning_rate": 6.429739226209861e-05, |
|
"loss": 0.0281, |
|
"step": 4920 |
|
}, |
|
{ |
|
"epoch": 4.3822222222222225, |
|
"grad_norm": 0.242881640791893, |
|
"learning_rate": 6.415648592380983e-05, |
|
"loss": 0.0214, |
|
"step": 4930 |
|
}, |
|
{ |
|
"epoch": 4.391111111111111, |
|
"grad_norm": 0.22651013731956482, |
|
"learning_rate": 6.401545725261986e-05, |
|
"loss": 0.0299, |
|
"step": 4940 |
|
}, |
|
{ |
|
"epoch": 4.4, |
|
"grad_norm": 0.18639731407165527, |
|
"learning_rate": 6.38743074672243e-05, |
|
"loss": 0.0327, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 4.408888888888889, |
|
"grad_norm": 0.2530800402164459, |
|
"learning_rate": 6.373303778736526e-05, |
|
"loss": 0.0351, |
|
"step": 4960 |
|
}, |
|
{ |
|
"epoch": 4.417777777777777, |
|
"grad_norm": 0.24797551333904266, |
|
"learning_rate": 6.3591649433821e-05, |
|
"loss": 0.0272, |
|
"step": 4970 |
|
}, |
|
{ |
|
"epoch": 4.426666666666667, |
|
"grad_norm": 0.30934616923332214, |
|
"learning_rate": 6.345014362839528e-05, |
|
"loss": 0.0267, |
|
"step": 4980 |
|
}, |
|
{ |
|
"epoch": 4.435555555555555, |
|
"grad_norm": 0.21211177110671997, |
|
"learning_rate": 6.330852159390675e-05, |
|
"loss": 0.0253, |
|
"step": 4990 |
|
}, |
|
{ |
|
"epoch": 4.444444444444445, |
|
"grad_norm": 0.23755398392677307, |
|
"learning_rate": 6.316678455417854e-05, |
|
"loss": 0.0276, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 4.453333333333333, |
|
"grad_norm": 0.3287028968334198, |
|
"learning_rate": 6.302493373402754e-05, |
|
"loss": 0.0271, |
|
"step": 5010 |
|
}, |
|
{ |
|
"epoch": 4.4622222222222225, |
|
"grad_norm": 0.23436780273914337, |
|
"learning_rate": 6.288297035925389e-05, |
|
"loss": 0.0262, |
|
"step": 5020 |
|
}, |
|
{ |
|
"epoch": 4.471111111111111, |
|
"grad_norm": 0.24987344443798065, |
|
"learning_rate": 6.274089565663035e-05, |
|
"loss": 0.0316, |
|
"step": 5030 |
|
}, |
|
{ |
|
"epoch": 4.48, |
|
"grad_norm": 0.25846901535987854, |
|
"learning_rate": 6.259871085389174e-05, |
|
"loss": 0.0307, |
|
"step": 5040 |
|
}, |
|
{ |
|
"epoch": 4.488888888888889, |
|
"grad_norm": 0.3026222586631775, |
|
"learning_rate": 6.24564171797243e-05, |
|
"loss": 0.0319, |
|
"step": 5050 |
|
}, |
|
{ |
|
"epoch": 4.497777777777777, |
|
"grad_norm": 0.2525983452796936, |
|
"learning_rate": 6.231401586375507e-05, |
|
"loss": 0.0264, |
|
"step": 5060 |
|
}, |
|
{ |
|
"epoch": 4.506666666666667, |
|
"grad_norm": 0.2488236427307129, |
|
"learning_rate": 6.21715081365413e-05, |
|
"loss": 0.0255, |
|
"step": 5070 |
|
}, |
|
{ |
|
"epoch": 4.515555555555555, |
|
"grad_norm": 0.19766047596931458, |
|
"learning_rate": 6.202889522955974e-05, |
|
"loss": 0.0264, |
|
"step": 5080 |
|
}, |
|
{ |
|
"epoch": 4.524444444444445, |
|
"grad_norm": 0.3547951877117157, |
|
"learning_rate": 6.18861783751961e-05, |
|
"loss": 0.0293, |
|
"step": 5090 |
|
}, |
|
{ |
|
"epoch": 4.533333333333333, |
|
"grad_norm": 0.2230035811662674, |
|
"learning_rate": 6.174335880673432e-05, |
|
"loss": 0.0272, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 4.542222222222223, |
|
"grad_norm": 0.39101654291152954, |
|
"learning_rate": 6.160043775834594e-05, |
|
"loss": 0.0288, |
|
"step": 5110 |
|
}, |
|
{ |
|
"epoch": 4.551111111111111, |
|
"grad_norm": 0.17163528501987457, |
|
"learning_rate": 6.145741646507948e-05, |
|
"loss": 0.0298, |
|
"step": 5120 |
|
}, |
|
{ |
|
"epoch": 4.5600000000000005, |
|
"grad_norm": 0.22149497270584106, |
|
"learning_rate": 6.131429616284963e-05, |
|
"loss": 0.0268, |
|
"step": 5130 |
|
}, |
|
{ |
|
"epoch": 4.568888888888889, |
|
"grad_norm": 0.29167938232421875, |
|
"learning_rate": 6.117107808842677e-05, |
|
"loss": 0.0248, |
|
"step": 5140 |
|
}, |
|
{ |
|
"epoch": 4.5777777777777775, |
|
"grad_norm": 0.2699131369590759, |
|
"learning_rate": 6.1027763479426114e-05, |
|
"loss": 0.0273, |
|
"step": 5150 |
|
}, |
|
{ |
|
"epoch": 4.586666666666667, |
|
"grad_norm": 0.22501330077648163, |
|
"learning_rate": 6.088435357429708e-05, |
|
"loss": 0.0257, |
|
"step": 5160 |
|
}, |
|
{ |
|
"epoch": 4.595555555555555, |
|
"grad_norm": 0.27462032437324524, |
|
"learning_rate": 6.074084961231261e-05, |
|
"loss": 0.031, |
|
"step": 5170 |
|
}, |
|
{ |
|
"epoch": 4.604444444444445, |
|
"grad_norm": 0.20761147141456604, |
|
"learning_rate": 6.0597252833558414e-05, |
|
"loss": 0.025, |
|
"step": 5180 |
|
}, |
|
{ |
|
"epoch": 4.613333333333333, |
|
"grad_norm": 0.23973309993743896, |
|
"learning_rate": 6.045356447892229e-05, |
|
"loss": 0.0257, |
|
"step": 5190 |
|
}, |
|
{ |
|
"epoch": 4.622222222222222, |
|
"grad_norm": 0.3320258557796478, |
|
"learning_rate": 6.030978579008335e-05, |
|
"loss": 0.0289, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 4.631111111111111, |
|
"grad_norm": 0.24688801169395447, |
|
"learning_rate": 6.01659180095014e-05, |
|
"loss": 0.032, |
|
"step": 5210 |
|
}, |
|
{ |
|
"epoch": 4.64, |
|
"grad_norm": 0.2569980025291443, |
|
"learning_rate": 6.002196238040605e-05, |
|
"loss": 0.0355, |
|
"step": 5220 |
|
}, |
|
{ |
|
"epoch": 4.648888888888889, |
|
"grad_norm": 0.30560818314552307, |
|
"learning_rate": 5.9877920146786106e-05, |
|
"loss": 0.0281, |
|
"step": 5230 |
|
}, |
|
{ |
|
"epoch": 4.657777777777778, |
|
"grad_norm": 0.2636353373527527, |
|
"learning_rate": 5.973379255337874e-05, |
|
"loss": 0.0322, |
|
"step": 5240 |
|
}, |
|
{ |
|
"epoch": 4.666666666666667, |
|
"grad_norm": 0.1854434758424759, |
|
"learning_rate": 5.9589580845658756e-05, |
|
"loss": 0.0268, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 4.6755555555555555, |
|
"grad_norm": 0.24719204008579254, |
|
"learning_rate": 5.944528626982786e-05, |
|
"loss": 0.0266, |
|
"step": 5260 |
|
}, |
|
{ |
|
"epoch": 4.684444444444445, |
|
"grad_norm": 0.2648203670978546, |
|
"learning_rate": 5.9300910072803804e-05, |
|
"loss": 0.0323, |
|
"step": 5270 |
|
}, |
|
{ |
|
"epoch": 4.693333333333333, |
|
"grad_norm": 0.363930344581604, |
|
"learning_rate": 5.9156453502209744e-05, |
|
"loss": 0.0284, |
|
"step": 5280 |
|
}, |
|
{ |
|
"epoch": 4.702222222222222, |
|
"grad_norm": 0.3846636712551117, |
|
"learning_rate": 5.901191780636331e-05, |
|
"loss": 0.0264, |
|
"step": 5290 |
|
}, |
|
{ |
|
"epoch": 4.711111111111111, |
|
"grad_norm": 0.2667284309864044, |
|
"learning_rate": 5.886730423426592e-05, |
|
"loss": 0.0276, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 4.72, |
|
"grad_norm": 0.21568116545677185, |
|
"learning_rate": 5.872261403559195e-05, |
|
"loss": 0.0283, |
|
"step": 5310 |
|
}, |
|
{ |
|
"epoch": 4.728888888888889, |
|
"grad_norm": 0.24188649654388428, |
|
"learning_rate": 5.857784846067799e-05, |
|
"loss": 0.0277, |
|
"step": 5320 |
|
}, |
|
{ |
|
"epoch": 4.737777777777778, |
|
"grad_norm": 0.2591087818145752, |
|
"learning_rate": 5.843300876051191e-05, |
|
"loss": 0.0289, |
|
"step": 5330 |
|
}, |
|
{ |
|
"epoch": 4.746666666666667, |
|
"grad_norm": 0.2975991368293762, |
|
"learning_rate": 5.82880961867222e-05, |
|
"loss": 0.0305, |
|
"step": 5340 |
|
}, |
|
{ |
|
"epoch": 4.7555555555555555, |
|
"grad_norm": 0.3373737633228302, |
|
"learning_rate": 5.814311199156704e-05, |
|
"loss": 0.0285, |
|
"step": 5350 |
|
}, |
|
{ |
|
"epoch": 4.764444444444445, |
|
"grad_norm": 0.23469702899456024, |
|
"learning_rate": 5.799805742792356e-05, |
|
"loss": 0.0262, |
|
"step": 5360 |
|
}, |
|
{ |
|
"epoch": 4.773333333333333, |
|
"grad_norm": 0.31551775336265564, |
|
"learning_rate": 5.785293374927693e-05, |
|
"loss": 0.0288, |
|
"step": 5370 |
|
}, |
|
{ |
|
"epoch": 4.782222222222222, |
|
"grad_norm": 0.23292727768421173, |
|
"learning_rate": 5.770774220970966e-05, |
|
"loss": 0.0275, |
|
"step": 5380 |
|
}, |
|
{ |
|
"epoch": 4.791111111111111, |
|
"grad_norm": 0.2928922176361084, |
|
"learning_rate": 5.7562484063890577e-05, |
|
"loss": 0.0279, |
|
"step": 5390 |
|
}, |
|
{ |
|
"epoch": 4.8, |
|
"grad_norm": 0.23222550749778748, |
|
"learning_rate": 5.741716056706416e-05, |
|
"loss": 0.0245, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 4.808888888888889, |
|
"grad_norm": 0.19032378494739532, |
|
"learning_rate": 5.727177297503956e-05, |
|
"loss": 0.028, |
|
"step": 5410 |
|
}, |
|
{ |
|
"epoch": 4.817777777777778, |
|
"grad_norm": 0.27426227927207947, |
|
"learning_rate": 5.712632254417986e-05, |
|
"loss": 0.0267, |
|
"step": 5420 |
|
}, |
|
{ |
|
"epoch": 4.826666666666666, |
|
"grad_norm": 0.28979581594467163, |
|
"learning_rate": 5.698081053139113e-05, |
|
"loss": 0.0257, |
|
"step": 5430 |
|
}, |
|
{ |
|
"epoch": 4.835555555555556, |
|
"grad_norm": 0.2780580520629883, |
|
"learning_rate": 5.68352381941116e-05, |
|
"loss": 0.0263, |
|
"step": 5440 |
|
}, |
|
{ |
|
"epoch": 4.844444444444444, |
|
"grad_norm": 0.17843307554721832, |
|
"learning_rate": 5.66896067903008e-05, |
|
"loss": 0.0263, |
|
"step": 5450 |
|
}, |
|
{ |
|
"epoch": 4.8533333333333335, |
|
"grad_norm": 0.28419029712677, |
|
"learning_rate": 5.6543917578428675e-05, |
|
"loss": 0.0295, |
|
"step": 5460 |
|
}, |
|
{ |
|
"epoch": 4.862222222222222, |
|
"grad_norm": 0.24868139624595642, |
|
"learning_rate": 5.639817181746473e-05, |
|
"loss": 0.0236, |
|
"step": 5470 |
|
}, |
|
{ |
|
"epoch": 4.871111111111111, |
|
"grad_norm": 0.3074285387992859, |
|
"learning_rate": 5.6252370766867135e-05, |
|
"loss": 0.0281, |
|
"step": 5480 |
|
}, |
|
{ |
|
"epoch": 4.88, |
|
"grad_norm": 0.27747851610183716, |
|
"learning_rate": 5.6106515686571815e-05, |
|
"loss": 0.0247, |
|
"step": 5490 |
|
}, |
|
{ |
|
"epoch": 4.888888888888889, |
|
"grad_norm": 0.18275123834609985, |
|
"learning_rate": 5.596060783698165e-05, |
|
"loss": 0.0238, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 4.897777777777778, |
|
"grad_norm": 0.29003438353538513, |
|
"learning_rate": 5.581464847895545e-05, |
|
"loss": 0.029, |
|
"step": 5510 |
|
}, |
|
{ |
|
"epoch": 4.906666666666666, |
|
"grad_norm": 0.19174236059188843, |
|
"learning_rate": 5.56686388737972e-05, |
|
"loss": 0.0271, |
|
"step": 5520 |
|
}, |
|
{ |
|
"epoch": 4.915555555555556, |
|
"grad_norm": 0.24748900532722473, |
|
"learning_rate": 5.552258028324504e-05, |
|
"loss": 0.0328, |
|
"step": 5530 |
|
}, |
|
{ |
|
"epoch": 4.924444444444444, |
|
"grad_norm": 0.20556414127349854, |
|
"learning_rate": 5.5376473969460474e-05, |
|
"loss": 0.0301, |
|
"step": 5540 |
|
}, |
|
{ |
|
"epoch": 4.933333333333334, |
|
"grad_norm": 0.20509490370750427, |
|
"learning_rate": 5.5230321195017345e-05, |
|
"loss": 0.0216, |
|
"step": 5550 |
|
}, |
|
{ |
|
"epoch": 4.942222222222222, |
|
"grad_norm": 0.2661460340023041, |
|
"learning_rate": 5.508412322289105e-05, |
|
"loss": 0.029, |
|
"step": 5560 |
|
}, |
|
{ |
|
"epoch": 4.9511111111111115, |
|
"grad_norm": 0.29772576689720154, |
|
"learning_rate": 5.493788131644748e-05, |
|
"loss": 0.0338, |
|
"step": 5570 |
|
}, |
|
{ |
|
"epoch": 4.96, |
|
"grad_norm": 0.2668423652648926, |
|
"learning_rate": 5.479159673943226e-05, |
|
"loss": 0.0291, |
|
"step": 5580 |
|
}, |
|
{ |
|
"epoch": 4.968888888888889, |
|
"grad_norm": 0.18408338725566864, |
|
"learning_rate": 5.464527075595969e-05, |
|
"loss": 0.0282, |
|
"step": 5590 |
|
}, |
|
{ |
|
"epoch": 4.977777777777778, |
|
"grad_norm": 0.1908939629793167, |
|
"learning_rate": 5.449890463050194e-05, |
|
"loss": 0.0266, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 4.986666666666666, |
|
"grad_norm": 0.2464345097541809, |
|
"learning_rate": 5.435249962787804e-05, |
|
"loss": 0.0292, |
|
"step": 5610 |
|
}, |
|
{ |
|
"epoch": 4.995555555555556, |
|
"grad_norm": 0.23453561961650848, |
|
"learning_rate": 5.420605701324295e-05, |
|
"loss": 0.026, |
|
"step": 5620 |
|
}, |
|
{ |
|
"epoch": 5.004444444444444, |
|
"grad_norm": 0.3007136881351471, |
|
"learning_rate": 5.405957805207669e-05, |
|
"loss": 0.0249, |
|
"step": 5630 |
|
}, |
|
{ |
|
"epoch": 5.013333333333334, |
|
"grad_norm": 0.2784118950366974, |
|
"learning_rate": 5.391306401017335e-05, |
|
"loss": 0.0267, |
|
"step": 5640 |
|
}, |
|
{ |
|
"epoch": 5.022222222222222, |
|
"grad_norm": 0.23861227929592133, |
|
"learning_rate": 5.376651615363018e-05, |
|
"loss": 0.0268, |
|
"step": 5650 |
|
}, |
|
{ |
|
"epoch": 5.0311111111111115, |
|
"grad_norm": 0.34088486433029175, |
|
"learning_rate": 5.3619935748836635e-05, |
|
"loss": 0.0303, |
|
"step": 5660 |
|
}, |
|
{ |
|
"epoch": 5.04, |
|
"grad_norm": 0.28462904691696167, |
|
"learning_rate": 5.34733240624634e-05, |
|
"loss": 0.0283, |
|
"step": 5670 |
|
}, |
|
{ |
|
"epoch": 5.0488888888888885, |
|
"grad_norm": 0.32660117745399475, |
|
"learning_rate": 5.332668236145156e-05, |
|
"loss": 0.0242, |
|
"step": 5680 |
|
}, |
|
{ |
|
"epoch": 5.057777777777778, |
|
"grad_norm": 0.30648064613342285, |
|
"learning_rate": 5.3180011913001485e-05, |
|
"loss": 0.0255, |
|
"step": 5690 |
|
}, |
|
{ |
|
"epoch": 5.066666666666666, |
|
"grad_norm": 0.24275556206703186, |
|
"learning_rate": 5.3033313984562016e-05, |
|
"loss": 0.0231, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 5.075555555555556, |
|
"grad_norm": 0.24886679649353027, |
|
"learning_rate": 5.2886589843819446e-05, |
|
"loss": 0.027, |
|
"step": 5710 |
|
}, |
|
{ |
|
"epoch": 5.084444444444444, |
|
"grad_norm": 0.27152377367019653, |
|
"learning_rate": 5.273984075868657e-05, |
|
"loss": 0.0239, |
|
"step": 5720 |
|
}, |
|
{ |
|
"epoch": 5.093333333333334, |
|
"grad_norm": 0.22943544387817383, |
|
"learning_rate": 5.259306799729178e-05, |
|
"loss": 0.0263, |
|
"step": 5730 |
|
}, |
|
{ |
|
"epoch": 5.102222222222222, |
|
"grad_norm": 0.2757958173751831, |
|
"learning_rate": 5.2446272827968014e-05, |
|
"loss": 0.035, |
|
"step": 5740 |
|
}, |
|
{ |
|
"epoch": 5.111111111111111, |
|
"grad_norm": 0.31382718682289124, |
|
"learning_rate": 5.229945651924187e-05, |
|
"loss": 0.0271, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 5.12, |
|
"grad_norm": 0.29276415705680847, |
|
"learning_rate": 5.2152620339822646e-05, |
|
"loss": 0.0241, |
|
"step": 5760 |
|
}, |
|
{ |
|
"epoch": 5.128888888888889, |
|
"grad_norm": 0.26941701769828796, |
|
"learning_rate": 5.200576555859129e-05, |
|
"loss": 0.0263, |
|
"step": 5770 |
|
}, |
|
{ |
|
"epoch": 5.137777777777778, |
|
"grad_norm": 0.2378821223974228, |
|
"learning_rate": 5.1858893444589576e-05, |
|
"loss": 0.0303, |
|
"step": 5780 |
|
}, |
|
{ |
|
"epoch": 5.1466666666666665, |
|
"grad_norm": 0.2597375214099884, |
|
"learning_rate": 5.171200526700899e-05, |
|
"loss": 0.0288, |
|
"step": 5790 |
|
}, |
|
{ |
|
"epoch": 5.155555555555556, |
|
"grad_norm": 0.18972061574459076, |
|
"learning_rate": 5.156510229517988e-05, |
|
"loss": 0.028, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 5.164444444444444, |
|
"grad_norm": 0.2856453061103821, |
|
"learning_rate": 5.141818579856038e-05, |
|
"loss": 0.0268, |
|
"step": 5810 |
|
}, |
|
{ |
|
"epoch": 5.173333333333334, |
|
"grad_norm": 0.20504191517829895, |
|
"learning_rate": 5.1271257046725584e-05, |
|
"loss": 0.0241, |
|
"step": 5820 |
|
}, |
|
{ |
|
"epoch": 5.182222222222222, |
|
"grad_norm": 0.18461136519908905, |
|
"learning_rate": 5.112431730935641e-05, |
|
"loss": 0.0228, |
|
"step": 5830 |
|
}, |
|
{ |
|
"epoch": 5.191111111111111, |
|
"grad_norm": 0.2955813407897949, |
|
"learning_rate": 5.0977367856228764e-05, |
|
"loss": 0.0273, |
|
"step": 5840 |
|
}, |
|
{ |
|
"epoch": 5.2, |
|
"grad_norm": 0.20536279678344727, |
|
"learning_rate": 5.083040995720244e-05, |
|
"loss": 0.0272, |
|
"step": 5850 |
|
}, |
|
{ |
|
"epoch": 5.208888888888889, |
|
"grad_norm": 0.20190496742725372, |
|
"learning_rate": 5.068344488221032e-05, |
|
"loss": 0.0269, |
|
"step": 5860 |
|
}, |
|
{ |
|
"epoch": 5.217777777777778, |
|
"grad_norm": 0.25095266103744507, |
|
"learning_rate": 5.053647390124718e-05, |
|
"loss": 0.0261, |
|
"step": 5870 |
|
}, |
|
{ |
|
"epoch": 5.226666666666667, |
|
"grad_norm": 0.23401197791099548, |
|
"learning_rate": 5.038949828435894e-05, |
|
"loss": 0.0226, |
|
"step": 5880 |
|
}, |
|
{ |
|
"epoch": 5.235555555555556, |
|
"grad_norm": 0.31305333971977234, |
|
"learning_rate": 5.0242519301631486e-05, |
|
"loss": 0.0282, |
|
"step": 5890 |
|
}, |
|
{ |
|
"epoch": 5.2444444444444445, |
|
"grad_norm": 0.29324230551719666, |
|
"learning_rate": 5.0095538223179886e-05, |
|
"loss": 0.0367, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 5.253333333333333, |
|
"grad_norm": 0.21020615100860596, |
|
"learning_rate": 4.994855631913721e-05, |
|
"loss": 0.0242, |
|
"step": 5910 |
|
}, |
|
{ |
|
"epoch": 5.262222222222222, |
|
"grad_norm": 0.2547796368598938, |
|
"learning_rate": 4.980157485964376e-05, |
|
"loss": 0.0265, |
|
"step": 5920 |
|
}, |
|
{ |
|
"epoch": 5.271111111111111, |
|
"grad_norm": 0.2116064429283142, |
|
"learning_rate": 4.965459511483596e-05, |
|
"loss": 0.0275, |
|
"step": 5930 |
|
}, |
|
{ |
|
"epoch": 5.28, |
|
"grad_norm": 0.2624657154083252, |
|
"learning_rate": 4.9507618354835386e-05, |
|
"loss": 0.0265, |
|
"step": 5940 |
|
}, |
|
{ |
|
"epoch": 5.288888888888889, |
|
"grad_norm": 0.24334849417209625, |
|
"learning_rate": 4.936064584973788e-05, |
|
"loss": 0.0234, |
|
"step": 5950 |
|
}, |
|
{ |
|
"epoch": 5.297777777777778, |
|
"grad_norm": 0.25470516085624695, |
|
"learning_rate": 4.9213678869602444e-05, |
|
"loss": 0.0267, |
|
"step": 5960 |
|
}, |
|
{ |
|
"epoch": 5.306666666666667, |
|
"grad_norm": 0.2393457591533661, |
|
"learning_rate": 4.906671868444042e-05, |
|
"loss": 0.0264, |
|
"step": 5970 |
|
}, |
|
{ |
|
"epoch": 5.315555555555555, |
|
"grad_norm": 0.20267800986766815, |
|
"learning_rate": 4.891976656420434e-05, |
|
"loss": 0.0268, |
|
"step": 5980 |
|
}, |
|
{ |
|
"epoch": 5.3244444444444445, |
|
"grad_norm": 0.24776336550712585, |
|
"learning_rate": 4.877282377877714e-05, |
|
"loss": 0.0253, |
|
"step": 5990 |
|
}, |
|
{ |
|
"epoch": 5.333333333333333, |
|
"grad_norm": 0.29501602053642273, |
|
"learning_rate": 4.8625891597960985e-05, |
|
"loss": 0.0317, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 5.342222222222222, |
|
"grad_norm": 0.26491251587867737, |
|
"learning_rate": 4.8478971291466506e-05, |
|
"loss": 0.0232, |
|
"step": 6010 |
|
}, |
|
{ |
|
"epoch": 5.351111111111111, |
|
"grad_norm": 0.2548244893550873, |
|
"learning_rate": 4.8332064128901636e-05, |
|
"loss": 0.0254, |
|
"step": 6020 |
|
}, |
|
{ |
|
"epoch": 5.36, |
|
"grad_norm": 0.22577853500843048, |
|
"learning_rate": 4.8185171379760786e-05, |
|
"loss": 0.025, |
|
"step": 6030 |
|
}, |
|
{ |
|
"epoch": 5.368888888888889, |
|
"grad_norm": 0.24568921327590942, |
|
"learning_rate": 4.8038294313413765e-05, |
|
"loss": 0.0274, |
|
"step": 6040 |
|
}, |
|
{ |
|
"epoch": 5.377777777777778, |
|
"grad_norm": 0.22717340290546417, |
|
"learning_rate": 4.7891434199094904e-05, |
|
"loss": 0.025, |
|
"step": 6050 |
|
}, |
|
{ |
|
"epoch": 5.386666666666667, |
|
"grad_norm": 0.27397602796554565, |
|
"learning_rate": 4.7744592305892e-05, |
|
"loss": 0.0255, |
|
"step": 6060 |
|
}, |
|
{ |
|
"epoch": 5.395555555555555, |
|
"grad_norm": 0.22169044613838196, |
|
"learning_rate": 4.759776990273544e-05, |
|
"loss": 0.0343, |
|
"step": 6070 |
|
}, |
|
{ |
|
"epoch": 5.404444444444445, |
|
"grad_norm": 0.2937365770339966, |
|
"learning_rate": 4.745096825838714e-05, |
|
"loss": 0.0235, |
|
"step": 6080 |
|
}, |
|
{ |
|
"epoch": 5.413333333333333, |
|
"grad_norm": 0.25168099999427795, |
|
"learning_rate": 4.730418864142967e-05, |
|
"loss": 0.0216, |
|
"step": 6090 |
|
}, |
|
{ |
|
"epoch": 5.4222222222222225, |
|
"grad_norm": 0.2527238130569458, |
|
"learning_rate": 4.715743232025523e-05, |
|
"loss": 0.0224, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 5.431111111111111, |
|
"grad_norm": 0.2228160798549652, |
|
"learning_rate": 4.70107005630547e-05, |
|
"loss": 0.0263, |
|
"step": 6110 |
|
}, |
|
{ |
|
"epoch": 5.44, |
|
"grad_norm": 0.21488693356513977, |
|
"learning_rate": 4.686399463780671e-05, |
|
"loss": 0.0255, |
|
"step": 6120 |
|
}, |
|
{ |
|
"epoch": 5.448888888888889, |
|
"grad_norm": 0.23617248237133026, |
|
"learning_rate": 4.6717315812266685e-05, |
|
"loss": 0.019, |
|
"step": 6130 |
|
}, |
|
{ |
|
"epoch": 5.457777777777777, |
|
"grad_norm": 0.26646044850349426, |
|
"learning_rate": 4.657066535395579e-05, |
|
"loss": 0.0225, |
|
"step": 6140 |
|
}, |
|
{ |
|
"epoch": 5.466666666666667, |
|
"grad_norm": 0.27618274092674255, |
|
"learning_rate": 4.6424044530150165e-05, |
|
"loss": 0.0253, |
|
"step": 6150 |
|
}, |
|
{ |
|
"epoch": 5.475555555555555, |
|
"grad_norm": 0.28857922554016113, |
|
"learning_rate": 4.627745460786977e-05, |
|
"loss": 0.0305, |
|
"step": 6160 |
|
}, |
|
{ |
|
"epoch": 5.484444444444445, |
|
"grad_norm": 0.2334105223417282, |
|
"learning_rate": 4.613089685386758e-05, |
|
"loss": 0.0227, |
|
"step": 6170 |
|
}, |
|
{ |
|
"epoch": 5.493333333333333, |
|
"grad_norm": 0.3289278745651245, |
|
"learning_rate": 4.598437253461858e-05, |
|
"loss": 0.0291, |
|
"step": 6180 |
|
}, |
|
{ |
|
"epoch": 5.502222222222223, |
|
"grad_norm": 0.17898818850517273, |
|
"learning_rate": 4.5837882916308805e-05, |
|
"loss": 0.0253, |
|
"step": 6190 |
|
}, |
|
{ |
|
"epoch": 5.511111111111111, |
|
"grad_norm": 0.2660018801689148, |
|
"learning_rate": 4.569142926482447e-05, |
|
"loss": 0.0279, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 5.52, |
|
"grad_norm": 0.23165705800056458, |
|
"learning_rate": 4.554501284574093e-05, |
|
"loss": 0.02, |
|
"step": 6210 |
|
}, |
|
{ |
|
"epoch": 5.528888888888889, |
|
"grad_norm": 0.16429558396339417, |
|
"learning_rate": 4.5398634924311845e-05, |
|
"loss": 0.0246, |
|
"step": 6220 |
|
}, |
|
{ |
|
"epoch": 5.5377777777777775, |
|
"grad_norm": 0.20626378059387207, |
|
"learning_rate": 4.5252296765458155e-05, |
|
"loss": 0.0249, |
|
"step": 6230 |
|
}, |
|
{ |
|
"epoch": 5.546666666666667, |
|
"grad_norm": 0.21239334344863892, |
|
"learning_rate": 4.510599963375724e-05, |
|
"loss": 0.0271, |
|
"step": 6240 |
|
}, |
|
{ |
|
"epoch": 5.555555555555555, |
|
"grad_norm": 0.35829076170921326, |
|
"learning_rate": 4.4959744793431906e-05, |
|
"loss": 0.0243, |
|
"step": 6250 |
|
}, |
|
{ |
|
"epoch": 5.564444444444445, |
|
"grad_norm": 0.18468238413333893, |
|
"learning_rate": 4.4813533508339516e-05, |
|
"loss": 0.0284, |
|
"step": 6260 |
|
}, |
|
{ |
|
"epoch": 5.573333333333333, |
|
"grad_norm": 0.3122689425945282, |
|
"learning_rate": 4.466736704196104e-05, |
|
"loss": 0.026, |
|
"step": 6270 |
|
}, |
|
{ |
|
"epoch": 5.582222222222223, |
|
"grad_norm": 0.28802892565727234, |
|
"learning_rate": 4.4521246657390174e-05, |
|
"loss": 0.0259, |
|
"step": 6280 |
|
}, |
|
{ |
|
"epoch": 5.591111111111111, |
|
"grad_norm": 0.2965587377548218, |
|
"learning_rate": 4.437517361732236e-05, |
|
"loss": 0.025, |
|
"step": 6290 |
|
}, |
|
{ |
|
"epoch": 5.6, |
|
"grad_norm": 0.2500908374786377, |
|
"learning_rate": 4.422914918404397e-05, |
|
"loss": 0.0249, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 5.608888888888889, |
|
"grad_norm": 0.3020857274532318, |
|
"learning_rate": 4.408317461942126e-05, |
|
"loss": 0.0292, |
|
"step": 6310 |
|
}, |
|
{ |
|
"epoch": 5.6177777777777775, |
|
"grad_norm": 0.19300854206085205, |
|
"learning_rate": 4.393725118488964e-05, |
|
"loss": 0.0218, |
|
"step": 6320 |
|
}, |
|
{ |
|
"epoch": 5.626666666666667, |
|
"grad_norm": 0.18631188571453094, |
|
"learning_rate": 4.379138014144261e-05, |
|
"loss": 0.027, |
|
"step": 6330 |
|
}, |
|
{ |
|
"epoch": 5.635555555555555, |
|
"grad_norm": 0.17751100659370422, |
|
"learning_rate": 4.364556274962097e-05, |
|
"loss": 0.0232, |
|
"step": 6340 |
|
}, |
|
{ |
|
"epoch": 5.644444444444445, |
|
"grad_norm": 0.2798633575439453, |
|
"learning_rate": 4.349980026950187e-05, |
|
"loss": 0.0284, |
|
"step": 6350 |
|
}, |
|
{ |
|
"epoch": 5.653333333333333, |
|
"grad_norm": 0.22250615060329437, |
|
"learning_rate": 4.335409396068797e-05, |
|
"loss": 0.0269, |
|
"step": 6360 |
|
}, |
|
{ |
|
"epoch": 5.662222222222223, |
|
"grad_norm": 0.35000017285346985, |
|
"learning_rate": 4.3208445082296475e-05, |
|
"loss": 0.0246, |
|
"step": 6370 |
|
}, |
|
{ |
|
"epoch": 5.671111111111111, |
|
"grad_norm": 0.33724892139434814, |
|
"learning_rate": 4.3062854892948365e-05, |
|
"loss": 0.0237, |
|
"step": 6380 |
|
}, |
|
{ |
|
"epoch": 5.68, |
|
"grad_norm": 0.24912554025650024, |
|
"learning_rate": 4.2917324650757465e-05, |
|
"loss": 0.0244, |
|
"step": 6390 |
|
}, |
|
{ |
|
"epoch": 5.688888888888889, |
|
"grad_norm": 0.2757589519023895, |
|
"learning_rate": 4.277185561331948e-05, |
|
"loss": 0.0223, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 5.697777777777778, |
|
"grad_norm": 0.25983983278274536, |
|
"learning_rate": 4.262644903770134e-05, |
|
"loss": 0.0266, |
|
"step": 6410 |
|
}, |
|
{ |
|
"epoch": 5.706666666666667, |
|
"grad_norm": 0.2947065830230713, |
|
"learning_rate": 4.248110618043007e-05, |
|
"loss": 0.0218, |
|
"step": 6420 |
|
}, |
|
{ |
|
"epoch": 5.7155555555555555, |
|
"grad_norm": 0.23530763387680054, |
|
"learning_rate": 4.233582829748223e-05, |
|
"loss": 0.0198, |
|
"step": 6430 |
|
}, |
|
{ |
|
"epoch": 5.724444444444444, |
|
"grad_norm": 0.2127206176519394, |
|
"learning_rate": 4.219061664427276e-05, |
|
"loss": 0.0249, |
|
"step": 6440 |
|
}, |
|
{ |
|
"epoch": 5.733333333333333, |
|
"grad_norm": 0.2693474590778351, |
|
"learning_rate": 4.204547247564442e-05, |
|
"loss": 0.0226, |
|
"step": 6450 |
|
}, |
|
{ |
|
"epoch": 5.742222222222222, |
|
"grad_norm": 0.22766007483005524, |
|
"learning_rate": 4.190039704585665e-05, |
|
"loss": 0.0311, |
|
"step": 6460 |
|
}, |
|
{ |
|
"epoch": 5.751111111111111, |
|
"grad_norm": 0.30966290831565857, |
|
"learning_rate": 4.175539160857504e-05, |
|
"loss": 0.0232, |
|
"step": 6470 |
|
}, |
|
{ |
|
"epoch": 5.76, |
|
"grad_norm": 0.25286442041397095, |
|
"learning_rate": 4.161045741686019e-05, |
|
"loss": 0.0265, |
|
"step": 6480 |
|
}, |
|
{ |
|
"epoch": 5.768888888888889, |
|
"grad_norm": 0.17193371057510376, |
|
"learning_rate": 4.146559572315719e-05, |
|
"loss": 0.0196, |
|
"step": 6490 |
|
}, |
|
{ |
|
"epoch": 5.777777777777778, |
|
"grad_norm": 0.2662070095539093, |
|
"learning_rate": 4.1320807779284486e-05, |
|
"loss": 0.0221, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 5.786666666666667, |
|
"grad_norm": 0.23293465375900269, |
|
"learning_rate": 4.117609483642336e-05, |
|
"loss": 0.0215, |
|
"step": 6510 |
|
}, |
|
{ |
|
"epoch": 5.795555555555556, |
|
"grad_norm": 0.2291824072599411, |
|
"learning_rate": 4.103145814510684e-05, |
|
"loss": 0.0207, |
|
"step": 6520 |
|
}, |
|
{ |
|
"epoch": 5.804444444444444, |
|
"grad_norm": 0.28128278255462646, |
|
"learning_rate": 4.088689895520915e-05, |
|
"loss": 0.024, |
|
"step": 6530 |
|
}, |
|
{ |
|
"epoch": 5.8133333333333335, |
|
"grad_norm": 0.15259166061878204, |
|
"learning_rate": 4.0742418515934674e-05, |
|
"loss": 0.0213, |
|
"step": 6540 |
|
}, |
|
{ |
|
"epoch": 5.822222222222222, |
|
"grad_norm": 0.25373372435569763, |
|
"learning_rate": 4.059801807580741e-05, |
|
"loss": 0.0236, |
|
"step": 6550 |
|
}, |
|
{ |
|
"epoch": 5.831111111111111, |
|
"grad_norm": 0.20980583131313324, |
|
"learning_rate": 4.045369888265988e-05, |
|
"loss": 0.0302, |
|
"step": 6560 |
|
}, |
|
{ |
|
"epoch": 5.84, |
|
"grad_norm": 0.15859052538871765, |
|
"learning_rate": 4.030946218362266e-05, |
|
"loss": 0.0217, |
|
"step": 6570 |
|
}, |
|
{ |
|
"epoch": 5.848888888888889, |
|
"grad_norm": 0.15832751989364624, |
|
"learning_rate": 4.016530922511337e-05, |
|
"loss": 0.0191, |
|
"step": 6580 |
|
}, |
|
{ |
|
"epoch": 5.857777777777778, |
|
"grad_norm": 0.18795259296894073, |
|
"learning_rate": 4.002124125282602e-05, |
|
"loss": 0.0246, |
|
"step": 6590 |
|
}, |
|
{ |
|
"epoch": 5.866666666666667, |
|
"grad_norm": 0.21467606723308563, |
|
"learning_rate": 3.987725951172022e-05, |
|
"loss": 0.0216, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 5.875555555555556, |
|
"grad_norm": 0.24360638856887817, |
|
"learning_rate": 3.973336524601038e-05, |
|
"loss": 0.0219, |
|
"step": 6610 |
|
}, |
|
{ |
|
"epoch": 5.884444444444444, |
|
"grad_norm": 0.21992219984531403, |
|
"learning_rate": 3.9589559699155034e-05, |
|
"loss": 0.0245, |
|
"step": 6620 |
|
}, |
|
{ |
|
"epoch": 5.8933333333333335, |
|
"grad_norm": 0.3251780569553375, |
|
"learning_rate": 3.944584411384602e-05, |
|
"loss": 0.0235, |
|
"step": 6630 |
|
}, |
|
{ |
|
"epoch": 5.902222222222222, |
|
"grad_norm": 0.23676486313343048, |
|
"learning_rate": 3.930221973199781e-05, |
|
"loss": 0.0251, |
|
"step": 6640 |
|
}, |
|
{ |
|
"epoch": 5.911111111111111, |
|
"grad_norm": 0.234356090426445, |
|
"learning_rate": 3.915868779473671e-05, |
|
"loss": 0.0282, |
|
"step": 6650 |
|
}, |
|
{ |
|
"epoch": 5.92, |
|
"grad_norm": 0.24023783206939697, |
|
"learning_rate": 3.9015249542390215e-05, |
|
"loss": 0.0234, |
|
"step": 6660 |
|
}, |
|
{ |
|
"epoch": 5.928888888888888, |
|
"grad_norm": 0.22661913931369781, |
|
"learning_rate": 3.8871906214476175e-05, |
|
"loss": 0.0227, |
|
"step": 6670 |
|
}, |
|
{ |
|
"epoch": 5.937777777777778, |
|
"grad_norm": 0.25377124547958374, |
|
"learning_rate": 3.872865904969222e-05, |
|
"loss": 0.0211, |
|
"step": 6680 |
|
}, |
|
{ |
|
"epoch": 5.946666666666666, |
|
"grad_norm": 0.2356473058462143, |
|
"learning_rate": 3.8585509285904936e-05, |
|
"loss": 0.0272, |
|
"step": 6690 |
|
}, |
|
{ |
|
"epoch": 5.955555555555556, |
|
"grad_norm": 0.2327507585287094, |
|
"learning_rate": 3.844245816013928e-05, |
|
"loss": 0.0186, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 5.964444444444444, |
|
"grad_norm": 0.23347589373588562, |
|
"learning_rate": 3.8299506908567754e-05, |
|
"loss": 0.0238, |
|
"step": 6710 |
|
}, |
|
{ |
|
"epoch": 5.973333333333334, |
|
"grad_norm": 0.21290481090545654, |
|
"learning_rate": 3.815665676649989e-05, |
|
"loss": 0.0269, |
|
"step": 6720 |
|
}, |
|
{ |
|
"epoch": 5.982222222222222, |
|
"grad_norm": 0.23502108454704285, |
|
"learning_rate": 3.801390896837139e-05, |
|
"loss": 0.0203, |
|
"step": 6730 |
|
}, |
|
{ |
|
"epoch": 5.9911111111111115, |
|
"grad_norm": 0.23788826167583466, |
|
"learning_rate": 3.787126474773364e-05, |
|
"loss": 0.0238, |
|
"step": 6740 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 0.5352295637130737, |
|
"learning_rate": 3.77287253372429e-05, |
|
"loss": 0.0274, |
|
"step": 6750 |
|
}, |
|
{ |
|
"epoch": 6.0088888888888885, |
|
"grad_norm": 0.14990390837192535, |
|
"learning_rate": 3.758629196864976e-05, |
|
"loss": 0.0185, |
|
"step": 6760 |
|
}, |
|
{ |
|
"epoch": 6.017777777777778, |
|
"grad_norm": 0.2006434053182602, |
|
"learning_rate": 3.7443965872788414e-05, |
|
"loss": 0.0263, |
|
"step": 6770 |
|
}, |
|
{ |
|
"epoch": 6.026666666666666, |
|
"grad_norm": 0.19517108798027039, |
|
"learning_rate": 3.73017482795661e-05, |
|
"loss": 0.0262, |
|
"step": 6780 |
|
}, |
|
{ |
|
"epoch": 6.035555555555556, |
|
"grad_norm": 0.2470463067293167, |
|
"learning_rate": 3.715964041795239e-05, |
|
"loss": 0.0296, |
|
"step": 6790 |
|
}, |
|
{ |
|
"epoch": 6.044444444444444, |
|
"grad_norm": 0.15231075882911682, |
|
"learning_rate": 3.701764351596865e-05, |
|
"loss": 0.0212, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 6.053333333333334, |
|
"grad_norm": 0.25489985942840576, |
|
"learning_rate": 3.687575880067737e-05, |
|
"loss": 0.021, |
|
"step": 6810 |
|
}, |
|
{ |
|
"epoch": 6.062222222222222, |
|
"grad_norm": 0.22227603197097778, |
|
"learning_rate": 3.673398749817159e-05, |
|
"loss": 0.022, |
|
"step": 6820 |
|
}, |
|
{ |
|
"epoch": 6.071111111111111, |
|
"grad_norm": 0.1832754909992218, |
|
"learning_rate": 3.659233083356433e-05, |
|
"loss": 0.0241, |
|
"step": 6830 |
|
}, |
|
{ |
|
"epoch": 6.08, |
|
"grad_norm": 0.3198046386241913, |
|
"learning_rate": 3.645079003097788e-05, |
|
"loss": 0.0252, |
|
"step": 6840 |
|
}, |
|
{ |
|
"epoch": 6.088888888888889, |
|
"grad_norm": 0.3462732136249542, |
|
"learning_rate": 3.630936631353341e-05, |
|
"loss": 0.0287, |
|
"step": 6850 |
|
}, |
|
{ |
|
"epoch": 6.097777777777778, |
|
"grad_norm": 0.20065750181674957, |
|
"learning_rate": 3.616806090334023e-05, |
|
"loss": 0.0217, |
|
"step": 6860 |
|
}, |
|
{ |
|
"epoch": 6.1066666666666665, |
|
"grad_norm": 0.24152086675167084, |
|
"learning_rate": 3.6026875021485354e-05, |
|
"loss": 0.0258, |
|
"step": 6870 |
|
}, |
|
{ |
|
"epoch": 6.115555555555556, |
|
"grad_norm": 0.33517709374427795, |
|
"learning_rate": 3.588580988802284e-05, |
|
"loss": 0.0213, |
|
"step": 6880 |
|
}, |
|
{ |
|
"epoch": 6.124444444444444, |
|
"grad_norm": 0.23623313009738922, |
|
"learning_rate": 3.5744866721963373e-05, |
|
"loss": 0.022, |
|
"step": 6890 |
|
}, |
|
{ |
|
"epoch": 6.133333333333334, |
|
"grad_norm": 0.28897619247436523, |
|
"learning_rate": 3.560404674126358e-05, |
|
"loss": 0.0217, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 6.142222222222222, |
|
"grad_norm": 0.42943817377090454, |
|
"learning_rate": 3.546335116281565e-05, |
|
"loss": 0.0263, |
|
"step": 6910 |
|
}, |
|
{ |
|
"epoch": 6.151111111111111, |
|
"grad_norm": 0.2889060974121094, |
|
"learning_rate": 3.5322781202436715e-05, |
|
"loss": 0.028, |
|
"step": 6920 |
|
}, |
|
{ |
|
"epoch": 6.16, |
|
"grad_norm": 0.1770927757024765, |
|
"learning_rate": 3.5182338074858415e-05, |
|
"loss": 0.0224, |
|
"step": 6930 |
|
}, |
|
{ |
|
"epoch": 6.168888888888889, |
|
"grad_norm": 0.2616649270057678, |
|
"learning_rate": 3.504202299371631e-05, |
|
"loss": 0.0295, |
|
"step": 6940 |
|
}, |
|
{ |
|
"epoch": 6.177777777777778, |
|
"grad_norm": 0.2130998969078064, |
|
"learning_rate": 3.490183717153951e-05, |
|
"loss": 0.0217, |
|
"step": 6950 |
|
}, |
|
{ |
|
"epoch": 6.1866666666666665, |
|
"grad_norm": 0.30380457639694214, |
|
"learning_rate": 3.4761781819740114e-05, |
|
"loss": 0.0228, |
|
"step": 6960 |
|
}, |
|
{ |
|
"epoch": 6.195555555555556, |
|
"grad_norm": 0.19739991426467896, |
|
"learning_rate": 3.462185814860277e-05, |
|
"loss": 0.0253, |
|
"step": 6970 |
|
}, |
|
{ |
|
"epoch": 6.204444444444444, |
|
"grad_norm": 0.23829267919063568, |
|
"learning_rate": 3.4482067367274194e-05, |
|
"loss": 0.0248, |
|
"step": 6980 |
|
}, |
|
{ |
|
"epoch": 6.213333333333333, |
|
"grad_norm": 0.24205361306667328, |
|
"learning_rate": 3.4342410683752756e-05, |
|
"loss": 0.0213, |
|
"step": 6990 |
|
}, |
|
{ |
|
"epoch": 6.222222222222222, |
|
"grad_norm": 0.19280356168746948, |
|
"learning_rate": 3.420288930487802e-05, |
|
"loss": 0.0262, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 6.231111111111111, |
|
"grad_norm": 0.25751227140426636, |
|
"learning_rate": 3.4063504436320306e-05, |
|
"loss": 0.0198, |
|
"step": 7010 |
|
}, |
|
{ |
|
"epoch": 6.24, |
|
"grad_norm": 0.26085150241851807, |
|
"learning_rate": 3.392425728257029e-05, |
|
"loss": 0.024, |
|
"step": 7020 |
|
}, |
|
{ |
|
"epoch": 6.248888888888889, |
|
"grad_norm": 0.27481091022491455, |
|
"learning_rate": 3.378514904692861e-05, |
|
"loss": 0.0189, |
|
"step": 7030 |
|
}, |
|
{ |
|
"epoch": 6.257777777777778, |
|
"grad_norm": 0.1653834879398346, |
|
"learning_rate": 3.364618093149543e-05, |
|
"loss": 0.0225, |
|
"step": 7040 |
|
}, |
|
{ |
|
"epoch": 6.266666666666667, |
|
"grad_norm": 0.2331220954656601, |
|
"learning_rate": 3.350735413716005e-05, |
|
"loss": 0.0216, |
|
"step": 7050 |
|
}, |
|
{ |
|
"epoch": 6.275555555555556, |
|
"grad_norm": 0.21019650995731354, |
|
"learning_rate": 3.3368669863590584e-05, |
|
"loss": 0.0222, |
|
"step": 7060 |
|
}, |
|
{ |
|
"epoch": 6.2844444444444445, |
|
"grad_norm": 0.1742238998413086, |
|
"learning_rate": 3.3230129309223524e-05, |
|
"loss": 0.0195, |
|
"step": 7070 |
|
}, |
|
{ |
|
"epoch": 6.293333333333333, |
|
"grad_norm": 0.15180321037769318, |
|
"learning_rate": 3.309173367125344e-05, |
|
"loss": 0.0174, |
|
"step": 7080 |
|
}, |
|
{ |
|
"epoch": 6.302222222222222, |
|
"grad_norm": 0.23088690638542175, |
|
"learning_rate": 3.2953484145622594e-05, |
|
"loss": 0.0197, |
|
"step": 7090 |
|
}, |
|
{ |
|
"epoch": 6.311111111111111, |
|
"grad_norm": 0.16930706799030304, |
|
"learning_rate": 3.2815381927010644e-05, |
|
"loss": 0.0193, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 6.32, |
|
"grad_norm": 0.2832207679748535, |
|
"learning_rate": 3.2677428208824253e-05, |
|
"loss": 0.0267, |
|
"step": 7110 |
|
}, |
|
{ |
|
"epoch": 6.328888888888889, |
|
"grad_norm": 0.4588395655155182, |
|
"learning_rate": 3.25396241831869e-05, |
|
"loss": 0.0217, |
|
"step": 7120 |
|
}, |
|
{ |
|
"epoch": 6.337777777777778, |
|
"grad_norm": 0.16503450274467468, |
|
"learning_rate": 3.2401971040928395e-05, |
|
"loss": 0.0222, |
|
"step": 7130 |
|
}, |
|
{ |
|
"epoch": 6.346666666666667, |
|
"grad_norm": 0.19788210093975067, |
|
"learning_rate": 3.226446997157481e-05, |
|
"loss": 0.0233, |
|
"step": 7140 |
|
}, |
|
{ |
|
"epoch": 6.355555555555555, |
|
"grad_norm": 0.20312805473804474, |
|
"learning_rate": 3.212712216333796e-05, |
|
"loss": 0.0207, |
|
"step": 7150 |
|
}, |
|
{ |
|
"epoch": 6.364444444444445, |
|
"grad_norm": 0.2645591199398041, |
|
"learning_rate": 3.1989928803105385e-05, |
|
"loss": 0.0251, |
|
"step": 7160 |
|
}, |
|
{ |
|
"epoch": 6.373333333333333, |
|
"grad_norm": 0.20243127644062042, |
|
"learning_rate": 3.1852891076429846e-05, |
|
"loss": 0.0195, |
|
"step": 7170 |
|
}, |
|
{ |
|
"epoch": 6.3822222222222225, |
|
"grad_norm": 0.16191408038139343, |
|
"learning_rate": 3.1716010167519305e-05, |
|
"loss": 0.016, |
|
"step": 7180 |
|
}, |
|
{ |
|
"epoch": 6.391111111111111, |
|
"grad_norm": 0.28725966811180115, |
|
"learning_rate": 3.15792872592265e-05, |
|
"loss": 0.0207, |
|
"step": 7190 |
|
}, |
|
{ |
|
"epoch": 6.4, |
|
"grad_norm": 0.19381630420684814, |
|
"learning_rate": 3.144272353303889e-05, |
|
"loss": 0.0199, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 6.408888888888889, |
|
"grad_norm": 0.2130502164363861, |
|
"learning_rate": 3.130632016906828e-05, |
|
"loss": 0.0256, |
|
"step": 7210 |
|
}, |
|
{ |
|
"epoch": 6.417777777777777, |
|
"grad_norm": 0.23538649082183838, |
|
"learning_rate": 3.117007834604082e-05, |
|
"loss": 0.0217, |
|
"step": 7220 |
|
}, |
|
{ |
|
"epoch": 6.426666666666667, |
|
"grad_norm": 0.32628190517425537, |
|
"learning_rate": 3.103399924128656e-05, |
|
"loss": 0.0187, |
|
"step": 7230 |
|
}, |
|
{ |
|
"epoch": 6.435555555555555, |
|
"grad_norm": 0.27763476967811584, |
|
"learning_rate": 3.08980840307296e-05, |
|
"loss": 0.0191, |
|
"step": 7240 |
|
}, |
|
{ |
|
"epoch": 6.444444444444445, |
|
"grad_norm": 0.25328052043914795, |
|
"learning_rate": 3.076233388887758e-05, |
|
"loss": 0.0212, |
|
"step": 7250 |
|
}, |
|
{ |
|
"epoch": 6.453333333333333, |
|
"grad_norm": 0.15242406725883484, |
|
"learning_rate": 3.062674998881183e-05, |
|
"loss": 0.0222, |
|
"step": 7260 |
|
}, |
|
{ |
|
"epoch": 6.4622222222222225, |
|
"grad_norm": 0.21338866651058197, |
|
"learning_rate": 3.049133350217706e-05, |
|
"loss": 0.0226, |
|
"step": 7270 |
|
}, |
|
{ |
|
"epoch": 6.471111111111111, |
|
"grad_norm": 0.3376260995864868, |
|
"learning_rate": 3.0356085599171264e-05, |
|
"loss": 0.02, |
|
"step": 7280 |
|
}, |
|
{ |
|
"epoch": 6.48, |
|
"grad_norm": 0.29789280891418457, |
|
"learning_rate": 3.0221007448535655e-05, |
|
"loss": 0.0166, |
|
"step": 7290 |
|
}, |
|
{ |
|
"epoch": 6.488888888888889, |
|
"grad_norm": 0.3358229696750641, |
|
"learning_rate": 3.0086100217544512e-05, |
|
"loss": 0.0238, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 6.497777777777777, |
|
"grad_norm": 0.22155895829200745, |
|
"learning_rate": 2.9951365071995148e-05, |
|
"loss": 0.0211, |
|
"step": 7310 |
|
}, |
|
{ |
|
"epoch": 6.506666666666667, |
|
"grad_norm": 0.2416805624961853, |
|
"learning_rate": 2.981680317619775e-05, |
|
"loss": 0.0234, |
|
"step": 7320 |
|
}, |
|
{ |
|
"epoch": 6.515555555555555, |
|
"grad_norm": 1.5581576824188232, |
|
"learning_rate": 2.9682415692965437e-05, |
|
"loss": 0.0227, |
|
"step": 7330 |
|
}, |
|
{ |
|
"epoch": 6.524444444444445, |
|
"grad_norm": 0.3416999280452728, |
|
"learning_rate": 2.954820378360409e-05, |
|
"loss": 0.0246, |
|
"step": 7340 |
|
}, |
|
{ |
|
"epoch": 6.533333333333333, |
|
"grad_norm": 0.14920370280742645, |
|
"learning_rate": 2.941416860790242e-05, |
|
"loss": 0.0219, |
|
"step": 7350 |
|
}, |
|
{ |
|
"epoch": 6.542222222222223, |
|
"grad_norm": 0.19279298186302185, |
|
"learning_rate": 2.9280311324121855e-05, |
|
"loss": 0.0239, |
|
"step": 7360 |
|
}, |
|
{ |
|
"epoch": 6.551111111111111, |
|
"grad_norm": 0.12202126532793045, |
|
"learning_rate": 2.914663308898662e-05, |
|
"loss": 0.0197, |
|
"step": 7370 |
|
}, |
|
{ |
|
"epoch": 6.5600000000000005, |
|
"grad_norm": 0.31409889459609985, |
|
"learning_rate": 2.9013135057673624e-05, |
|
"loss": 0.0231, |
|
"step": 7380 |
|
}, |
|
{ |
|
"epoch": 6.568888888888889, |
|
"grad_norm": 0.29431045055389404, |
|
"learning_rate": 2.887981838380268e-05, |
|
"loss": 0.0201, |
|
"step": 7390 |
|
}, |
|
{ |
|
"epoch": 6.5777777777777775, |
|
"grad_norm": 0.2436346411705017, |
|
"learning_rate": 2.8746684219426233e-05, |
|
"loss": 0.0211, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 6.586666666666667, |
|
"grad_norm": 0.27678927779197693, |
|
"learning_rate": 2.861373371501973e-05, |
|
"loss": 0.0263, |
|
"step": 7410 |
|
}, |
|
{ |
|
"epoch": 6.595555555555555, |
|
"grad_norm": 0.27487096190452576, |
|
"learning_rate": 2.848096801947141e-05, |
|
"loss": 0.0226, |
|
"step": 7420 |
|
}, |
|
{ |
|
"epoch": 6.604444444444445, |
|
"grad_norm": 0.25537124276161194, |
|
"learning_rate": 2.8348388280072625e-05, |
|
"loss": 0.0245, |
|
"step": 7430 |
|
}, |
|
{ |
|
"epoch": 6.613333333333333, |
|
"grad_norm": 0.17786261439323425, |
|
"learning_rate": 2.8215995642507605e-05, |
|
"loss": 0.0214, |
|
"step": 7440 |
|
}, |
|
{ |
|
"epoch": 6.622222222222222, |
|
"grad_norm": 0.2850896716117859, |
|
"learning_rate": 2.808379125084392e-05, |
|
"loss": 0.0273, |
|
"step": 7450 |
|
}, |
|
{ |
|
"epoch": 6.631111111111111, |
|
"grad_norm": 0.2478863149881363, |
|
"learning_rate": 2.795177624752231e-05, |
|
"loss": 0.0202, |
|
"step": 7460 |
|
}, |
|
{ |
|
"epoch": 6.64, |
|
"grad_norm": 0.24125871062278748, |
|
"learning_rate": 2.7819951773346997e-05, |
|
"loss": 0.0198, |
|
"step": 7470 |
|
}, |
|
{ |
|
"epoch": 6.648888888888889, |
|
"grad_norm": 0.35471686720848083, |
|
"learning_rate": 2.768831896747569e-05, |
|
"loss": 0.0246, |
|
"step": 7480 |
|
}, |
|
{ |
|
"epoch": 6.657777777777778, |
|
"grad_norm": 0.21844857931137085, |
|
"learning_rate": 2.7556878967409794e-05, |
|
"loss": 0.0158, |
|
"step": 7490 |
|
}, |
|
{ |
|
"epoch": 6.666666666666667, |
|
"grad_norm": 0.2778981029987335, |
|
"learning_rate": 2.7425632908984667e-05, |
|
"loss": 0.0195, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 6.6755555555555555, |
|
"grad_norm": 0.1651017665863037, |
|
"learning_rate": 2.729458192635966e-05, |
|
"loss": 0.0221, |
|
"step": 7510 |
|
}, |
|
{ |
|
"epoch": 6.684444444444445, |
|
"grad_norm": 0.20991581678390503, |
|
"learning_rate": 2.716372715200838e-05, |
|
"loss": 0.019, |
|
"step": 7520 |
|
}, |
|
{ |
|
"epoch": 6.693333333333333, |
|
"grad_norm": 0.1868899017572403, |
|
"learning_rate": 2.7033069716708908e-05, |
|
"loss": 0.0154, |
|
"step": 7530 |
|
}, |
|
{ |
|
"epoch": 6.702222222222222, |
|
"grad_norm": 0.23849980533123016, |
|
"learning_rate": 2.6902610749534084e-05, |
|
"loss": 0.0225, |
|
"step": 7540 |
|
}, |
|
{ |
|
"epoch": 6.711111111111111, |
|
"grad_norm": 0.23667502403259277, |
|
"learning_rate": 2.677235137784162e-05, |
|
"loss": 0.0233, |
|
"step": 7550 |
|
}, |
|
{ |
|
"epoch": 6.72, |
|
"grad_norm": 0.2226117104291916, |
|
"learning_rate": 2.664229272726445e-05, |
|
"loss": 0.0213, |
|
"step": 7560 |
|
}, |
|
{ |
|
"epoch": 6.728888888888889, |
|
"grad_norm": 0.1580812931060791, |
|
"learning_rate": 2.6512435921700975e-05, |
|
"loss": 0.0216, |
|
"step": 7570 |
|
}, |
|
{ |
|
"epoch": 6.737777777777778, |
|
"grad_norm": 0.1850135326385498, |
|
"learning_rate": 2.63827820833054e-05, |
|
"loss": 0.0228, |
|
"step": 7580 |
|
}, |
|
{ |
|
"epoch": 6.746666666666667, |
|
"grad_norm": 0.25110775232315063, |
|
"learning_rate": 2.6253332332477954e-05, |
|
"loss": 0.0208, |
|
"step": 7590 |
|
}, |
|
{ |
|
"epoch": 6.7555555555555555, |
|
"grad_norm": 0.24939677119255066, |
|
"learning_rate": 2.6124087787855245e-05, |
|
"loss": 0.0242, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 6.764444444444445, |
|
"grad_norm": 0.2066407948732376, |
|
"learning_rate": 2.59950495663006e-05, |
|
"loss": 0.0226, |
|
"step": 7610 |
|
}, |
|
{ |
|
"epoch": 6.773333333333333, |
|
"grad_norm": 0.24095773696899414, |
|
"learning_rate": 2.586621878289446e-05, |
|
"loss": 0.0193, |
|
"step": 7620 |
|
}, |
|
{ |
|
"epoch": 6.782222222222222, |
|
"grad_norm": 0.21902915835380554, |
|
"learning_rate": 2.573759655092462e-05, |
|
"loss": 0.0202, |
|
"step": 7630 |
|
}, |
|
{ |
|
"epoch": 6.791111111111111, |
|
"grad_norm": 0.18483377993106842, |
|
"learning_rate": 2.560918398187674e-05, |
|
"loss": 0.0182, |
|
"step": 7640 |
|
}, |
|
{ |
|
"epoch": 6.8, |
|
"grad_norm": 0.1789674460887909, |
|
"learning_rate": 2.548098218542462e-05, |
|
"loss": 0.0175, |
|
"step": 7650 |
|
}, |
|
{ |
|
"epoch": 6.808888888888889, |
|
"grad_norm": 0.27864623069763184, |
|
"learning_rate": 2.535299226942077e-05, |
|
"loss": 0.0215, |
|
"step": 7660 |
|
}, |
|
{ |
|
"epoch": 6.817777777777778, |
|
"grad_norm": 0.18592649698257446, |
|
"learning_rate": 2.5225215339886666e-05, |
|
"loss": 0.0196, |
|
"step": 7670 |
|
}, |
|
{ |
|
"epoch": 6.826666666666666, |
|
"grad_norm": 0.21638546884059906, |
|
"learning_rate": 2.509765250100329e-05, |
|
"loss": 0.024, |
|
"step": 7680 |
|
}, |
|
{ |
|
"epoch": 6.835555555555556, |
|
"grad_norm": 0.16733711957931519, |
|
"learning_rate": 2.4970304855101524e-05, |
|
"loss": 0.0161, |
|
"step": 7690 |
|
}, |
|
{ |
|
"epoch": 6.844444444444444, |
|
"grad_norm": 0.16044211387634277, |
|
"learning_rate": 2.4843173502652745e-05, |
|
"loss": 0.0186, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 6.8533333333333335, |
|
"grad_norm": 0.20504949986934662, |
|
"learning_rate": 2.4716259542259224e-05, |
|
"loss": 0.017, |
|
"step": 7710 |
|
}, |
|
{ |
|
"epoch": 6.862222222222222, |
|
"grad_norm": 0.28035327792167664, |
|
"learning_rate": 2.4589564070644544e-05, |
|
"loss": 0.0242, |
|
"step": 7720 |
|
}, |
|
{ |
|
"epoch": 6.871111111111111, |
|
"grad_norm": 0.20129050314426422, |
|
"learning_rate": 2.4463088182644346e-05, |
|
"loss": 0.0209, |
|
"step": 7730 |
|
}, |
|
{ |
|
"epoch": 6.88, |
|
"grad_norm": 0.16855022311210632, |
|
"learning_rate": 2.4336832971196656e-05, |
|
"loss": 0.0253, |
|
"step": 7740 |
|
}, |
|
{ |
|
"epoch": 6.888888888888889, |
|
"grad_norm": 0.23760779201984406, |
|
"learning_rate": 2.4210799527332602e-05, |
|
"loss": 0.0214, |
|
"step": 7750 |
|
}, |
|
{ |
|
"epoch": 6.897777777777778, |
|
"grad_norm": 0.2209024429321289, |
|
"learning_rate": 2.4084988940166864e-05, |
|
"loss": 0.0165, |
|
"step": 7760 |
|
}, |
|
{ |
|
"epoch": 6.906666666666666, |
|
"grad_norm": 0.31172770261764526, |
|
"learning_rate": 2.395940229688833e-05, |
|
"loss": 0.0215, |
|
"step": 7770 |
|
}, |
|
{ |
|
"epoch": 6.915555555555556, |
|
"grad_norm": 0.22575290501117706, |
|
"learning_rate": 2.3834040682750648e-05, |
|
"loss": 0.0248, |
|
"step": 7780 |
|
}, |
|
{ |
|
"epoch": 6.924444444444444, |
|
"grad_norm": 0.20021922886371613, |
|
"learning_rate": 2.370890518106296e-05, |
|
"loss": 0.026, |
|
"step": 7790 |
|
}, |
|
{ |
|
"epoch": 6.933333333333334, |
|
"grad_norm": 0.27846986055374146, |
|
"learning_rate": 2.3583996873180386e-05, |
|
"loss": 0.0319, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 6.942222222222222, |
|
"grad_norm": 0.20904995501041412, |
|
"learning_rate": 2.34593168384948e-05, |
|
"loss": 0.0193, |
|
"step": 7810 |
|
}, |
|
{ |
|
"epoch": 6.9511111111111115, |
|
"grad_norm": 0.19309522211551666, |
|
"learning_rate": 2.3334866154425415e-05, |
|
"loss": 0.02, |
|
"step": 7820 |
|
}, |
|
{ |
|
"epoch": 6.96, |
|
"grad_norm": 0.18167579174041748, |
|
"learning_rate": 2.32106458964096e-05, |
|
"loss": 0.0216, |
|
"step": 7830 |
|
}, |
|
{ |
|
"epoch": 6.968888888888889, |
|
"grad_norm": 0.22423510253429413, |
|
"learning_rate": 2.308665713789342e-05, |
|
"loss": 0.0218, |
|
"step": 7840 |
|
}, |
|
{ |
|
"epoch": 6.977777777777778, |
|
"grad_norm": 0.33304089307785034, |
|
"learning_rate": 2.2962900950322476e-05, |
|
"loss": 0.0196, |
|
"step": 7850 |
|
}, |
|
{ |
|
"epoch": 6.986666666666666, |
|
"grad_norm": 0.23054029047489166, |
|
"learning_rate": 2.2839378403132595e-05, |
|
"loss": 0.0242, |
|
"step": 7860 |
|
}, |
|
{ |
|
"epoch": 6.995555555555556, |
|
"grad_norm": 0.32826805114746094, |
|
"learning_rate": 2.271609056374066e-05, |
|
"loss": 0.0209, |
|
"step": 7870 |
|
}, |
|
{ |
|
"epoch": 7.004444444444444, |
|
"grad_norm": 0.22956818342208862, |
|
"learning_rate": 2.2593038497535274e-05, |
|
"loss": 0.0175, |
|
"step": 7880 |
|
}, |
|
{ |
|
"epoch": 7.013333333333334, |
|
"grad_norm": 0.17206411063671112, |
|
"learning_rate": 2.247022326786764e-05, |
|
"loss": 0.0195, |
|
"step": 7890 |
|
}, |
|
{ |
|
"epoch": 7.022222222222222, |
|
"grad_norm": 0.2022615671157837, |
|
"learning_rate": 2.2347645936042323e-05, |
|
"loss": 0.0187, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 7.0311111111111115, |
|
"grad_norm": 0.19283053278923035, |
|
"learning_rate": 2.222530756130814e-05, |
|
"loss": 0.0202, |
|
"step": 7910 |
|
}, |
|
{ |
|
"epoch": 7.04, |
|
"grad_norm": 0.2714560925960541, |
|
"learning_rate": 2.210320920084893e-05, |
|
"loss": 0.0236, |
|
"step": 7920 |
|
}, |
|
{ |
|
"epoch": 7.0488888888888885, |
|
"grad_norm": 0.22155192494392395, |
|
"learning_rate": 2.198135190977445e-05, |
|
"loss": 0.0167, |
|
"step": 7930 |
|
}, |
|
{ |
|
"epoch": 7.057777777777778, |
|
"grad_norm": 0.18083541095256805, |
|
"learning_rate": 2.1859736741111313e-05, |
|
"loss": 0.0226, |
|
"step": 7940 |
|
}, |
|
{ |
|
"epoch": 7.066666666666666, |
|
"grad_norm": 0.23374620079994202, |
|
"learning_rate": 2.1738364745793794e-05, |
|
"loss": 0.019, |
|
"step": 7950 |
|
}, |
|
{ |
|
"epoch": 7.075555555555556, |
|
"grad_norm": 0.19303861260414124, |
|
"learning_rate": 2.1617236972654786e-05, |
|
"loss": 0.0195, |
|
"step": 7960 |
|
}, |
|
{ |
|
"epoch": 7.084444444444444, |
|
"grad_norm": 0.3223839998245239, |
|
"learning_rate": 2.1496354468416752e-05, |
|
"loss": 0.0203, |
|
"step": 7970 |
|
}, |
|
{ |
|
"epoch": 7.093333333333334, |
|
"grad_norm": 0.14055588841438293, |
|
"learning_rate": 2.137571827768271e-05, |
|
"loss": 0.0194, |
|
"step": 7980 |
|
}, |
|
{ |
|
"epoch": 7.102222222222222, |
|
"grad_norm": 0.3420756161212921, |
|
"learning_rate": 2.1255329442927113e-05, |
|
"loss": 0.0259, |
|
"step": 7990 |
|
}, |
|
{ |
|
"epoch": 7.111111111111111, |
|
"grad_norm": 0.15780265629291534, |
|
"learning_rate": 2.1135189004486917e-05, |
|
"loss": 0.0189, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 7.12, |
|
"grad_norm": 0.16016796231269836, |
|
"learning_rate": 2.101529800055254e-05, |
|
"loss": 0.0164, |
|
"step": 8010 |
|
}, |
|
{ |
|
"epoch": 7.128888888888889, |
|
"grad_norm": 0.17864546179771423, |
|
"learning_rate": 2.0895657467158985e-05, |
|
"loss": 0.0181, |
|
"step": 8020 |
|
}, |
|
{ |
|
"epoch": 7.137777777777778, |
|
"grad_norm": 0.3572377562522888, |
|
"learning_rate": 2.0776268438176742e-05, |
|
"loss": 0.0187, |
|
"step": 8030 |
|
}, |
|
{ |
|
"epoch": 7.1466666666666665, |
|
"grad_norm": 0.18679283559322357, |
|
"learning_rate": 2.0657131945303017e-05, |
|
"loss": 0.0197, |
|
"step": 8040 |
|
}, |
|
{ |
|
"epoch": 7.155555555555556, |
|
"grad_norm": 0.20286810398101807, |
|
"learning_rate": 2.0538249018052613e-05, |
|
"loss": 0.0205, |
|
"step": 8050 |
|
}, |
|
{ |
|
"epoch": 7.164444444444444, |
|
"grad_norm": 0.28327563405036926, |
|
"learning_rate": 2.0419620683749276e-05, |
|
"loss": 0.0236, |
|
"step": 8060 |
|
}, |
|
{ |
|
"epoch": 7.173333333333334, |
|
"grad_norm": 0.2182239592075348, |
|
"learning_rate": 2.0301247967516608e-05, |
|
"loss": 0.0212, |
|
"step": 8070 |
|
}, |
|
{ |
|
"epoch": 7.182222222222222, |
|
"grad_norm": 0.31717169284820557, |
|
"learning_rate": 2.0183131892269373e-05, |
|
"loss": 0.0207, |
|
"step": 8080 |
|
}, |
|
{ |
|
"epoch": 7.191111111111111, |
|
"grad_norm": 0.20741143822669983, |
|
"learning_rate": 2.0065273478704467e-05, |
|
"loss": 0.0156, |
|
"step": 8090 |
|
}, |
|
{ |
|
"epoch": 7.2, |
|
"grad_norm": 0.2119719386100769, |
|
"learning_rate": 1.994767374529232e-05, |
|
"loss": 0.0165, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 7.208888888888889, |
|
"grad_norm": 0.18549510836601257, |
|
"learning_rate": 1.9830333708267916e-05, |
|
"loss": 0.019, |
|
"step": 8110 |
|
}, |
|
{ |
|
"epoch": 7.217777777777778, |
|
"grad_norm": 0.2186800241470337, |
|
"learning_rate": 1.9713254381622143e-05, |
|
"loss": 0.0162, |
|
"step": 8120 |
|
}, |
|
{ |
|
"epoch": 7.226666666666667, |
|
"grad_norm": 0.18081362545490265, |
|
"learning_rate": 1.9596436777092864e-05, |
|
"loss": 0.0173, |
|
"step": 8130 |
|
}, |
|
{ |
|
"epoch": 7.235555555555556, |
|
"grad_norm": 0.17753848433494568, |
|
"learning_rate": 1.9479881904156366e-05, |
|
"loss": 0.0234, |
|
"step": 8140 |
|
}, |
|
{ |
|
"epoch": 7.2444444444444445, |
|
"grad_norm": 0.34621545672416687, |
|
"learning_rate": 1.9363590770018548e-05, |
|
"loss": 0.0195, |
|
"step": 8150 |
|
}, |
|
{ |
|
"epoch": 7.253333333333333, |
|
"grad_norm": 0.2383035272359848, |
|
"learning_rate": 1.9247564379606164e-05, |
|
"loss": 0.0275, |
|
"step": 8160 |
|
}, |
|
{ |
|
"epoch": 7.262222222222222, |
|
"grad_norm": 0.19915683567523956, |
|
"learning_rate": 1.9131803735558224e-05, |
|
"loss": 0.0198, |
|
"step": 8170 |
|
}, |
|
{ |
|
"epoch": 7.271111111111111, |
|
"grad_norm": 0.18759453296661377, |
|
"learning_rate": 1.9016309838217273e-05, |
|
"loss": 0.0173, |
|
"step": 8180 |
|
}, |
|
{ |
|
"epoch": 7.28, |
|
"grad_norm": 0.1518639326095581, |
|
"learning_rate": 1.8901083685620825e-05, |
|
"loss": 0.0221, |
|
"step": 8190 |
|
}, |
|
{ |
|
"epoch": 7.288888888888889, |
|
"grad_norm": 0.20098768174648285, |
|
"learning_rate": 1.878612627349263e-05, |
|
"loss": 0.0211, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 7.297777777777778, |
|
"grad_norm": 0.31528663635253906, |
|
"learning_rate": 1.8671438595234155e-05, |
|
"loss": 0.0259, |
|
"step": 8210 |
|
}, |
|
{ |
|
"epoch": 7.306666666666667, |
|
"grad_norm": 0.23191457986831665, |
|
"learning_rate": 1.855702164191593e-05, |
|
"loss": 0.0164, |
|
"step": 8220 |
|
}, |
|
{ |
|
"epoch": 7.315555555555555, |
|
"grad_norm": 0.20536059141159058, |
|
"learning_rate": 1.844287640226909e-05, |
|
"loss": 0.017, |
|
"step": 8230 |
|
}, |
|
{ |
|
"epoch": 7.3244444444444445, |
|
"grad_norm": 0.16823317110538483, |
|
"learning_rate": 1.8329003862676702e-05, |
|
"loss": 0.0226, |
|
"step": 8240 |
|
}, |
|
{ |
|
"epoch": 7.333333333333333, |
|
"grad_norm": 0.2212604582309723, |
|
"learning_rate": 1.821540500716534e-05, |
|
"loss": 0.0196, |
|
"step": 8250 |
|
}, |
|
{ |
|
"epoch": 7.342222222222222, |
|
"grad_norm": 0.2062293291091919, |
|
"learning_rate": 1.8102080817396487e-05, |
|
"loss": 0.0192, |
|
"step": 8260 |
|
}, |
|
{ |
|
"epoch": 7.351111111111111, |
|
"grad_norm": 0.1987256109714508, |
|
"learning_rate": 1.7989032272658213e-05, |
|
"loss": 0.0224, |
|
"step": 8270 |
|
}, |
|
{ |
|
"epoch": 7.36, |
|
"grad_norm": 0.2974061071872711, |
|
"learning_rate": 1.7876260349856517e-05, |
|
"loss": 0.0207, |
|
"step": 8280 |
|
}, |
|
{ |
|
"epoch": 7.368888888888889, |
|
"grad_norm": 0.22006143629550934, |
|
"learning_rate": 1.7763766023507002e-05, |
|
"loss": 0.0209, |
|
"step": 8290 |
|
}, |
|
{ |
|
"epoch": 7.377777777777778, |
|
"grad_norm": 0.29517602920532227, |
|
"learning_rate": 1.7651550265726425e-05, |
|
"loss": 0.0168, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 7.386666666666667, |
|
"grad_norm": 0.2768726944923401, |
|
"learning_rate": 1.753961404622434e-05, |
|
"loss": 0.0199, |
|
"step": 8310 |
|
}, |
|
{ |
|
"epoch": 7.395555555555555, |
|
"grad_norm": 0.355312705039978, |
|
"learning_rate": 1.7427958332294625e-05, |
|
"loss": 0.0168, |
|
"step": 8320 |
|
}, |
|
{ |
|
"epoch": 7.404444444444445, |
|
"grad_norm": 0.16905713081359863, |
|
"learning_rate": 1.731658408880721e-05, |
|
"loss": 0.017, |
|
"step": 8330 |
|
}, |
|
{ |
|
"epoch": 7.413333333333333, |
|
"grad_norm": 0.19279661774635315, |
|
"learning_rate": 1.7205492278199665e-05, |
|
"loss": 0.0173, |
|
"step": 8340 |
|
}, |
|
{ |
|
"epoch": 7.4222222222222225, |
|
"grad_norm": 0.1918463557958603, |
|
"learning_rate": 1.709468386046899e-05, |
|
"loss": 0.0208, |
|
"step": 8350 |
|
}, |
|
{ |
|
"epoch": 7.431111111111111, |
|
"grad_norm": 0.1732010543346405, |
|
"learning_rate": 1.6984159793163207e-05, |
|
"loss": 0.0245, |
|
"step": 8360 |
|
}, |
|
{ |
|
"epoch": 7.44, |
|
"grad_norm": 0.23559196293354034, |
|
"learning_rate": 1.6873921031373113e-05, |
|
"loss": 0.0222, |
|
"step": 8370 |
|
}, |
|
{ |
|
"epoch": 7.448888888888889, |
|
"grad_norm": 0.1514008343219757, |
|
"learning_rate": 1.6763968527724105e-05, |
|
"loss": 0.0196, |
|
"step": 8380 |
|
}, |
|
{ |
|
"epoch": 7.457777777777777, |
|
"grad_norm": 0.23101367056369781, |
|
"learning_rate": 1.6654303232367828e-05, |
|
"loss": 0.0171, |
|
"step": 8390 |
|
}, |
|
{ |
|
"epoch": 7.466666666666667, |
|
"grad_norm": 0.19606225192546844, |
|
"learning_rate": 1.6544926092974093e-05, |
|
"loss": 0.0189, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 7.475555555555555, |
|
"grad_norm": 0.215582937002182, |
|
"learning_rate": 1.643583805472252e-05, |
|
"loss": 0.0187, |
|
"step": 8410 |
|
}, |
|
{ |
|
"epoch": 7.484444444444445, |
|
"grad_norm": 0.2050636112689972, |
|
"learning_rate": 1.6327040060294564e-05, |
|
"loss": 0.0184, |
|
"step": 8420 |
|
}, |
|
{ |
|
"epoch": 7.493333333333333, |
|
"grad_norm": 0.2433505803346634, |
|
"learning_rate": 1.6218533049865203e-05, |
|
"loss": 0.0204, |
|
"step": 8430 |
|
}, |
|
{ |
|
"epoch": 7.502222222222223, |
|
"grad_norm": 0.15933063626289368, |
|
"learning_rate": 1.6110317961094977e-05, |
|
"loss": 0.017, |
|
"step": 8440 |
|
}, |
|
{ |
|
"epoch": 7.511111111111111, |
|
"grad_norm": 0.2794565260410309, |
|
"learning_rate": 1.6002395729121655e-05, |
|
"loss": 0.0198, |
|
"step": 8450 |
|
}, |
|
{ |
|
"epoch": 7.52, |
|
"grad_norm": 0.21836277842521667, |
|
"learning_rate": 1.5894767286552424e-05, |
|
"loss": 0.0228, |
|
"step": 8460 |
|
}, |
|
{ |
|
"epoch": 7.528888888888889, |
|
"grad_norm": 0.247945636510849, |
|
"learning_rate": 1.5787433563455618e-05, |
|
"loss": 0.0223, |
|
"step": 8470 |
|
}, |
|
{ |
|
"epoch": 7.5377777777777775, |
|
"grad_norm": 0.2172486037015915, |
|
"learning_rate": 1.56803954873528e-05, |
|
"loss": 0.021, |
|
"step": 8480 |
|
}, |
|
{ |
|
"epoch": 7.546666666666667, |
|
"grad_norm": 0.32153230905532837, |
|
"learning_rate": 1.5573653983210683e-05, |
|
"loss": 0.0187, |
|
"step": 8490 |
|
}, |
|
{ |
|
"epoch": 7.555555555555555, |
|
"grad_norm": 0.24206095933914185, |
|
"learning_rate": 1.5467209973433178e-05, |
|
"loss": 0.0191, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 7.564444444444445, |
|
"grad_norm": 0.16280461847782135, |
|
"learning_rate": 1.536106437785338e-05, |
|
"loss": 0.0191, |
|
"step": 8510 |
|
}, |
|
{ |
|
"epoch": 7.573333333333333, |
|
"grad_norm": 0.19574402272701263, |
|
"learning_rate": 1.525521811372569e-05, |
|
"loss": 0.0226, |
|
"step": 8520 |
|
}, |
|
{ |
|
"epoch": 7.582222222222223, |
|
"grad_norm": 0.23104345798492432, |
|
"learning_rate": 1.5149672095717805e-05, |
|
"loss": 0.0205, |
|
"step": 8530 |
|
}, |
|
{ |
|
"epoch": 7.591111111111111, |
|
"grad_norm": 0.16590049862861633, |
|
"learning_rate": 1.5044427235902869e-05, |
|
"loss": 0.0178, |
|
"step": 8540 |
|
}, |
|
{ |
|
"epoch": 7.6, |
|
"grad_norm": 0.18938252329826355, |
|
"learning_rate": 1.4939484443751557e-05, |
|
"loss": 0.0158, |
|
"step": 8550 |
|
}, |
|
{ |
|
"epoch": 7.608888888888889, |
|
"grad_norm": 0.12492207437753677, |
|
"learning_rate": 1.4834844626124284e-05, |
|
"loss": 0.0212, |
|
"step": 8560 |
|
}, |
|
{ |
|
"epoch": 7.6177777777777775, |
|
"grad_norm": 0.2874824106693268, |
|
"learning_rate": 1.4730508687263261e-05, |
|
"loss": 0.021, |
|
"step": 8570 |
|
}, |
|
{ |
|
"epoch": 7.626666666666667, |
|
"grad_norm": 0.3031473457813263, |
|
"learning_rate": 1.4626477528784766e-05, |
|
"loss": 0.0158, |
|
"step": 8580 |
|
}, |
|
{ |
|
"epoch": 7.635555555555555, |
|
"grad_norm": 0.16178911924362183, |
|
"learning_rate": 1.4522752049671313e-05, |
|
"loss": 0.019, |
|
"step": 8590 |
|
}, |
|
{ |
|
"epoch": 7.644444444444445, |
|
"grad_norm": 0.16021935641765594, |
|
"learning_rate": 1.4419333146263924e-05, |
|
"loss": 0.017, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 7.653333333333333, |
|
"grad_norm": 0.20860594511032104, |
|
"learning_rate": 1.4316221712254335e-05, |
|
"loss": 0.0179, |
|
"step": 8610 |
|
}, |
|
{ |
|
"epoch": 7.662222222222223, |
|
"grad_norm": 0.16072997450828552, |
|
"learning_rate": 1.4213418638677261e-05, |
|
"loss": 0.0178, |
|
"step": 8620 |
|
}, |
|
{ |
|
"epoch": 7.671111111111111, |
|
"grad_norm": 0.15961474180221558, |
|
"learning_rate": 1.4110924813902803e-05, |
|
"loss": 0.0171, |
|
"step": 8630 |
|
}, |
|
{ |
|
"epoch": 7.68, |
|
"grad_norm": 0.24492867290973663, |
|
"learning_rate": 1.4008741123628639e-05, |
|
"loss": 0.0165, |
|
"step": 8640 |
|
}, |
|
{ |
|
"epoch": 7.688888888888889, |
|
"grad_norm": 0.18049024045467377, |
|
"learning_rate": 1.3906868450872446e-05, |
|
"loss": 0.0232, |
|
"step": 8650 |
|
}, |
|
{ |
|
"epoch": 7.697777777777778, |
|
"grad_norm": 0.22683820128440857, |
|
"learning_rate": 1.3805307675964224e-05, |
|
"loss": 0.0172, |
|
"step": 8660 |
|
}, |
|
{ |
|
"epoch": 7.706666666666667, |
|
"grad_norm": 0.22296828031539917, |
|
"learning_rate": 1.3704059676538777e-05, |
|
"loss": 0.0207, |
|
"step": 8670 |
|
}, |
|
{ |
|
"epoch": 7.7155555555555555, |
|
"grad_norm": 0.18780431151390076, |
|
"learning_rate": 1.3603125327528022e-05, |
|
"loss": 0.018, |
|
"step": 8680 |
|
}, |
|
{ |
|
"epoch": 7.724444444444444, |
|
"grad_norm": 0.20270521938800812, |
|
"learning_rate": 1.3502505501153472e-05, |
|
"loss": 0.0222, |
|
"step": 8690 |
|
}, |
|
{ |
|
"epoch": 7.733333333333333, |
|
"grad_norm": 0.2765953838825226, |
|
"learning_rate": 1.3402201066918713e-05, |
|
"loss": 0.0187, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 7.742222222222222, |
|
"grad_norm": 0.20723624527454376, |
|
"learning_rate": 1.3302212891601895e-05, |
|
"loss": 0.0207, |
|
"step": 8710 |
|
}, |
|
{ |
|
"epoch": 7.751111111111111, |
|
"grad_norm": 0.18875935673713684, |
|
"learning_rate": 1.3202541839248201e-05, |
|
"loss": 0.0157, |
|
"step": 8720 |
|
}, |
|
{ |
|
"epoch": 7.76, |
|
"grad_norm": 0.29388731718063354, |
|
"learning_rate": 1.3103188771162406e-05, |
|
"loss": 0.0162, |
|
"step": 8730 |
|
}, |
|
{ |
|
"epoch": 7.768888888888889, |
|
"grad_norm": 0.25692036747932434, |
|
"learning_rate": 1.300415454590143e-05, |
|
"loss": 0.0166, |
|
"step": 8740 |
|
}, |
|
{ |
|
"epoch": 7.777777777777778, |
|
"grad_norm": 0.25091516971588135, |
|
"learning_rate": 1.2905440019266951e-05, |
|
"loss": 0.0193, |
|
"step": 8750 |
|
}, |
|
{ |
|
"epoch": 7.786666666666667, |
|
"grad_norm": 0.20781750977039337, |
|
"learning_rate": 1.2807046044297926e-05, |
|
"loss": 0.0209, |
|
"step": 8760 |
|
}, |
|
{ |
|
"epoch": 7.795555555555556, |
|
"grad_norm": 0.20112888514995575, |
|
"learning_rate": 1.2708973471263358e-05, |
|
"loss": 0.0181, |
|
"step": 8770 |
|
}, |
|
{ |
|
"epoch": 7.804444444444444, |
|
"grad_norm": 0.157766193151474, |
|
"learning_rate": 1.261122314765475e-05, |
|
"loss": 0.0139, |
|
"step": 8780 |
|
}, |
|
{ |
|
"epoch": 7.8133333333333335, |
|
"grad_norm": 0.2164987176656723, |
|
"learning_rate": 1.2513795918178988e-05, |
|
"loss": 0.0181, |
|
"step": 8790 |
|
}, |
|
{ |
|
"epoch": 7.822222222222222, |
|
"grad_norm": 0.2092042863368988, |
|
"learning_rate": 1.2416692624750898e-05, |
|
"loss": 0.0204, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 7.831111111111111, |
|
"grad_norm": 0.2696256935596466, |
|
"learning_rate": 1.2319914106486064e-05, |
|
"loss": 0.018, |
|
"step": 8810 |
|
}, |
|
{ |
|
"epoch": 7.84, |
|
"grad_norm": 0.21180713176727295, |
|
"learning_rate": 1.2223461199693492e-05, |
|
"loss": 0.0213, |
|
"step": 8820 |
|
}, |
|
{ |
|
"epoch": 7.848888888888889, |
|
"grad_norm": 0.15319228172302246, |
|
"learning_rate": 1.2127334737868428e-05, |
|
"loss": 0.0152, |
|
"step": 8830 |
|
}, |
|
{ |
|
"epoch": 7.857777777777778, |
|
"grad_norm": 0.2841119170188904, |
|
"learning_rate": 1.2031535551685202e-05, |
|
"loss": 0.0147, |
|
"step": 8840 |
|
}, |
|
{ |
|
"epoch": 7.866666666666667, |
|
"grad_norm": 0.2753544747829437, |
|
"learning_rate": 1.1936064468989943e-05, |
|
"loss": 0.0257, |
|
"step": 8850 |
|
}, |
|
{ |
|
"epoch": 7.875555555555556, |
|
"grad_norm": 0.13984255492687225, |
|
"learning_rate": 1.1840922314793512e-05, |
|
"loss": 0.019, |
|
"step": 8860 |
|
}, |
|
{ |
|
"epoch": 7.884444444444444, |
|
"grad_norm": 0.283313125371933, |
|
"learning_rate": 1.1746109911264308e-05, |
|
"loss": 0.019, |
|
"step": 8870 |
|
}, |
|
{ |
|
"epoch": 7.8933333333333335, |
|
"grad_norm": 0.2119409292936325, |
|
"learning_rate": 1.165162807772126e-05, |
|
"loss": 0.0196, |
|
"step": 8880 |
|
}, |
|
{ |
|
"epoch": 7.902222222222222, |
|
"grad_norm": 0.25287100672721863, |
|
"learning_rate": 1.155747763062664e-05, |
|
"loss": 0.0189, |
|
"step": 8890 |
|
}, |
|
{ |
|
"epoch": 7.911111111111111, |
|
"grad_norm": 0.2570377290248871, |
|
"learning_rate": 1.1463659383579056e-05, |
|
"loss": 0.0162, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 7.92, |
|
"grad_norm": 0.20245495438575745, |
|
"learning_rate": 1.1370174147306411e-05, |
|
"loss": 0.022, |
|
"step": 8910 |
|
}, |
|
{ |
|
"epoch": 7.928888888888888, |
|
"grad_norm": 0.21050065755844116, |
|
"learning_rate": 1.1277022729658954e-05, |
|
"loss": 0.0182, |
|
"step": 8920 |
|
}, |
|
{ |
|
"epoch": 7.937777777777778, |
|
"grad_norm": 0.21160998940467834, |
|
"learning_rate": 1.1184205935602193e-05, |
|
"loss": 0.0221, |
|
"step": 8930 |
|
}, |
|
{ |
|
"epoch": 7.946666666666666, |
|
"grad_norm": 0.20674405992031097, |
|
"learning_rate": 1.1091724567210016e-05, |
|
"loss": 0.0181, |
|
"step": 8940 |
|
}, |
|
{ |
|
"epoch": 7.955555555555556, |
|
"grad_norm": 0.15841062366962433, |
|
"learning_rate": 1.0999579423657725e-05, |
|
"loss": 0.0154, |
|
"step": 8950 |
|
}, |
|
{ |
|
"epoch": 7.964444444444444, |
|
"grad_norm": 0.131067156791687, |
|
"learning_rate": 1.090777130121518e-05, |
|
"loss": 0.0165, |
|
"step": 8960 |
|
}, |
|
{ |
|
"epoch": 7.973333333333334, |
|
"grad_norm": 0.17673835158348083, |
|
"learning_rate": 1.0816300993239853e-05, |
|
"loss": 0.0172, |
|
"step": 8970 |
|
}, |
|
{ |
|
"epoch": 7.982222222222222, |
|
"grad_norm": 0.19889695942401886, |
|
"learning_rate": 1.072516929017e-05, |
|
"loss": 0.0206, |
|
"step": 8980 |
|
}, |
|
{ |
|
"epoch": 7.9911111111111115, |
|
"grad_norm": 0.17969636619091034, |
|
"learning_rate": 1.0634376979517835e-05, |
|
"loss": 0.0193, |
|
"step": 8990 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 0.24688442051410675, |
|
"learning_rate": 1.0543924845862746e-05, |
|
"loss": 0.0178, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 8.008888888888889, |
|
"grad_norm": 0.2032891809940338, |
|
"learning_rate": 1.0453813670844448e-05, |
|
"loss": 0.023, |
|
"step": 9010 |
|
}, |
|
{ |
|
"epoch": 8.017777777777777, |
|
"grad_norm": 0.1844429224729538, |
|
"learning_rate": 1.0364044233156306e-05, |
|
"loss": 0.0146, |
|
"step": 9020 |
|
}, |
|
{ |
|
"epoch": 8.026666666666667, |
|
"grad_norm": 0.24897849559783936, |
|
"learning_rate": 1.0274617308538537e-05, |
|
"loss": 0.016, |
|
"step": 9030 |
|
}, |
|
{ |
|
"epoch": 8.035555555555556, |
|
"grad_norm": 0.22154377400875092, |
|
"learning_rate": 1.018553366977158e-05, |
|
"loss": 0.0169, |
|
"step": 9040 |
|
}, |
|
{ |
|
"epoch": 8.044444444444444, |
|
"grad_norm": 0.2237059324979782, |
|
"learning_rate": 1.0096794086669375e-05, |
|
"loss": 0.0207, |
|
"step": 9050 |
|
}, |
|
{ |
|
"epoch": 8.053333333333333, |
|
"grad_norm": 0.17183353006839752, |
|
"learning_rate": 1.0008399326072648e-05, |
|
"loss": 0.0164, |
|
"step": 9060 |
|
}, |
|
{ |
|
"epoch": 8.062222222222223, |
|
"grad_norm": 0.23695304989814758, |
|
"learning_rate": 9.92035015184245e-06, |
|
"loss": 0.0226, |
|
"step": 9070 |
|
}, |
|
{ |
|
"epoch": 8.071111111111112, |
|
"grad_norm": 0.2032201886177063, |
|
"learning_rate": 9.832647324853366e-06, |
|
"loss": 0.0175, |
|
"step": 9080 |
|
}, |
|
{ |
|
"epoch": 8.08, |
|
"grad_norm": 0.18037071824073792, |
|
"learning_rate": 9.745291602987122e-06, |
|
"loss": 0.0195, |
|
"step": 9090 |
|
}, |
|
{ |
|
"epoch": 8.088888888888889, |
|
"grad_norm": 0.2386200726032257, |
|
"learning_rate": 9.658283741125835e-06, |
|
"loss": 0.0162, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 8.097777777777777, |
|
"grad_norm": 0.17658579349517822, |
|
"learning_rate": 9.571624491145697e-06, |
|
"loss": 0.0162, |
|
"step": 9110 |
|
}, |
|
{ |
|
"epoch": 8.106666666666667, |
|
"grad_norm": 0.28618794679641724, |
|
"learning_rate": 9.485314601910312e-06, |
|
"loss": 0.0155, |
|
"step": 9120 |
|
}, |
|
{ |
|
"epoch": 8.115555555555556, |
|
"grad_norm": 0.24037206172943115, |
|
"learning_rate": 9.399354819264367e-06, |
|
"loss": 0.0156, |
|
"step": 9130 |
|
}, |
|
{ |
|
"epoch": 8.124444444444444, |
|
"grad_norm": 0.1906750351190567, |
|
"learning_rate": 9.313745886027003e-06, |
|
"loss": 0.0188, |
|
"step": 9140 |
|
}, |
|
{ |
|
"epoch": 8.133333333333333, |
|
"grad_norm": 0.14933055639266968, |
|
"learning_rate": 9.228488541985614e-06, |
|
"loss": 0.0184, |
|
"step": 9150 |
|
}, |
|
{ |
|
"epoch": 8.142222222222221, |
|
"grad_norm": 0.14800281822681427, |
|
"learning_rate": 9.143583523889272e-06, |
|
"loss": 0.0196, |
|
"step": 9160 |
|
}, |
|
{ |
|
"epoch": 8.151111111111112, |
|
"grad_norm": 0.19895856082439423, |
|
"learning_rate": 9.059031565442494e-06, |
|
"loss": 0.0188, |
|
"step": 9170 |
|
}, |
|
{ |
|
"epoch": 8.16, |
|
"grad_norm": 0.1898520141839981, |
|
"learning_rate": 8.97483339729876e-06, |
|
"loss": 0.0152, |
|
"step": 9180 |
|
}, |
|
{ |
|
"epoch": 8.168888888888889, |
|
"grad_norm": 0.15310876071453094, |
|
"learning_rate": 8.890989747054362e-06, |
|
"loss": 0.0157, |
|
"step": 9190 |
|
}, |
|
{ |
|
"epoch": 8.177777777777777, |
|
"grad_norm": 0.37408533692359924, |
|
"learning_rate": 8.80750133924198e-06, |
|
"loss": 0.0161, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 8.186666666666667, |
|
"grad_norm": 0.31413111090660095, |
|
"learning_rate": 8.724368895324525e-06, |
|
"loss": 0.0144, |
|
"step": 9210 |
|
}, |
|
{ |
|
"epoch": 8.195555555555556, |
|
"grad_norm": 0.24376501142978668, |
|
"learning_rate": 8.641593133688802e-06, |
|
"loss": 0.0182, |
|
"step": 9220 |
|
}, |
|
{ |
|
"epoch": 8.204444444444444, |
|
"grad_norm": 0.26056551933288574, |
|
"learning_rate": 8.559174769639394e-06, |
|
"loss": 0.0181, |
|
"step": 9230 |
|
}, |
|
{ |
|
"epoch": 8.213333333333333, |
|
"grad_norm": 0.15622368454933167, |
|
"learning_rate": 8.477114515392403e-06, |
|
"loss": 0.016, |
|
"step": 9240 |
|
}, |
|
{ |
|
"epoch": 8.222222222222221, |
|
"grad_norm": 0.139377161860466, |
|
"learning_rate": 8.395413080069398e-06, |
|
"loss": 0.0183, |
|
"step": 9250 |
|
}, |
|
{ |
|
"epoch": 8.231111111111112, |
|
"grad_norm": 0.34442803263664246, |
|
"learning_rate": 8.314071169691168e-06, |
|
"loss": 0.019, |
|
"step": 9260 |
|
}, |
|
{ |
|
"epoch": 8.24, |
|
"grad_norm": 0.17998872697353363, |
|
"learning_rate": 8.23308948717168e-06, |
|
"loss": 0.0208, |
|
"step": 9270 |
|
}, |
|
{ |
|
"epoch": 8.248888888888889, |
|
"grad_norm": 0.2519272565841675, |
|
"learning_rate": 8.152468732312041e-06, |
|
"loss": 0.0224, |
|
"step": 9280 |
|
}, |
|
{ |
|
"epoch": 8.257777777777777, |
|
"grad_norm": 0.23206669092178345, |
|
"learning_rate": 8.072209601794373e-06, |
|
"loss": 0.0208, |
|
"step": 9290 |
|
}, |
|
{ |
|
"epoch": 8.266666666666667, |
|
"grad_norm": 0.2630186378955841, |
|
"learning_rate": 7.99231278917585e-06, |
|
"loss": 0.021, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 8.275555555555556, |
|
"grad_norm": 0.20980530977249146, |
|
"learning_rate": 7.912778984882656e-06, |
|
"loss": 0.0136, |
|
"step": 9310 |
|
}, |
|
{ |
|
"epoch": 8.284444444444444, |
|
"grad_norm": 0.3153038024902344, |
|
"learning_rate": 7.833608876204107e-06, |
|
"loss": 0.0154, |
|
"step": 9320 |
|
}, |
|
{ |
|
"epoch": 8.293333333333333, |
|
"grad_norm": 0.21069422364234924, |
|
"learning_rate": 7.754803147286594e-06, |
|
"loss": 0.0149, |
|
"step": 9330 |
|
}, |
|
{ |
|
"epoch": 8.302222222222222, |
|
"grad_norm": 0.19590310752391815, |
|
"learning_rate": 7.676362479127764e-06, |
|
"loss": 0.0196, |
|
"step": 9340 |
|
}, |
|
{ |
|
"epoch": 8.311111111111112, |
|
"grad_norm": 0.17674367129802704, |
|
"learning_rate": 7.598287549570565e-06, |
|
"loss": 0.0184, |
|
"step": 9350 |
|
}, |
|
{ |
|
"epoch": 8.32, |
|
"grad_norm": 0.30226150155067444, |
|
"learning_rate": 7.5205790332974866e-06, |
|
"loss": 0.0188, |
|
"step": 9360 |
|
}, |
|
{ |
|
"epoch": 8.328888888888889, |
|
"grad_norm": 0.21695910394191742, |
|
"learning_rate": 7.443237601824615e-06, |
|
"loss": 0.0139, |
|
"step": 9370 |
|
}, |
|
{ |
|
"epoch": 8.337777777777777, |
|
"grad_norm": 0.23410043120384216, |
|
"learning_rate": 7.366263923495892e-06, |
|
"loss": 0.0216, |
|
"step": 9380 |
|
}, |
|
{ |
|
"epoch": 8.346666666666668, |
|
"grad_norm": 0.2212994247674942, |
|
"learning_rate": 7.289658663477328e-06, |
|
"loss": 0.0204, |
|
"step": 9390 |
|
}, |
|
{ |
|
"epoch": 8.355555555555556, |
|
"grad_norm": 0.3144269287586212, |
|
"learning_rate": 7.213422483751292e-06, |
|
"loss": 0.0168, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 8.364444444444445, |
|
"grad_norm": 0.27913928031921387, |
|
"learning_rate": 7.1375560431107106e-06, |
|
"loss": 0.0193, |
|
"step": 9410 |
|
}, |
|
{ |
|
"epoch": 8.373333333333333, |
|
"grad_norm": 0.22302371263504028, |
|
"learning_rate": 7.062059997153442e-06, |
|
"loss": 0.0172, |
|
"step": 9420 |
|
}, |
|
{ |
|
"epoch": 8.382222222222222, |
|
"grad_norm": 0.21350453794002533, |
|
"learning_rate": 6.986934998276573e-06, |
|
"loss": 0.0249, |
|
"step": 9430 |
|
}, |
|
{ |
|
"epoch": 8.391111111111112, |
|
"grad_norm": 0.20942294597625732, |
|
"learning_rate": 6.912181695670822e-06, |
|
"loss": 0.0214, |
|
"step": 9440 |
|
}, |
|
{ |
|
"epoch": 8.4, |
|
"grad_norm": 0.3182884156703949, |
|
"learning_rate": 6.837800735314892e-06, |
|
"loss": 0.0193, |
|
"step": 9450 |
|
}, |
|
{ |
|
"epoch": 8.408888888888889, |
|
"grad_norm": 0.2200809121131897, |
|
"learning_rate": 6.763792759969883e-06, |
|
"loss": 0.0224, |
|
"step": 9460 |
|
}, |
|
{ |
|
"epoch": 8.417777777777777, |
|
"grad_norm": 0.23308202624320984, |
|
"learning_rate": 6.690158409173769e-06, |
|
"loss": 0.0179, |
|
"step": 9470 |
|
}, |
|
{ |
|
"epoch": 8.426666666666666, |
|
"grad_norm": 0.13453629612922668, |
|
"learning_rate": 6.616898319235859e-06, |
|
"loss": 0.0185, |
|
"step": 9480 |
|
}, |
|
{ |
|
"epoch": 8.435555555555556, |
|
"grad_norm": 0.1553480327129364, |
|
"learning_rate": 6.5440131232313186e-06, |
|
"loss": 0.0192, |
|
"step": 9490 |
|
}, |
|
{ |
|
"epoch": 8.444444444444445, |
|
"grad_norm": 0.23109425604343414, |
|
"learning_rate": 6.471503450995637e-06, |
|
"loss": 0.019, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 8.453333333333333, |
|
"grad_norm": 0.11503564566373825, |
|
"learning_rate": 6.3993699291192366e-06, |
|
"loss": 0.0207, |
|
"step": 9510 |
|
}, |
|
{ |
|
"epoch": 8.462222222222222, |
|
"grad_norm": 0.2218429446220398, |
|
"learning_rate": 6.327613180942049e-06, |
|
"loss": 0.0188, |
|
"step": 9520 |
|
}, |
|
{ |
|
"epoch": 8.471111111111112, |
|
"grad_norm": 0.23887720704078674, |
|
"learning_rate": 6.256233826548147e-06, |
|
"loss": 0.0152, |
|
"step": 9530 |
|
}, |
|
{ |
|
"epoch": 8.48, |
|
"grad_norm": 0.17835436761379242, |
|
"learning_rate": 6.1852324827603395e-06, |
|
"loss": 0.014, |
|
"step": 9540 |
|
}, |
|
{ |
|
"epoch": 8.488888888888889, |
|
"grad_norm": 0.23262593150138855, |
|
"learning_rate": 6.11460976313486e-06, |
|
"loss": 0.0171, |
|
"step": 9550 |
|
}, |
|
{ |
|
"epoch": 8.497777777777777, |
|
"grad_norm": 0.24947932362556458, |
|
"learning_rate": 6.044366277956087e-06, |
|
"loss": 0.0198, |
|
"step": 9560 |
|
}, |
|
{ |
|
"epoch": 8.506666666666666, |
|
"grad_norm": 0.18168048560619354, |
|
"learning_rate": 5.9745026342312715e-06, |
|
"loss": 0.0136, |
|
"step": 9570 |
|
}, |
|
{ |
|
"epoch": 8.515555555555556, |
|
"grad_norm": 0.17935986816883087, |
|
"learning_rate": 5.905019435685238e-06, |
|
"loss": 0.0177, |
|
"step": 9580 |
|
}, |
|
{ |
|
"epoch": 8.524444444444445, |
|
"grad_norm": 0.19676154851913452, |
|
"learning_rate": 5.8359172827552325e-06, |
|
"loss": 0.0141, |
|
"step": 9590 |
|
}, |
|
{ |
|
"epoch": 8.533333333333333, |
|
"grad_norm": 0.17629384994506836, |
|
"learning_rate": 5.76719677258567e-06, |
|
"loss": 0.0151, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 8.542222222222222, |
|
"grad_norm": 0.1432098150253296, |
|
"learning_rate": 5.698858499023063e-06, |
|
"loss": 0.0135, |
|
"step": 9610 |
|
}, |
|
{ |
|
"epoch": 8.551111111111112, |
|
"grad_norm": 0.19176144897937775, |
|
"learning_rate": 5.630903052610797e-06, |
|
"loss": 0.0186, |
|
"step": 9620 |
|
}, |
|
{ |
|
"epoch": 8.56, |
|
"grad_norm": 0.12425218522548676, |
|
"learning_rate": 5.5633310205840725e-06, |
|
"loss": 0.016, |
|
"step": 9630 |
|
}, |
|
{ |
|
"epoch": 8.568888888888889, |
|
"grad_norm": 0.16470980644226074, |
|
"learning_rate": 5.496142986864822e-06, |
|
"loss": 0.0142, |
|
"step": 9640 |
|
}, |
|
{ |
|
"epoch": 8.577777777777778, |
|
"grad_norm": 0.30990010499954224, |
|
"learning_rate": 5.429339532056687e-06, |
|
"loss": 0.0161, |
|
"step": 9650 |
|
}, |
|
{ |
|
"epoch": 8.586666666666666, |
|
"grad_norm": 0.24246929585933685, |
|
"learning_rate": 5.362921233439971e-06, |
|
"loss": 0.0174, |
|
"step": 9660 |
|
}, |
|
{ |
|
"epoch": 8.595555555555556, |
|
"grad_norm": 0.181803360581398, |
|
"learning_rate": 5.296888664966643e-06, |
|
"loss": 0.0165, |
|
"step": 9670 |
|
}, |
|
{ |
|
"epoch": 8.604444444444445, |
|
"grad_norm": 0.1624002605676651, |
|
"learning_rate": 5.231242397255404e-06, |
|
"loss": 0.0154, |
|
"step": 9680 |
|
}, |
|
{ |
|
"epoch": 8.613333333333333, |
|
"grad_norm": 0.186075821518898, |
|
"learning_rate": 5.165982997586771e-06, |
|
"loss": 0.016, |
|
"step": 9690 |
|
}, |
|
{ |
|
"epoch": 8.622222222222222, |
|
"grad_norm": 0.19883862137794495, |
|
"learning_rate": 5.101111029898125e-06, |
|
"loss": 0.0174, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 8.63111111111111, |
|
"grad_norm": 0.20108523964881897, |
|
"learning_rate": 5.036627054778859e-06, |
|
"loss": 0.0163, |
|
"step": 9710 |
|
}, |
|
{ |
|
"epoch": 8.64, |
|
"grad_norm": 0.12472091615200043, |
|
"learning_rate": 4.97253162946556e-06, |
|
"loss": 0.0198, |
|
"step": 9720 |
|
}, |
|
{ |
|
"epoch": 8.648888888888889, |
|
"grad_norm": 0.15161430835723877, |
|
"learning_rate": 4.9088253078371675e-06, |
|
"loss": 0.0173, |
|
"step": 9730 |
|
}, |
|
{ |
|
"epoch": 8.657777777777778, |
|
"grad_norm": 0.21465007960796356, |
|
"learning_rate": 4.845508640410174e-06, |
|
"loss": 0.0192, |
|
"step": 9740 |
|
}, |
|
{ |
|
"epoch": 8.666666666666666, |
|
"grad_norm": 0.12701228260993958, |
|
"learning_rate": 4.782582174333894e-06, |
|
"loss": 0.0159, |
|
"step": 9750 |
|
}, |
|
{ |
|
"epoch": 8.675555555555556, |
|
"grad_norm": 0.4060904383659363, |
|
"learning_rate": 4.720046453385746e-06, |
|
"loss": 0.0201, |
|
"step": 9760 |
|
}, |
|
{ |
|
"epoch": 8.684444444444445, |
|
"grad_norm": 0.17459413409233093, |
|
"learning_rate": 4.6579020179665086e-06, |
|
"loss": 0.0151, |
|
"step": 9770 |
|
}, |
|
{ |
|
"epoch": 8.693333333333333, |
|
"grad_norm": 0.21617580950260162, |
|
"learning_rate": 4.5961494050957066e-06, |
|
"loss": 0.0152, |
|
"step": 9780 |
|
}, |
|
{ |
|
"epoch": 8.702222222222222, |
|
"grad_norm": 0.1337454915046692, |
|
"learning_rate": 4.534789148406887e-06, |
|
"loss": 0.0141, |
|
"step": 9790 |
|
}, |
|
{ |
|
"epoch": 8.71111111111111, |
|
"grad_norm": 0.11474966257810593, |
|
"learning_rate": 4.473821778143128e-06, |
|
"loss": 0.0201, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 8.72, |
|
"grad_norm": 0.18956808745861053, |
|
"learning_rate": 4.413247821152333e-06, |
|
"loss": 0.0234, |
|
"step": 9810 |
|
}, |
|
{ |
|
"epoch": 8.72888888888889, |
|
"grad_norm": 0.21951065957546234, |
|
"learning_rate": 4.353067800882793e-06, |
|
"loss": 0.0204, |
|
"step": 9820 |
|
}, |
|
{ |
|
"epoch": 8.737777777777778, |
|
"grad_norm": 0.16629120707511902, |
|
"learning_rate": 4.293282237378532e-06, |
|
"loss": 0.0167, |
|
"step": 9830 |
|
}, |
|
{ |
|
"epoch": 8.746666666666666, |
|
"grad_norm": 0.1400579959154129, |
|
"learning_rate": 4.233891647274951e-06, |
|
"loss": 0.0153, |
|
"step": 9840 |
|
}, |
|
{ |
|
"epoch": 8.755555555555556, |
|
"grad_norm": 0.29963433742523193, |
|
"learning_rate": 4.174896543794266e-06, |
|
"loss": 0.0152, |
|
"step": 9850 |
|
}, |
|
{ |
|
"epoch": 8.764444444444445, |
|
"grad_norm": 0.2799823582172394, |
|
"learning_rate": 4.116297436741129e-06, |
|
"loss": 0.0217, |
|
"step": 9860 |
|
}, |
|
{ |
|
"epoch": 8.773333333333333, |
|
"grad_norm": 0.3716045022010803, |
|
"learning_rate": 4.0580948324981545e-06, |
|
"loss": 0.0164, |
|
"step": 9870 |
|
}, |
|
{ |
|
"epoch": 8.782222222222222, |
|
"grad_norm": 0.19331613183021545, |
|
"learning_rate": 4.000289234021643e-06, |
|
"loss": 0.0165, |
|
"step": 9880 |
|
}, |
|
{ |
|
"epoch": 8.79111111111111, |
|
"grad_norm": 0.18172837793827057, |
|
"learning_rate": 3.94288114083714e-06, |
|
"loss": 0.0168, |
|
"step": 9890 |
|
}, |
|
{ |
|
"epoch": 8.8, |
|
"grad_norm": 0.16873294115066528, |
|
"learning_rate": 3.8858710490352e-06, |
|
"loss": 0.0203, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 8.80888888888889, |
|
"grad_norm": 0.38768407702445984, |
|
"learning_rate": 3.829259451266992e-06, |
|
"loss": 0.016, |
|
"step": 9910 |
|
}, |
|
{ |
|
"epoch": 8.817777777777778, |
|
"grad_norm": 0.17706307768821716, |
|
"learning_rate": 3.773046836740174e-06, |
|
"loss": 0.0152, |
|
"step": 9920 |
|
}, |
|
{ |
|
"epoch": 8.826666666666666, |
|
"grad_norm": 0.1904781311750412, |
|
"learning_rate": 3.7172336912145887e-06, |
|
"loss": 0.0148, |
|
"step": 9930 |
|
}, |
|
{ |
|
"epoch": 8.835555555555555, |
|
"grad_norm": 0.23646555840969086, |
|
"learning_rate": 3.661820496998053e-06, |
|
"loss": 0.0185, |
|
"step": 9940 |
|
}, |
|
{ |
|
"epoch": 8.844444444444445, |
|
"grad_norm": 0.10552993416786194, |
|
"learning_rate": 3.60680773294223e-06, |
|
"loss": 0.0156, |
|
"step": 9950 |
|
}, |
|
{ |
|
"epoch": 8.853333333333333, |
|
"grad_norm": 0.1282564252614975, |
|
"learning_rate": 3.552195874438469e-06, |
|
"loss": 0.014, |
|
"step": 9960 |
|
}, |
|
{ |
|
"epoch": 8.862222222222222, |
|
"grad_norm": 0.16613101959228516, |
|
"learning_rate": 3.497985393413722e-06, |
|
"loss": 0.0156, |
|
"step": 9970 |
|
}, |
|
{ |
|
"epoch": 8.87111111111111, |
|
"grad_norm": 0.16166603565216064, |
|
"learning_rate": 3.444176758326434e-06, |
|
"loss": 0.017, |
|
"step": 9980 |
|
}, |
|
{ |
|
"epoch": 8.88, |
|
"grad_norm": 0.15295928716659546, |
|
"learning_rate": 3.3907704341625104e-06, |
|
"loss": 0.0141, |
|
"step": 9990 |
|
}, |
|
{ |
|
"epoch": 8.88888888888889, |
|
"grad_norm": 0.28686997294425964, |
|
"learning_rate": 3.3377668824312867e-06, |
|
"loss": 0.0154, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 8.897777777777778, |
|
"grad_norm": 0.258746862411499, |
|
"learning_rate": 3.2851665611615788e-06, |
|
"loss": 0.0141, |
|
"step": 10010 |
|
}, |
|
{ |
|
"epoch": 8.906666666666666, |
|
"grad_norm": 0.2104421854019165, |
|
"learning_rate": 3.2329699248976774e-06, |
|
"loss": 0.0147, |
|
"step": 10020 |
|
}, |
|
{ |
|
"epoch": 8.915555555555555, |
|
"grad_norm": 0.2846212089061737, |
|
"learning_rate": 3.181177424695442e-06, |
|
"loss": 0.0146, |
|
"step": 10030 |
|
}, |
|
{ |
|
"epoch": 8.924444444444445, |
|
"grad_norm": 0.22641883790493011, |
|
"learning_rate": 3.1297895081183857e-06, |
|
"loss": 0.0171, |
|
"step": 10040 |
|
}, |
|
{ |
|
"epoch": 8.933333333333334, |
|
"grad_norm": 0.15959568321704865, |
|
"learning_rate": 3.0788066192338705e-06, |
|
"loss": 0.0139, |
|
"step": 10050 |
|
}, |
|
{ |
|
"epoch": 8.942222222222222, |
|
"grad_norm": 0.12891852855682373, |
|
"learning_rate": 3.0282291986091724e-06, |
|
"loss": 0.0226, |
|
"step": 10060 |
|
}, |
|
{ |
|
"epoch": 8.95111111111111, |
|
"grad_norm": 0.2029985785484314, |
|
"learning_rate": 2.9780576833077447e-06, |
|
"loss": 0.0178, |
|
"step": 10070 |
|
}, |
|
{ |
|
"epoch": 8.96, |
|
"grad_norm": 0.19217246770858765, |
|
"learning_rate": 2.928292506885416e-06, |
|
"loss": 0.0152, |
|
"step": 10080 |
|
}, |
|
{ |
|
"epoch": 8.96888888888889, |
|
"grad_norm": 0.2340322881937027, |
|
"learning_rate": 2.8789340993866597e-06, |
|
"loss": 0.0167, |
|
"step": 10090 |
|
}, |
|
{ |
|
"epoch": 8.977777777777778, |
|
"grad_norm": 0.15603481233119965, |
|
"learning_rate": 2.829982887340854e-06, |
|
"loss": 0.0147, |
|
"step": 10100 |
|
}, |
|
{ |
|
"epoch": 8.986666666666666, |
|
"grad_norm": 0.1506328135728836, |
|
"learning_rate": 2.7814392937586144e-06, |
|
"loss": 0.0151, |
|
"step": 10110 |
|
}, |
|
{ |
|
"epoch": 8.995555555555555, |
|
"grad_norm": 0.18605013191699982, |
|
"learning_rate": 2.7333037381281124e-06, |
|
"loss": 0.0131, |
|
"step": 10120 |
|
}, |
|
{ |
|
"epoch": 9.004444444444445, |
|
"grad_norm": 0.25641930103302, |
|
"learning_rate": 2.6855766364115078e-06, |
|
"loss": 0.0164, |
|
"step": 10130 |
|
}, |
|
{ |
|
"epoch": 9.013333333333334, |
|
"grad_norm": 0.15456029772758484, |
|
"learning_rate": 2.638258401041288e-06, |
|
"loss": 0.0164, |
|
"step": 10140 |
|
}, |
|
{ |
|
"epoch": 9.022222222222222, |
|
"grad_norm": 0.11093217134475708, |
|
"learning_rate": 2.591349440916735e-06, |
|
"loss": 0.0148, |
|
"step": 10150 |
|
}, |
|
{ |
|
"epoch": 9.03111111111111, |
|
"grad_norm": 0.2694683074951172, |
|
"learning_rate": 2.5448501614004085e-06, |
|
"loss": 0.016, |
|
"step": 10160 |
|
}, |
|
{ |
|
"epoch": 9.04, |
|
"grad_norm": 0.21135805547237396, |
|
"learning_rate": 2.4987609643146003e-06, |
|
"loss": 0.0175, |
|
"step": 10170 |
|
}, |
|
{ |
|
"epoch": 9.04888888888889, |
|
"grad_norm": 0.10743017494678497, |
|
"learning_rate": 2.45308224793791e-06, |
|
"loss": 0.0138, |
|
"step": 10180 |
|
}, |
|
{ |
|
"epoch": 9.057777777777778, |
|
"grad_norm": 0.29398423433303833, |
|
"learning_rate": 2.4078144070017406e-06, |
|
"loss": 0.0186, |
|
"step": 10190 |
|
}, |
|
{ |
|
"epoch": 9.066666666666666, |
|
"grad_norm": 0.17509834468364716, |
|
"learning_rate": 2.362957832686974e-06, |
|
"loss": 0.0169, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 9.075555555555555, |
|
"grad_norm": 0.24791115522384644, |
|
"learning_rate": 2.3185129126204963e-06, |
|
"loss": 0.0176, |
|
"step": 10210 |
|
}, |
|
{ |
|
"epoch": 9.084444444444445, |
|
"grad_norm": 0.20842622220516205, |
|
"learning_rate": 2.274480030871945e-06, |
|
"loss": 0.0197, |
|
"step": 10220 |
|
}, |
|
{ |
|
"epoch": 9.093333333333334, |
|
"grad_norm": 0.14471065998077393, |
|
"learning_rate": 2.230859567950283e-06, |
|
"loss": 0.016, |
|
"step": 10230 |
|
}, |
|
{ |
|
"epoch": 9.102222222222222, |
|
"grad_norm": 0.15523673593997955, |
|
"learning_rate": 2.1876519008006137e-06, |
|
"loss": 0.0153, |
|
"step": 10240 |
|
}, |
|
{ |
|
"epoch": 9.11111111111111, |
|
"grad_norm": 0.12204235047101974, |
|
"learning_rate": 2.144857402800843e-06, |
|
"loss": 0.0135, |
|
"step": 10250 |
|
}, |
|
{ |
|
"epoch": 9.12, |
|
"grad_norm": 0.15432211756706238, |
|
"learning_rate": 2.1024764437585163e-06, |
|
"loss": 0.0194, |
|
"step": 10260 |
|
}, |
|
{ |
|
"epoch": 9.12888888888889, |
|
"grad_norm": 0.22348755598068237, |
|
"learning_rate": 2.0605093899075667e-06, |
|
"loss": 0.015, |
|
"step": 10270 |
|
}, |
|
{ |
|
"epoch": 9.137777777777778, |
|
"grad_norm": 0.17628537118434906, |
|
"learning_rate": 2.0189566039051922e-06, |
|
"loss": 0.0152, |
|
"step": 10280 |
|
}, |
|
{ |
|
"epoch": 9.146666666666667, |
|
"grad_norm": 0.16253896057605743, |
|
"learning_rate": 1.977818444828694e-06, |
|
"loss": 0.0181, |
|
"step": 10290 |
|
}, |
|
{ |
|
"epoch": 9.155555555555555, |
|
"grad_norm": 0.16251306235790253, |
|
"learning_rate": 1.9370952681724007e-06, |
|
"loss": 0.0175, |
|
"step": 10300 |
|
}, |
|
{ |
|
"epoch": 9.164444444444445, |
|
"grad_norm": 0.23469984531402588, |
|
"learning_rate": 1.8967874258445761e-06, |
|
"loss": 0.0167, |
|
"step": 10310 |
|
}, |
|
{ |
|
"epoch": 9.173333333333334, |
|
"grad_norm": 0.20014360547065735, |
|
"learning_rate": 1.8568952661643713e-06, |
|
"loss": 0.0139, |
|
"step": 10320 |
|
}, |
|
{ |
|
"epoch": 9.182222222222222, |
|
"grad_norm": 0.18267478048801422, |
|
"learning_rate": 1.8174191338588387e-06, |
|
"loss": 0.0146, |
|
"step": 10330 |
|
}, |
|
{ |
|
"epoch": 9.19111111111111, |
|
"grad_norm": 0.1568797081708908, |
|
"learning_rate": 1.7783593700599454e-06, |
|
"loss": 0.0165, |
|
"step": 10340 |
|
}, |
|
{ |
|
"epoch": 9.2, |
|
"grad_norm": 0.1912783533334732, |
|
"learning_rate": 1.7397163123016036e-06, |
|
"loss": 0.0184, |
|
"step": 10350 |
|
}, |
|
{ |
|
"epoch": 9.20888888888889, |
|
"grad_norm": 0.27481314539909363, |
|
"learning_rate": 1.7014902945167944e-06, |
|
"loss": 0.0161, |
|
"step": 10360 |
|
}, |
|
{ |
|
"epoch": 9.217777777777778, |
|
"grad_norm": 0.1612808257341385, |
|
"learning_rate": 1.6636816470346317e-06, |
|
"loss": 0.0165, |
|
"step": 10370 |
|
}, |
|
{ |
|
"epoch": 9.226666666666667, |
|
"grad_norm": 0.2516586184501648, |
|
"learning_rate": 1.6262906965775593e-06, |
|
"loss": 0.0177, |
|
"step": 10380 |
|
}, |
|
{ |
|
"epoch": 9.235555555555555, |
|
"grad_norm": 0.17888052761554718, |
|
"learning_rate": 1.5893177662584912e-06, |
|
"loss": 0.0181, |
|
"step": 10390 |
|
}, |
|
{ |
|
"epoch": 9.244444444444444, |
|
"grad_norm": 0.19422973692417145, |
|
"learning_rate": 1.5527631755780258e-06, |
|
"loss": 0.0154, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 9.253333333333334, |
|
"grad_norm": 0.16237683594226837, |
|
"learning_rate": 1.5166272404217086e-06, |
|
"loss": 0.0196, |
|
"step": 10410 |
|
}, |
|
{ |
|
"epoch": 9.262222222222222, |
|
"grad_norm": 0.2033921778202057, |
|
"learning_rate": 1.4809102730572732e-06, |
|
"loss": 0.022, |
|
"step": 10420 |
|
}, |
|
{ |
|
"epoch": 9.27111111111111, |
|
"grad_norm": 0.1451500952243805, |
|
"learning_rate": 1.44561258213195e-06, |
|
"loss": 0.0131, |
|
"step": 10430 |
|
}, |
|
{ |
|
"epoch": 9.28, |
|
"grad_norm": 0.1665751188993454, |
|
"learning_rate": 1.4107344726698057e-06, |
|
"loss": 0.0211, |
|
"step": 10440 |
|
}, |
|
{ |
|
"epoch": 9.28888888888889, |
|
"grad_norm": 0.21271996200084686, |
|
"learning_rate": 1.3762762460691137e-06, |
|
"loss": 0.0155, |
|
"step": 10450 |
|
}, |
|
{ |
|
"epoch": 9.297777777777778, |
|
"grad_norm": 0.3788555860519409, |
|
"learning_rate": 1.3422382000997325e-06, |
|
"loss": 0.0196, |
|
"step": 10460 |
|
}, |
|
{ |
|
"epoch": 9.306666666666667, |
|
"grad_norm": 0.2871220111846924, |
|
"learning_rate": 1.3086206289005366e-06, |
|
"loss": 0.0146, |
|
"step": 10470 |
|
}, |
|
{ |
|
"epoch": 9.315555555555555, |
|
"grad_norm": 0.1943928450345993, |
|
"learning_rate": 1.2754238229768845e-06, |
|
"loss": 0.0147, |
|
"step": 10480 |
|
}, |
|
{ |
|
"epoch": 9.324444444444444, |
|
"grad_norm": 0.2475123405456543, |
|
"learning_rate": 1.2426480691981102e-06, |
|
"loss": 0.0161, |
|
"step": 10490 |
|
}, |
|
{ |
|
"epoch": 9.333333333333334, |
|
"grad_norm": 0.28880929946899414, |
|
"learning_rate": 1.2102936507950246e-06, |
|
"loss": 0.0178, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 9.342222222222222, |
|
"grad_norm": 0.1512957513332367, |
|
"learning_rate": 1.1783608473574847e-06, |
|
"loss": 0.0174, |
|
"step": 10510 |
|
}, |
|
{ |
|
"epoch": 9.351111111111111, |
|
"grad_norm": 0.17499439418315887, |
|
"learning_rate": 1.1468499348319617e-06, |
|
"loss": 0.0143, |
|
"step": 10520 |
|
}, |
|
{ |
|
"epoch": 9.36, |
|
"grad_norm": 0.19885167479515076, |
|
"learning_rate": 1.1157611855191985e-06, |
|
"loss": 0.0171, |
|
"step": 10530 |
|
}, |
|
{ |
|
"epoch": 9.36888888888889, |
|
"grad_norm": 0.1282743364572525, |
|
"learning_rate": 1.0850948680717899e-06, |
|
"loss": 0.0156, |
|
"step": 10540 |
|
}, |
|
{ |
|
"epoch": 9.377777777777778, |
|
"grad_norm": 0.21806924045085907, |
|
"learning_rate": 1.0548512474919226e-06, |
|
"loss": 0.0163, |
|
"step": 10550 |
|
}, |
|
{ |
|
"epoch": 9.386666666666667, |
|
"grad_norm": 0.13541573286056519, |
|
"learning_rate": 1.0250305851290387e-06, |
|
"loss": 0.0197, |
|
"step": 10560 |
|
}, |
|
{ |
|
"epoch": 9.395555555555555, |
|
"grad_norm": 0.22700132429599762, |
|
"learning_rate": 9.956331386776208e-07, |
|
"loss": 0.0139, |
|
"step": 10570 |
|
}, |
|
{ |
|
"epoch": 9.404444444444444, |
|
"grad_norm": 0.16873204708099365, |
|
"learning_rate": 9.666591621749156e-07, |
|
"loss": 0.0161, |
|
"step": 10580 |
|
}, |
|
{ |
|
"epoch": 9.413333333333334, |
|
"grad_norm": 0.14228011667728424, |
|
"learning_rate": 9.381089059987913e-07, |
|
"loss": 0.0133, |
|
"step": 10590 |
|
}, |
|
{ |
|
"epoch": 9.422222222222222, |
|
"grad_norm": 0.18994258344173431, |
|
"learning_rate": 9.099826168655346e-07, |
|
"loss": 0.0164, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 9.431111111111111, |
|
"grad_norm": 0.15951943397521973, |
|
"learning_rate": 8.822805378277233e-07, |
|
"loss": 0.0153, |
|
"step": 10610 |
|
}, |
|
{ |
|
"epoch": 9.44, |
|
"grad_norm": 0.226401224732399, |
|
"learning_rate": 8.550029082721622e-07, |
|
"loss": 0.0188, |
|
"step": 10620 |
|
}, |
|
{ |
|
"epoch": 9.448888888888888, |
|
"grad_norm": 0.1551145613193512, |
|
"learning_rate": 8.281499639177626e-07, |
|
"loss": 0.015, |
|
"step": 10630 |
|
}, |
|
{ |
|
"epoch": 9.457777777777778, |
|
"grad_norm": 0.3820601999759674, |
|
"learning_rate": 8.017219368135431e-07, |
|
"loss": 0.0131, |
|
"step": 10640 |
|
}, |
|
{ |
|
"epoch": 9.466666666666667, |
|
"grad_norm": 0.1285940557718277, |
|
"learning_rate": 7.757190553365878e-07, |
|
"loss": 0.0189, |
|
"step": 10650 |
|
}, |
|
{ |
|
"epoch": 9.475555555555555, |
|
"grad_norm": 0.133507639169693, |
|
"learning_rate": 7.501415441901361e-07, |
|
"loss": 0.017, |
|
"step": 10660 |
|
}, |
|
{ |
|
"epoch": 9.484444444444444, |
|
"grad_norm": 0.18716610968112946, |
|
"learning_rate": 7.249896244015675e-07, |
|
"loss": 0.0167, |
|
"step": 10670 |
|
}, |
|
{ |
|
"epoch": 9.493333333333334, |
|
"grad_norm": 0.24179218709468842, |
|
"learning_rate": 7.002635133205315e-07, |
|
"loss": 0.0176, |
|
"step": 10680 |
|
}, |
|
{ |
|
"epoch": 9.502222222222223, |
|
"grad_norm": 0.16301466524600983, |
|
"learning_rate": 6.759634246170765e-07, |
|
"loss": 0.0151, |
|
"step": 10690 |
|
}, |
|
{ |
|
"epoch": 9.511111111111111, |
|
"grad_norm": 0.17607277631759644, |
|
"learning_rate": 6.520895682797789e-07, |
|
"loss": 0.0139, |
|
"step": 10700 |
|
}, |
|
{ |
|
"epoch": 9.52, |
|
"grad_norm": 0.15846852958202362, |
|
"learning_rate": 6.286421506139394e-07, |
|
"loss": 0.0148, |
|
"step": 10710 |
|
}, |
|
{ |
|
"epoch": 9.528888888888888, |
|
"grad_norm": 0.22087305784225464, |
|
"learning_rate": 6.05621374239801e-07, |
|
"loss": 0.0142, |
|
"step": 10720 |
|
}, |
|
{ |
|
"epoch": 9.537777777777778, |
|
"grad_norm": 0.1783401519060135, |
|
"learning_rate": 5.830274380907941e-07, |
|
"loss": 0.0162, |
|
"step": 10730 |
|
}, |
|
{ |
|
"epoch": 9.546666666666667, |
|
"grad_norm": 0.19832254946231842, |
|
"learning_rate": 5.60860537411828e-07, |
|
"loss": 0.0186, |
|
"step": 10740 |
|
}, |
|
{ |
|
"epoch": 9.555555555555555, |
|
"grad_norm": 0.18358613550662994, |
|
"learning_rate": 5.391208637576028e-07, |
|
"loss": 0.0136, |
|
"step": 10750 |
|
}, |
|
{ |
|
"epoch": 9.564444444444444, |
|
"grad_norm": 0.25920259952545166, |
|
"learning_rate": 5.178086049909214e-07, |
|
"loss": 0.0181, |
|
"step": 10760 |
|
}, |
|
{ |
|
"epoch": 9.573333333333334, |
|
"grad_norm": 0.20215286314487457, |
|
"learning_rate": 4.969239452811136e-07, |
|
"loss": 0.0127, |
|
"step": 10770 |
|
}, |
|
{ |
|
"epoch": 9.582222222222223, |
|
"grad_norm": 0.13215485215187073, |
|
"learning_rate": 4.76467065102415e-07, |
|
"loss": 0.0174, |
|
"step": 10780 |
|
}, |
|
{ |
|
"epoch": 9.591111111111111, |
|
"grad_norm": 0.24699951708316803, |
|
"learning_rate": 4.5643814123242924e-07, |
|
"loss": 0.0169, |
|
"step": 10790 |
|
}, |
|
{ |
|
"epoch": 9.6, |
|
"grad_norm": 0.204008549451828, |
|
"learning_rate": 4.3683734675056266e-07, |
|
"loss": 0.0164, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 9.608888888888888, |
|
"grad_norm": 0.2307695597410202, |
|
"learning_rate": 4.176648510365699e-07, |
|
"loss": 0.0181, |
|
"step": 10810 |
|
}, |
|
{ |
|
"epoch": 9.617777777777778, |
|
"grad_norm": 0.12950603663921356, |
|
"learning_rate": 3.989208197690719e-07, |
|
"loss": 0.0123, |
|
"step": 10820 |
|
}, |
|
{ |
|
"epoch": 9.626666666666667, |
|
"grad_norm": 0.2674529552459717, |
|
"learning_rate": 3.806054149241234e-07, |
|
"loss": 0.0136, |
|
"step": 10830 |
|
}, |
|
{ |
|
"epoch": 9.635555555555555, |
|
"grad_norm": 0.21486273407936096, |
|
"learning_rate": 3.6271879477380867e-07, |
|
"loss": 0.0167, |
|
"step": 10840 |
|
}, |
|
{ |
|
"epoch": 9.644444444444444, |
|
"grad_norm": 0.12806425988674164, |
|
"learning_rate": 3.4526111388488714e-07, |
|
"loss": 0.0192, |
|
"step": 10850 |
|
}, |
|
{ |
|
"epoch": 9.653333333333332, |
|
"grad_norm": 0.20233075320720673, |
|
"learning_rate": 3.282325231174499e-07, |
|
"loss": 0.0178, |
|
"step": 10860 |
|
}, |
|
{ |
|
"epoch": 9.662222222222223, |
|
"grad_norm": 0.38600340485572815, |
|
"learning_rate": 3.116331696236263e-07, |
|
"loss": 0.0172, |
|
"step": 10870 |
|
}, |
|
{ |
|
"epoch": 9.671111111111111, |
|
"grad_norm": 0.383775532245636, |
|
"learning_rate": 2.95463196846274e-07, |
|
"loss": 0.0221, |
|
"step": 10880 |
|
}, |
|
{ |
|
"epoch": 9.68, |
|
"grad_norm": 0.29409053921699524, |
|
"learning_rate": 2.7972274451780746e-07, |
|
"loss": 0.0185, |
|
"step": 10890 |
|
}, |
|
{ |
|
"epoch": 9.688888888888888, |
|
"grad_norm": 0.2864955961704254, |
|
"learning_rate": 2.644119486589158e-07, |
|
"loss": 0.0127, |
|
"step": 10900 |
|
}, |
|
{ |
|
"epoch": 9.697777777777778, |
|
"grad_norm": 0.20553098618984222, |
|
"learning_rate": 2.495309415774527e-07, |
|
"loss": 0.02, |
|
"step": 10910 |
|
}, |
|
{ |
|
"epoch": 9.706666666666667, |
|
"grad_norm": 0.17016050219535828, |
|
"learning_rate": 2.35079851867237e-07, |
|
"loss": 0.0153, |
|
"step": 10920 |
|
}, |
|
{ |
|
"epoch": 9.715555555555556, |
|
"grad_norm": 0.27841511368751526, |
|
"learning_rate": 2.210588044069928e-07, |
|
"loss": 0.017, |
|
"step": 10930 |
|
}, |
|
{ |
|
"epoch": 9.724444444444444, |
|
"grad_norm": 0.1952657848596573, |
|
"learning_rate": 2.0746792035922224e-07, |
|
"loss": 0.0181, |
|
"step": 10940 |
|
}, |
|
{ |
|
"epoch": 9.733333333333333, |
|
"grad_norm": 0.1383320689201355, |
|
"learning_rate": 1.943073171692067e-07, |
|
"loss": 0.0165, |
|
"step": 10950 |
|
}, |
|
{ |
|
"epoch": 9.742222222222223, |
|
"grad_norm": 0.09173349291086197, |
|
"learning_rate": 1.8157710856393506e-07, |
|
"loss": 0.0152, |
|
"step": 10960 |
|
}, |
|
{ |
|
"epoch": 9.751111111111111, |
|
"grad_norm": 0.23548445105552673, |
|
"learning_rate": 1.69277404551188e-07, |
|
"loss": 0.0197, |
|
"step": 10970 |
|
}, |
|
{ |
|
"epoch": 9.76, |
|
"grad_norm": 0.18757130205631256, |
|
"learning_rate": 1.5740831141852207e-07, |
|
"loss": 0.0149, |
|
"step": 10980 |
|
}, |
|
{ |
|
"epoch": 9.768888888888888, |
|
"grad_norm": 0.19751067459583282, |
|
"learning_rate": 1.4596993173239815e-07, |
|
"loss": 0.0167, |
|
"step": 10990 |
|
}, |
|
{ |
|
"epoch": 9.777777777777779, |
|
"grad_norm": 0.2124103605747223, |
|
"learning_rate": 1.349623643372766e-07, |
|
"loss": 0.0148, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 9.786666666666667, |
|
"grad_norm": 0.17946216464042664, |
|
"learning_rate": 1.2438570435475693e-07, |
|
"loss": 0.0158, |
|
"step": 11010 |
|
}, |
|
{ |
|
"epoch": 9.795555555555556, |
|
"grad_norm": 0.09408942610025406, |
|
"learning_rate": 1.1424004318277282e-07, |
|
"loss": 0.0111, |
|
"step": 11020 |
|
}, |
|
{ |
|
"epoch": 9.804444444444444, |
|
"grad_norm": 0.21881046891212463, |
|
"learning_rate": 1.0452546849478163e-07, |
|
"loss": 0.0199, |
|
"step": 11030 |
|
}, |
|
{ |
|
"epoch": 9.813333333333333, |
|
"grad_norm": 0.14049401879310608, |
|
"learning_rate": 9.524206423903726e-08, |
|
"loss": 0.0159, |
|
"step": 11040 |
|
}, |
|
{ |
|
"epoch": 9.822222222222223, |
|
"grad_norm": 0.24175013601779938, |
|
"learning_rate": 8.63899106378241e-08, |
|
"loss": 0.0164, |
|
"step": 11050 |
|
}, |
|
{ |
|
"epoch": 9.831111111111111, |
|
"grad_norm": 0.1336895376443863, |
|
"learning_rate": 7.796908418679638e-08, |
|
"loss": 0.0154, |
|
"step": 11060 |
|
}, |
|
{ |
|
"epoch": 9.84, |
|
"grad_norm": 0.2708650529384613, |
|
"learning_rate": 6.997965765430658e-08, |
|
"loss": 0.0222, |
|
"step": 11070 |
|
}, |
|
{ |
|
"epoch": 9.848888888888888, |
|
"grad_norm": 0.23007069528102875, |
|
"learning_rate": 6.242170008077253e-08, |
|
"loss": 0.0151, |
|
"step": 11080 |
|
}, |
|
{ |
|
"epoch": 9.857777777777777, |
|
"grad_norm": 0.26273036003112793, |
|
"learning_rate": 5.529527677808899e-08, |
|
"loss": 0.0136, |
|
"step": 11090 |
|
}, |
|
{ |
|
"epoch": 9.866666666666667, |
|
"grad_norm": 0.2389335185289383, |
|
"learning_rate": 4.860044932905039e-08, |
|
"loss": 0.021, |
|
"step": 11100 |
|
}, |
|
{ |
|
"epoch": 9.875555555555556, |
|
"grad_norm": 0.15268075466156006, |
|
"learning_rate": 4.2337275586840085e-08, |
|
"loss": 0.0205, |
|
"step": 11110 |
|
}, |
|
{ |
|
"epoch": 9.884444444444444, |
|
"grad_norm": 0.18056434392929077, |
|
"learning_rate": 3.650580967449746e-08, |
|
"loss": 0.0164, |
|
"step": 11120 |
|
}, |
|
{ |
|
"epoch": 9.893333333333333, |
|
"grad_norm": 0.18121588230133057, |
|
"learning_rate": 3.110610198449604e-08, |
|
"loss": 0.0171, |
|
"step": 11130 |
|
}, |
|
{ |
|
"epoch": 9.902222222222223, |
|
"grad_norm": 0.18942371010780334, |
|
"learning_rate": 2.6138199178260548e-08, |
|
"loss": 0.0155, |
|
"step": 11140 |
|
}, |
|
{ |
|
"epoch": 9.911111111111111, |
|
"grad_norm": 0.268178790807724, |
|
"learning_rate": 2.160214418578943e-08, |
|
"loss": 0.0133, |
|
"step": 11150 |
|
}, |
|
{ |
|
"epoch": 9.92, |
|
"grad_norm": 0.2843880355358124, |
|
"learning_rate": 1.749797620528848e-08, |
|
"loss": 0.0145, |
|
"step": 11160 |
|
}, |
|
{ |
|
"epoch": 9.928888888888888, |
|
"grad_norm": 0.2600668966770172, |
|
"learning_rate": 1.3825730702815565e-08, |
|
"loss": 0.0175, |
|
"step": 11170 |
|
}, |
|
{ |
|
"epoch": 9.937777777777779, |
|
"grad_norm": 0.16999982297420502, |
|
"learning_rate": 1.0585439411986419e-08, |
|
"loss": 0.0135, |
|
"step": 11180 |
|
}, |
|
{ |
|
"epoch": 9.946666666666667, |
|
"grad_norm": 0.2422129362821579, |
|
"learning_rate": 7.777130333685988e-09, |
|
"loss": 0.0154, |
|
"step": 11190 |
|
}, |
|
{ |
|
"epoch": 9.955555555555556, |
|
"grad_norm": 0.153123676776886, |
|
"learning_rate": 5.400827735851932e-09, |
|
"loss": 0.0223, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 9.964444444444444, |
|
"grad_norm": 0.15157046914100647, |
|
"learning_rate": 3.4565521532303747e-09, |
|
"loss": 0.0157, |
|
"step": 11210 |
|
}, |
|
{ |
|
"epoch": 9.973333333333333, |
|
"grad_norm": 0.23080949485301971, |
|
"learning_rate": 1.9443203872371308e-09, |
|
"loss": 0.0182, |
|
"step": 11220 |
|
}, |
|
{ |
|
"epoch": 9.982222222222223, |
|
"grad_norm": 0.17081643640995026, |
|
"learning_rate": 8.641455057800674e-10, |
|
"loss": 0.015, |
|
"step": 11230 |
|
}, |
|
{ |
|
"epoch": 9.991111111111111, |
|
"grad_norm": 0.09965389221906662, |
|
"learning_rate": 2.1603684316473527e-10, |
|
"loss": 0.016, |
|
"step": 11240 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"grad_norm": 0.5681173801422119, |
|
"learning_rate": 0.0, |
|
"loss": 0.0161, |
|
"step": 11250 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"step": 11250, |
|
"total_flos": 9.618789059049738e+17, |
|
"train_loss": 0.03627930305798849, |
|
"train_runtime": 9056.7642, |
|
"train_samples_per_second": 33.516, |
|
"train_steps_per_second": 1.242 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 11250, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 10, |
|
"save_steps": 10000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 9.618789059049738e+17, |
|
"train_batch_size": 27, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|