|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9994910941475827, |
|
"eval_steps": 500, |
|
"global_step": 491, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.002035623409669211, |
|
"grad_norm": 2244783.3021161165, |
|
"learning_rate": 1.0000000000000001e-07, |
|
"loss": 13.8236, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.004071246819338422, |
|
"grad_norm": 4218192.586319844, |
|
"learning_rate": 2.0000000000000002e-07, |
|
"loss": 13.7798, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.0061068702290076335, |
|
"grad_norm": 4594205.112133389, |
|
"learning_rate": 3.0000000000000004e-07, |
|
"loss": 13.844, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.008142493638676845, |
|
"grad_norm": 11263009.553503217, |
|
"learning_rate": 4.0000000000000003e-07, |
|
"loss": 13.8135, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.010178117048346057, |
|
"grad_norm": 8938655.40470094, |
|
"learning_rate": 5.000000000000001e-07, |
|
"loss": 13.8361, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.012213740458015267, |
|
"grad_norm": 3666969.2787727225, |
|
"learning_rate": 6.000000000000001e-07, |
|
"loss": 13.7754, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.014249363867684479, |
|
"grad_norm": 2213140.2581639104, |
|
"learning_rate": 7.000000000000001e-07, |
|
"loss": 13.816, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.01628498727735369, |
|
"grad_norm": 5204945.168357011, |
|
"learning_rate": 8.000000000000001e-07, |
|
"loss": 13.7919, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.0183206106870229, |
|
"grad_norm": 3161273.4979725075, |
|
"learning_rate": 9.000000000000001e-07, |
|
"loss": 13.7921, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.020356234096692113, |
|
"grad_norm": 4721972.526161844, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 13.8129, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.022391857506361322, |
|
"grad_norm": 1993836.4706857507, |
|
"learning_rate": 1.1e-06, |
|
"loss": 13.7899, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.024427480916030534, |
|
"grad_norm": 2841663.624406631, |
|
"learning_rate": 1.2000000000000002e-06, |
|
"loss": 13.8222, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.026463104325699746, |
|
"grad_norm": 1851625.2694659713, |
|
"learning_rate": 1.3e-06, |
|
"loss": 13.8051, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.028498727735368958, |
|
"grad_norm": 1799567.0732118108, |
|
"learning_rate": 1.4000000000000001e-06, |
|
"loss": 13.8232, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.030534351145038167, |
|
"grad_norm": 1739244.434987474, |
|
"learning_rate": 1.5e-06, |
|
"loss": 13.787, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.03256997455470738, |
|
"grad_norm": 2049274.0550949178, |
|
"learning_rate": 1.6000000000000001e-06, |
|
"loss": 13.8272, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.03460559796437659, |
|
"grad_norm": 2196819.6130670137, |
|
"learning_rate": 1.7000000000000002e-06, |
|
"loss": 13.7819, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.0366412213740458, |
|
"grad_norm": 3122206.233837503, |
|
"learning_rate": 1.8000000000000001e-06, |
|
"loss": 13.7931, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.03867684478371501, |
|
"grad_norm": 6250105.385689335, |
|
"learning_rate": 1.9000000000000002e-06, |
|
"loss": 13.8169, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.04071246819338423, |
|
"grad_norm": 3333302.4953560205, |
|
"learning_rate": 2.0000000000000003e-06, |
|
"loss": 13.7969, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.042748091603053436, |
|
"grad_norm": 1842864.15838755, |
|
"learning_rate": 2.1000000000000002e-06, |
|
"loss": 13.7789, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.044783715012722644, |
|
"grad_norm": 2814803.976988681, |
|
"learning_rate": 2.2e-06, |
|
"loss": 13.8063, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.04681933842239186, |
|
"grad_norm": 5480609.064869341, |
|
"learning_rate": 2.3000000000000004e-06, |
|
"loss": 13.8059, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.04885496183206107, |
|
"grad_norm": 2330308.564996049, |
|
"learning_rate": 2.4000000000000003e-06, |
|
"loss": 13.7774, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.05089058524173028, |
|
"grad_norm": 1954680.474250264, |
|
"learning_rate": 2.5e-06, |
|
"loss": 13.8188, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.05292620865139949, |
|
"grad_norm": 2348178.4882796686, |
|
"learning_rate": 2.6e-06, |
|
"loss": 13.7916, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.0549618320610687, |
|
"grad_norm": 2063424.030791769, |
|
"learning_rate": 2.7000000000000004e-06, |
|
"loss": 13.8352, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.056997455470737916, |
|
"grad_norm": 3096470.2787967124, |
|
"learning_rate": 2.8000000000000003e-06, |
|
"loss": 13.814, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.059033078880407125, |
|
"grad_norm": 2313554.624060415, |
|
"learning_rate": 2.9e-06, |
|
"loss": 13.8149, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.061068702290076333, |
|
"grad_norm": 2759287.606120093, |
|
"learning_rate": 3e-06, |
|
"loss": 13.7901, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.06310432569974554, |
|
"grad_norm": 20688012.381375346, |
|
"learning_rate": 3.1000000000000004e-06, |
|
"loss": 13.8016, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.06513994910941476, |
|
"grad_norm": 2818955.21640328, |
|
"learning_rate": 3.2000000000000003e-06, |
|
"loss": 13.8174, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.06717557251908397, |
|
"grad_norm": 3642298.4739112426, |
|
"learning_rate": 3.3000000000000006e-06, |
|
"loss": 13.8439, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.06921119592875317, |
|
"grad_norm": 4530146.859043687, |
|
"learning_rate": 3.4000000000000005e-06, |
|
"loss": 13.798, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.07124681933842239, |
|
"grad_norm": 3901948.6988777495, |
|
"learning_rate": 3.5e-06, |
|
"loss": 13.7871, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.0732824427480916, |
|
"grad_norm": 2763745.5952281994, |
|
"learning_rate": 3.6000000000000003e-06, |
|
"loss": 13.7899, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.07531806615776081, |
|
"grad_norm": 6149669.662012621, |
|
"learning_rate": 3.7e-06, |
|
"loss": 13.7854, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.07735368956743002, |
|
"grad_norm": 2493897.7698470936, |
|
"learning_rate": 3.8000000000000005e-06, |
|
"loss": 13.7955, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.07938931297709924, |
|
"grad_norm": 1961343.832258401, |
|
"learning_rate": 3.900000000000001e-06, |
|
"loss": 13.8098, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.08142493638676845, |
|
"grad_norm": 2547368.4155277675, |
|
"learning_rate": 4.000000000000001e-06, |
|
"loss": 13.8144, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.08346055979643766, |
|
"grad_norm": 12326034.534851212, |
|
"learning_rate": 4.1e-06, |
|
"loss": 13.7863, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.08549618320610687, |
|
"grad_norm": 3960181.1514259093, |
|
"learning_rate": 4.2000000000000004e-06, |
|
"loss": 13.766, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.08753180661577609, |
|
"grad_norm": 10011098.88152794, |
|
"learning_rate": 4.3e-06, |
|
"loss": 13.8034, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.08956743002544529, |
|
"grad_norm": 3789423.3303024317, |
|
"learning_rate": 4.4e-06, |
|
"loss": 13.791, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.0916030534351145, |
|
"grad_norm": 3039428.004186018, |
|
"learning_rate": 4.5e-06, |
|
"loss": 13.7903, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.09363867684478372, |
|
"grad_norm": 3356582.7961585973, |
|
"learning_rate": 4.600000000000001e-06, |
|
"loss": 13.8235, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.09567430025445292, |
|
"grad_norm": 2820493.914527673, |
|
"learning_rate": 4.7e-06, |
|
"loss": 13.8022, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.09770992366412214, |
|
"grad_norm": 3791222.908966115, |
|
"learning_rate": 4.800000000000001e-06, |
|
"loss": 13.7717, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.09974554707379135, |
|
"grad_norm": 3283270.2287080307, |
|
"learning_rate": 4.9000000000000005e-06, |
|
"loss": 13.787, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.10178117048346055, |
|
"grad_norm": 2485970.4909266913, |
|
"learning_rate": 5e-06, |
|
"loss": 13.7976, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.10381679389312977, |
|
"grad_norm": 4547104.520705372, |
|
"learning_rate": 5.1e-06, |
|
"loss": 13.7639, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.10585241730279898, |
|
"grad_norm": 2824615.9607255333, |
|
"learning_rate": 5.2e-06, |
|
"loss": 13.7782, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.1078880407124682, |
|
"grad_norm": 2095536.7289427207, |
|
"learning_rate": 5.300000000000001e-06, |
|
"loss": 13.8148, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.1099236641221374, |
|
"grad_norm": 2351584.5173169677, |
|
"learning_rate": 5.400000000000001e-06, |
|
"loss": 13.7998, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.11195928753180662, |
|
"grad_norm": 3228695.507820654, |
|
"learning_rate": 5.500000000000001e-06, |
|
"loss": 13.8394, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.11399491094147583, |
|
"grad_norm": 9313699.37550104, |
|
"learning_rate": 5.600000000000001e-06, |
|
"loss": 13.7991, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.11603053435114503, |
|
"grad_norm": 3293386.5704521113, |
|
"learning_rate": 5.7e-06, |
|
"loss": 13.7839, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.11806615776081425, |
|
"grad_norm": 3718314.8927475032, |
|
"learning_rate": 5.8e-06, |
|
"loss": 13.811, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.12010178117048347, |
|
"grad_norm": 3124256.4145811866, |
|
"learning_rate": 5.9e-06, |
|
"loss": 13.7796, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.12213740458015267, |
|
"grad_norm": 3177179.019257336, |
|
"learning_rate": 6e-06, |
|
"loss": 13.7927, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.12417302798982188, |
|
"grad_norm": 3023297.2648282214, |
|
"learning_rate": 6.1e-06, |
|
"loss": 13.7867, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.12620865139949108, |
|
"grad_norm": 3214697.401730724, |
|
"learning_rate": 6.200000000000001e-06, |
|
"loss": 13.7771, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.1282442748091603, |
|
"grad_norm": 5100755.95841697, |
|
"learning_rate": 6.300000000000001e-06, |
|
"loss": 13.8624, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.13027989821882952, |
|
"grad_norm": 2723163.1725419424, |
|
"learning_rate": 6.4000000000000006e-06, |
|
"loss": 13.8343, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.13231552162849872, |
|
"grad_norm": 3190220.582667358, |
|
"learning_rate": 6.5000000000000004e-06, |
|
"loss": 13.8278, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.13435114503816795, |
|
"grad_norm": 4004364.9481327008, |
|
"learning_rate": 6.600000000000001e-06, |
|
"loss": 13.8181, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.13638676844783715, |
|
"grad_norm": 3042780.596978967, |
|
"learning_rate": 6.700000000000001e-06, |
|
"loss": 13.8136, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.13842239185750635, |
|
"grad_norm": 3848690.0005479343, |
|
"learning_rate": 6.800000000000001e-06, |
|
"loss": 13.8067, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.14045801526717558, |
|
"grad_norm": 3081170.591628097, |
|
"learning_rate": 6.9e-06, |
|
"loss": 13.802, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.14249363867684478, |
|
"grad_norm": 3465407.339307021, |
|
"learning_rate": 7e-06, |
|
"loss": 13.7681, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.14452926208651398, |
|
"grad_norm": 2647645.8680279553, |
|
"learning_rate": 7.100000000000001e-06, |
|
"loss": 13.8244, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.1465648854961832, |
|
"grad_norm": 2676701.9454676, |
|
"learning_rate": 7.2000000000000005e-06, |
|
"loss": 13.8011, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.1486005089058524, |
|
"grad_norm": 2103570.9894912024, |
|
"learning_rate": 7.3e-06, |
|
"loss": 13.7988, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.15063613231552161, |
|
"grad_norm": 2809043.7511407104, |
|
"learning_rate": 7.4e-06, |
|
"loss": 13.8103, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.15267175572519084, |
|
"grad_norm": 4000235.7083044164, |
|
"learning_rate": 7.500000000000001e-06, |
|
"loss": 13.8168, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.15470737913486005, |
|
"grad_norm": 8459155.529969739, |
|
"learning_rate": 7.600000000000001e-06, |
|
"loss": 13.774, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.15674300254452928, |
|
"grad_norm": 16399233.534973303, |
|
"learning_rate": 7.7e-06, |
|
"loss": 13.8219, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.15877862595419848, |
|
"grad_norm": 2696806.469277922, |
|
"learning_rate": 7.800000000000002e-06, |
|
"loss": 13.7919, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.16081424936386768, |
|
"grad_norm": 6832348.586697958, |
|
"learning_rate": 7.9e-06, |
|
"loss": 13.8085, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.1628498727735369, |
|
"grad_norm": 2802377.0728139468, |
|
"learning_rate": 8.000000000000001e-06, |
|
"loss": 13.7758, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.1648854961832061, |
|
"grad_norm": 3693570.1419803873, |
|
"learning_rate": 8.1e-06, |
|
"loss": 13.8108, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.1669211195928753, |
|
"grad_norm": 1899967.9199083971, |
|
"learning_rate": 8.2e-06, |
|
"loss": 13.7788, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.16895674300254454, |
|
"grad_norm": 2632120.314827873, |
|
"learning_rate": 8.3e-06, |
|
"loss": 13.7979, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.17099236641221374, |
|
"grad_norm": 2240799.146098359, |
|
"learning_rate": 8.400000000000001e-06, |
|
"loss": 13.7656, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.17302798982188294, |
|
"grad_norm": 2881127.214705138, |
|
"learning_rate": 8.5e-06, |
|
"loss": 13.8209, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.17506361323155217, |
|
"grad_norm": 4313944.882142538, |
|
"learning_rate": 8.6e-06, |
|
"loss": 13.8141, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.17709923664122137, |
|
"grad_norm": 4205463.004393998, |
|
"learning_rate": 8.700000000000001e-06, |
|
"loss": 13.7967, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.17913486005089058, |
|
"grad_norm": 2478941.649437644, |
|
"learning_rate": 8.8e-06, |
|
"loss": 13.7916, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.1811704834605598, |
|
"grad_norm": 3528590.3048747736, |
|
"learning_rate": 8.900000000000001e-06, |
|
"loss": 13.7464, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.183206106870229, |
|
"grad_norm": 2318484.135702536, |
|
"learning_rate": 9e-06, |
|
"loss": 13.8293, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.1852417302798982, |
|
"grad_norm": 2158526.8137466703, |
|
"learning_rate": 9.100000000000001e-06, |
|
"loss": 13.7882, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.18727735368956744, |
|
"grad_norm": 3658620.230343455, |
|
"learning_rate": 9.200000000000002e-06, |
|
"loss": 13.7981, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.18931297709923664, |
|
"grad_norm": 2456882.2321177353, |
|
"learning_rate": 9.3e-06, |
|
"loss": 13.8297, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.19134860050890584, |
|
"grad_norm": 2990599.371813722, |
|
"learning_rate": 9.4e-06, |
|
"loss": 13.7921, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.19338422391857507, |
|
"grad_norm": 5560006.043017588, |
|
"learning_rate": 9.5e-06, |
|
"loss": 13.8165, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.19541984732824427, |
|
"grad_norm": 1762528.334237519, |
|
"learning_rate": 9.600000000000001e-06, |
|
"loss": 13.794, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.19745547073791347, |
|
"grad_norm": 2238736.0246347915, |
|
"learning_rate": 9.7e-06, |
|
"loss": 13.8312, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.1994910941475827, |
|
"grad_norm": 1991545.6391396692, |
|
"learning_rate": 9.800000000000001e-06, |
|
"loss": 13.7997, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.2015267175572519, |
|
"grad_norm": 2812661.8889751355, |
|
"learning_rate": 9.9e-06, |
|
"loss": 13.7873, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.2035623409669211, |
|
"grad_norm": 2569208.410955407, |
|
"learning_rate": 1e-05, |
|
"loss": 13.8016, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.20559796437659034, |
|
"grad_norm": 5078922.075783424, |
|
"learning_rate": 9.999838607294157e-06, |
|
"loss": 13.8041, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.20763358778625954, |
|
"grad_norm": 2652997.6261025066, |
|
"learning_rate": 9.999354439595668e-06, |
|
"loss": 13.7995, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.20966921119592874, |
|
"grad_norm": 7720533.0329790395, |
|
"learning_rate": 9.998547528160987e-06, |
|
"loss": 13.7946, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.21170483460559797, |
|
"grad_norm": 4996759.547611104, |
|
"learning_rate": 9.997417925081963e-06, |
|
"loss": 13.8414, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.21374045801526717, |
|
"grad_norm": 2594583.6722655715, |
|
"learning_rate": 9.995965703282472e-06, |
|
"loss": 13.8074, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.2157760814249364, |
|
"grad_norm": 2448594.3176417607, |
|
"learning_rate": 9.99419095651372e-06, |
|
"loss": 13.7975, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.2178117048346056, |
|
"grad_norm": 2714056.8451403086, |
|
"learning_rate": 9.992093799348182e-06, |
|
"loss": 13.8033, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.2198473282442748, |
|
"grad_norm": 2961036.7491148347, |
|
"learning_rate": 9.9896743671722e-06, |
|
"loss": 13.8288, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.22188295165394403, |
|
"grad_norm": 4655525.14394145, |
|
"learning_rate": 9.986932816177258e-06, |
|
"loss": 13.809, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.22391857506361323, |
|
"grad_norm": 4477853.58417314, |
|
"learning_rate": 9.98386932334989e-06, |
|
"loss": 13.7901, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.22595419847328244, |
|
"grad_norm": 7786381.525914281, |
|
"learning_rate": 9.980484086460258e-06, |
|
"loss": 13.7985, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.22798982188295167, |
|
"grad_norm": 2476772.377738302, |
|
"learning_rate": 9.976777324049374e-06, |
|
"loss": 13.8245, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.23002544529262087, |
|
"grad_norm": 2424723.3375852606, |
|
"learning_rate": 9.972749275415005e-06, |
|
"loss": 13.8106, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 0.23206106870229007, |
|
"grad_norm": 3908900.533498587, |
|
"learning_rate": 9.96840020059622e-06, |
|
"loss": 13.8147, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.2340966921119593, |
|
"grad_norm": 6536739.236436032, |
|
"learning_rate": 9.963730380356599e-06, |
|
"loss": 13.7825, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.2361323155216285, |
|
"grad_norm": 2895977.217837065, |
|
"learning_rate": 9.958740116166113e-06, |
|
"loss": 13.8305, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.2381679389312977, |
|
"grad_norm": 3560241.3442100273, |
|
"learning_rate": 9.953429730181653e-06, |
|
"loss": 13.8031, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.24020356234096693, |
|
"grad_norm": 1955074.0369023099, |
|
"learning_rate": 9.947799565226253e-06, |
|
"loss": 13.8003, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.24223918575063613, |
|
"grad_norm": 3497318.257096594, |
|
"learning_rate": 9.94184998476693e-06, |
|
"loss": 13.8214, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 0.24427480916030533, |
|
"grad_norm": 3346632.116332331, |
|
"learning_rate": 9.93558137289124e-06, |
|
"loss": 13.7768, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.24631043256997456, |
|
"grad_norm": 2439047.5049070125, |
|
"learning_rate": 9.928994134282477e-06, |
|
"loss": 13.8308, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 0.24834605597964376, |
|
"grad_norm": 2664458.243837202, |
|
"learning_rate": 9.922088694193546e-06, |
|
"loss": 13.8132, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.250381679389313, |
|
"grad_norm": 2619630.1649600505, |
|
"learning_rate": 9.91486549841951e-06, |
|
"loss": 13.8046, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 0.25241730279898217, |
|
"grad_norm": 6268665.590956273, |
|
"learning_rate": 9.907325013268816e-06, |
|
"loss": 13.8003, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.2544529262086514, |
|
"grad_norm": 3719517.4601419703, |
|
"learning_rate": 9.899467725533181e-06, |
|
"loss": 13.8048, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.2564885496183206, |
|
"grad_norm": 3607937.2215440352, |
|
"learning_rate": 9.89129414245618e-06, |
|
"loss": 13.7788, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.2585241730279898, |
|
"grad_norm": 4184303.763179342, |
|
"learning_rate": 9.882804791700488e-06, |
|
"loss": 13.7775, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 0.26055979643765903, |
|
"grad_norm": 1838438.1959072966, |
|
"learning_rate": 9.87400022131382e-06, |
|
"loss": 13.8457, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.26259541984732826, |
|
"grad_norm": 2268135.323406917, |
|
"learning_rate": 9.864880999693551e-06, |
|
"loss": 13.7905, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.26463104325699743, |
|
"grad_norm": 6234862.720815243, |
|
"learning_rate": 9.855447715550024e-06, |
|
"loss": 13.8322, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.26666666666666666, |
|
"grad_norm": 2161673.2272078265, |
|
"learning_rate": 9.845700977868536e-06, |
|
"loss": 13.7802, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 0.2687022900763359, |
|
"grad_norm": 2040946.090751483, |
|
"learning_rate": 9.835641415870038e-06, |
|
"loss": 13.8012, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.27073791348600507, |
|
"grad_norm": 1690704.8881864555, |
|
"learning_rate": 9.825269678970502e-06, |
|
"loss": 13.8144, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 0.2727735368956743, |
|
"grad_norm": 3302151.7787588844, |
|
"learning_rate": 9.814586436738998e-06, |
|
"loss": 13.7961, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.2748091603053435, |
|
"grad_norm": 2678767.7092372375, |
|
"learning_rate": 9.803592378854476e-06, |
|
"loss": 13.7924, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.2768447837150127, |
|
"grad_norm": 1882847.5546937664, |
|
"learning_rate": 9.792288215061237e-06, |
|
"loss": 13.7571, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.27888040712468193, |
|
"grad_norm": 2535286.1758342357, |
|
"learning_rate": 9.780674675123113e-06, |
|
"loss": 13.8159, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 0.28091603053435116, |
|
"grad_norm": 2564638.3112862715, |
|
"learning_rate": 9.768752508776358e-06, |
|
"loss": 13.8283, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.28295165394402033, |
|
"grad_norm": 4141816.881587341, |
|
"learning_rate": 9.756522485681247e-06, |
|
"loss": 13.8205, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 0.28498727735368956, |
|
"grad_norm": 2736144.4377185158, |
|
"learning_rate": 9.743985395372387e-06, |
|
"loss": 13.8191, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.2870229007633588, |
|
"grad_norm": 2915473.6501582544, |
|
"learning_rate": 9.73114204720775e-06, |
|
"loss": 13.7825, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 0.28905852417302796, |
|
"grad_norm": 4206163.240001201, |
|
"learning_rate": 9.717993270316421e-06, |
|
"loss": 13.8325, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.2910941475826972, |
|
"grad_norm": 5230479.261609765, |
|
"learning_rate": 9.704539913545073e-06, |
|
"loss": 13.8205, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 0.2931297709923664, |
|
"grad_norm": 1844426.584030888, |
|
"learning_rate": 9.690782845403164e-06, |
|
"loss": 13.7856, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.2951653944020356, |
|
"grad_norm": 2494575.837554648, |
|
"learning_rate": 9.676722954006878e-06, |
|
"loss": 13.7562, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.2972010178117048, |
|
"grad_norm": 4691983.503821501, |
|
"learning_rate": 9.66236114702178e-06, |
|
"loss": 13.8174, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.29923664122137406, |
|
"grad_norm": 11448273.842583835, |
|
"learning_rate": 9.647698351604227e-06, |
|
"loss": 13.811, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 0.30127226463104323, |
|
"grad_norm": 2421950.649919781, |
|
"learning_rate": 9.632735514341508e-06, |
|
"loss": 13.798, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.30330788804071246, |
|
"grad_norm": 3423539.343520856, |
|
"learning_rate": 9.617473601190743e-06, |
|
"loss": 13.7918, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 0.3053435114503817, |
|
"grad_norm": 3264738.7097397717, |
|
"learning_rate": 9.601913597416513e-06, |
|
"loss": 13.8008, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.3073791348600509, |
|
"grad_norm": 1943889.953196763, |
|
"learning_rate": 9.586056507527266e-06, |
|
"loss": 13.7854, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 0.3094147582697201, |
|
"grad_norm": 3137121.9149822216, |
|
"learning_rate": 9.569903355210457e-06, |
|
"loss": 13.8321, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.3114503816793893, |
|
"grad_norm": 2080163.2784456573, |
|
"learning_rate": 9.55345518326647e-06, |
|
"loss": 13.7755, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 0.31348600508905855, |
|
"grad_norm": 1730160.4186251867, |
|
"learning_rate": 9.5367130535413e-06, |
|
"loss": 13.8155, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 0.3155216284987277, |
|
"grad_norm": 4116918.3545234683, |
|
"learning_rate": 9.519678046857987e-06, |
|
"loss": 13.8182, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.31755725190839695, |
|
"grad_norm": 3736775.455979484, |
|
"learning_rate": 9.502351262946865e-06, |
|
"loss": 13.8155, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.3195928753180662, |
|
"grad_norm": 4078409.8308530655, |
|
"learning_rate": 9.48473382037455e-06, |
|
"loss": 13.8245, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 0.32162849872773536, |
|
"grad_norm": 102844859.60289915, |
|
"learning_rate": 9.466826856471728e-06, |
|
"loss": 13.7664, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 0.3236641221374046, |
|
"grad_norm": 2697351.488139713, |
|
"learning_rate": 9.448631527259749e-06, |
|
"loss": 13.824, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 0.3256997455470738, |
|
"grad_norm": 2650130.4884775463, |
|
"learning_rate": 9.430149007375974e-06, |
|
"loss": 13.82, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.327735368956743, |
|
"grad_norm": 2848577.7461152556, |
|
"learning_rate": 9.411380489997962e-06, |
|
"loss": 13.7988, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 0.3297709923664122, |
|
"grad_norm": 2491523.6147146528, |
|
"learning_rate": 9.392327186766434e-06, |
|
"loss": 13.7801, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.33180661577608145, |
|
"grad_norm": 2225342.8572617928, |
|
"learning_rate": 9.372990327707057e-06, |
|
"loss": 13.8134, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 0.3338422391857506, |
|
"grad_norm": 3593367.5642024544, |
|
"learning_rate": 9.353371161151032e-06, |
|
"loss": 13.7934, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.33587786259541985, |
|
"grad_norm": 3596895.2611031746, |
|
"learning_rate": 9.333470953654513e-06, |
|
"loss": 13.8181, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.3379134860050891, |
|
"grad_norm": 1983403.4312289366, |
|
"learning_rate": 9.31329098991683e-06, |
|
"loss": 13.8016, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 0.33994910941475825, |
|
"grad_norm": 2840359.066969552, |
|
"learning_rate": 9.292832572697566e-06, |
|
"loss": 13.7929, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 0.3419847328244275, |
|
"grad_norm": 2440280.3409614386, |
|
"learning_rate": 9.272097022732444e-06, |
|
"loss": 13.7855, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.3440203562340967, |
|
"grad_norm": 3564583.9519162746, |
|
"learning_rate": 9.251085678648072e-06, |
|
"loss": 13.7962, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 0.3460559796437659, |
|
"grad_norm": 3017839.1906693154, |
|
"learning_rate": 9.22979989687552e-06, |
|
"loss": 13.7882, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.3480916030534351, |
|
"grad_norm": 1926367.5770608934, |
|
"learning_rate": 9.208241051562753e-06, |
|
"loss": 13.829, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 0.35012722646310435, |
|
"grad_norm": 2347553.991884138, |
|
"learning_rate": 9.186410534485924e-06, |
|
"loss": 13.7865, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.3521628498727735, |
|
"grad_norm": 3954302.4427076643, |
|
"learning_rate": 9.164309754959523e-06, |
|
"loss": 13.8042, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 0.35419847328244275, |
|
"grad_norm": 3440382.5762445726, |
|
"learning_rate": 9.14194013974539e-06, |
|
"loss": 13.8065, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 0.356234096692112, |
|
"grad_norm": 3100692.355061626, |
|
"learning_rate": 9.11930313296062e-06, |
|
"loss": 13.8002, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.35826972010178115, |
|
"grad_norm": 3335035.757879338, |
|
"learning_rate": 9.096400195984322e-06, |
|
"loss": 13.8002, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.3603053435114504, |
|
"grad_norm": 2934238.222131686, |
|
"learning_rate": 9.073232807363283e-06, |
|
"loss": 13.7968, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 0.3623409669211196, |
|
"grad_norm": 3863277.6264942884, |
|
"learning_rate": 9.049802462716521e-06, |
|
"loss": 13.8059, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 0.3643765903307888, |
|
"grad_norm": 1979824.2277182264, |
|
"learning_rate": 9.026110674638722e-06, |
|
"loss": 13.7686, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 0.366412213740458, |
|
"grad_norm": 3941584.712432807, |
|
"learning_rate": 9.002158972602599e-06, |
|
"loss": 13.7605, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.36844783715012724, |
|
"grad_norm": 2814992.789501057, |
|
"learning_rate": 8.977948902860154e-06, |
|
"loss": 13.8302, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 0.3704834605597964, |
|
"grad_norm": 2652186.9612782886, |
|
"learning_rate": 8.953482028342853e-06, |
|
"loss": 13.7782, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 0.37251908396946565, |
|
"grad_norm": 3385003.9687919975, |
|
"learning_rate": 8.92875992856073e-06, |
|
"loss": 13.8049, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 0.3745547073791349, |
|
"grad_norm": 5173853.448674343, |
|
"learning_rate": 8.903784199500412e-06, |
|
"loss": 13.7785, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 0.37659033078880405, |
|
"grad_norm": 4211561.315304709, |
|
"learning_rate": 8.8785564535221e-06, |
|
"loss": 13.8245, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.3786259541984733, |
|
"grad_norm": 2082428.2757180594, |
|
"learning_rate": 8.853078319255466e-06, |
|
"loss": 13.8218, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 0.3806615776081425, |
|
"grad_norm": 2180497.2165646055, |
|
"learning_rate": 8.827351441494525e-06, |
|
"loss": 13.7686, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 0.3826972010178117, |
|
"grad_norm": 2403683.8496971005, |
|
"learning_rate": 8.80137748109144e-06, |
|
"loss": 13.8504, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 0.3847328244274809, |
|
"grad_norm": 2088585.2805628132, |
|
"learning_rate": 8.77515811484931e-06, |
|
"loss": 13.8108, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 0.38676844783715014, |
|
"grad_norm": 2694457.991947191, |
|
"learning_rate": 8.748695035413925e-06, |
|
"loss": 13.8309, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.3888040712468193, |
|
"grad_norm": 3397579.358486345, |
|
"learning_rate": 8.72198995116448e-06, |
|
"loss": 13.7726, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 0.39083969465648855, |
|
"grad_norm": 1991789.4895514157, |
|
"learning_rate": 8.695044586103297e-06, |
|
"loss": 13.7618, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.3928753180661578, |
|
"grad_norm": 5556327.041255268, |
|
"learning_rate": 8.667860679744529e-06, |
|
"loss": 13.7844, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 0.39491094147582695, |
|
"grad_norm": 2177637.385711558, |
|
"learning_rate": 8.640439987001855e-06, |
|
"loss": 13.7805, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 0.3969465648854962, |
|
"grad_norm": 5370184.851617085, |
|
"learning_rate": 8.612784278075195e-06, |
|
"loss": 13.7892, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.3989821882951654, |
|
"grad_norm": 2473463.532725396, |
|
"learning_rate": 8.58489533833643e-06, |
|
"loss": 13.7826, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 0.4010178117048346, |
|
"grad_norm": 3439577.606571749, |
|
"learning_rate": 8.556774968214134e-06, |
|
"loss": 13.8133, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 0.4030534351145038, |
|
"grad_norm": 2463771.55347351, |
|
"learning_rate": 8.52842498307736e-06, |
|
"loss": 13.8139, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 0.40508905852417304, |
|
"grad_norm": 2610726.9405806856, |
|
"learning_rate": 8.499847213118431e-06, |
|
"loss": 13.7792, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 0.4071246819338422, |
|
"grad_norm": 5126921.560500939, |
|
"learning_rate": 8.471043503234796e-06, |
|
"loss": 13.7862, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.40916030534351144, |
|
"grad_norm": 2337790.258284094, |
|
"learning_rate": 8.442015712909926e-06, |
|
"loss": 13.8018, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 0.4111959287531807, |
|
"grad_norm": 2269326.88734255, |
|
"learning_rate": 8.412765716093273e-06, |
|
"loss": 13.8081, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 0.41323155216284985, |
|
"grad_norm": 2482439.54650581, |
|
"learning_rate": 8.383295401079284e-06, |
|
"loss": 13.8155, |
|
"step": 203 |
|
}, |
|
{ |
|
"epoch": 0.4152671755725191, |
|
"grad_norm": 2331248.12998744, |
|
"learning_rate": 8.353606670385514e-06, |
|
"loss": 13.7731, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 0.4173027989821883, |
|
"grad_norm": 2419939.8754564477, |
|
"learning_rate": 8.3237014406298e-06, |
|
"loss": 13.8313, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.4193384223918575, |
|
"grad_norm": 2294086.8182832007, |
|
"learning_rate": 8.293581642406517e-06, |
|
"loss": 13.836, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 0.4213740458015267, |
|
"grad_norm": 3514387.9792135926, |
|
"learning_rate": 8.263249220161957e-06, |
|
"loss": 13.8349, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 0.42340966921119594, |
|
"grad_norm": 2691961.5452496265, |
|
"learning_rate": 8.232706132068806e-06, |
|
"loss": 13.8321, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 0.42544529262086517, |
|
"grad_norm": 2645633.6227109493, |
|
"learning_rate": 8.201954349899712e-06, |
|
"loss": 13.8001, |
|
"step": 209 |
|
}, |
|
{ |
|
"epoch": 0.42748091603053434, |
|
"grad_norm": 3335230.860278415, |
|
"learning_rate": 8.17099585890001e-06, |
|
"loss": 13.8217, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.42951653944020357, |
|
"grad_norm": 2566670.9506267244, |
|
"learning_rate": 8.139832657659557e-06, |
|
"loss": 13.7714, |
|
"step": 211 |
|
}, |
|
{ |
|
"epoch": 0.4315521628498728, |
|
"grad_norm": 2370635.7525829547, |
|
"learning_rate": 8.108466757983695e-06, |
|
"loss": 13.7885, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 0.433587786259542, |
|
"grad_norm": 4441547.469959615, |
|
"learning_rate": 8.076900184763394e-06, |
|
"loss": 13.7823, |
|
"step": 213 |
|
}, |
|
{ |
|
"epoch": 0.4356234096692112, |
|
"grad_norm": 24430772.164419036, |
|
"learning_rate": 8.04513497584452e-06, |
|
"loss": 13.846, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 0.43765903307888043, |
|
"grad_norm": 1935048.054024762, |
|
"learning_rate": 8.013173181896283e-06, |
|
"loss": 13.7878, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.4396946564885496, |
|
"grad_norm": 1873118.933736968, |
|
"learning_rate": 7.981016866278843e-06, |
|
"loss": 13.7934, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 0.44173027989821884, |
|
"grad_norm": 2169804.42270086, |
|
"learning_rate": 7.94866810491012e-06, |
|
"loss": 13.7965, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 0.44376590330788807, |
|
"grad_norm": 3286778.9186343704, |
|
"learning_rate": 7.916128986131761e-06, |
|
"loss": 13.8332, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 0.44580152671755724, |
|
"grad_norm": 2593694.969662653, |
|
"learning_rate": 7.883401610574338e-06, |
|
"loss": 13.7957, |
|
"step": 219 |
|
}, |
|
{ |
|
"epoch": 0.44783715012722647, |
|
"grad_norm": 4975950.65536953, |
|
"learning_rate": 7.850488091021726e-06, |
|
"loss": 13.8212, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.4498727735368957, |
|
"grad_norm": 2795061.731221561, |
|
"learning_rate": 7.817390552274721e-06, |
|
"loss": 13.7835, |
|
"step": 221 |
|
}, |
|
{ |
|
"epoch": 0.45190839694656487, |
|
"grad_norm": 2386068.803973628, |
|
"learning_rate": 7.784111131013858e-06, |
|
"loss": 13.8274, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 0.4539440203562341, |
|
"grad_norm": 4657398.876885557, |
|
"learning_rate": 7.750651975661471e-06, |
|
"loss": 13.7771, |
|
"step": 223 |
|
}, |
|
{ |
|
"epoch": 0.45597964376590333, |
|
"grad_norm": 3655891.188475978, |
|
"learning_rate": 7.717015246243012e-06, |
|
"loss": 13.8151, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 0.4580152671755725, |
|
"grad_norm": 3667526.462351342, |
|
"learning_rate": 7.683203114247587e-06, |
|
"loss": 13.7915, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.46005089058524173, |
|
"grad_norm": 3809298.1813372867, |
|
"learning_rate": 7.649217762487786e-06, |
|
"loss": 13.8205, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 0.46208651399491096, |
|
"grad_norm": 3288055.728671065, |
|
"learning_rate": 7.615061384958764e-06, |
|
"loss": 13.8062, |
|
"step": 227 |
|
}, |
|
{ |
|
"epoch": 0.46412213740458014, |
|
"grad_norm": 5662460.544065125, |
|
"learning_rate": 7.580736186696593e-06, |
|
"loss": 13.8049, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 0.46615776081424937, |
|
"grad_norm": 4421743.27796514, |
|
"learning_rate": 7.546244383635929e-06, |
|
"loss": 13.7686, |
|
"step": 229 |
|
}, |
|
{ |
|
"epoch": 0.4681933842239186, |
|
"grad_norm": 2313331.4271743125, |
|
"learning_rate": 7.5115882024669375e-06, |
|
"loss": 13.8003, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.47022900763358777, |
|
"grad_norm": 6343328.905525712, |
|
"learning_rate": 7.476769880491561e-06, |
|
"loss": 13.7806, |
|
"step": 231 |
|
}, |
|
{ |
|
"epoch": 0.472264631043257, |
|
"grad_norm": 2640558.638662984, |
|
"learning_rate": 7.44179166547908e-06, |
|
"loss": 13.7999, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 0.47430025445292623, |
|
"grad_norm": 4310404.449472289, |
|
"learning_rate": 7.406655815520998e-06, |
|
"loss": 13.8118, |
|
"step": 233 |
|
}, |
|
{ |
|
"epoch": 0.4763358778625954, |
|
"grad_norm": 4518592.431753858, |
|
"learning_rate": 7.371364598885276e-06, |
|
"loss": 13.7864, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 0.47837150127226463, |
|
"grad_norm": 2098819.871080459, |
|
"learning_rate": 7.335920293869891e-06, |
|
"loss": 13.8016, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.48040712468193386, |
|
"grad_norm": 2463516.9812008953, |
|
"learning_rate": 7.300325188655762e-06, |
|
"loss": 13.7913, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 0.48244274809160304, |
|
"grad_norm": 3102860.0228757737, |
|
"learning_rate": 7.264581581159024e-06, |
|
"loss": 13.7706, |
|
"step": 237 |
|
}, |
|
{ |
|
"epoch": 0.48447837150127226, |
|
"grad_norm": 1914868.4483156833, |
|
"learning_rate": 7.2286917788826926e-06, |
|
"loss": 13.8093, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 0.4865139949109415, |
|
"grad_norm": 2539581.4122914425, |
|
"learning_rate": 7.192658098767686e-06, |
|
"loss": 13.7854, |
|
"step": 239 |
|
}, |
|
{ |
|
"epoch": 0.48854961832061067, |
|
"grad_norm": 3707976.67600207, |
|
"learning_rate": 7.1564828670432595e-06, |
|
"loss": 13.8176, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.4905852417302799, |
|
"grad_norm": 3593935.7875790736, |
|
"learning_rate": 7.120168419076825e-06, |
|
"loss": 13.767, |
|
"step": 241 |
|
}, |
|
{ |
|
"epoch": 0.4926208651399491, |
|
"grad_norm": 19371415.336839173, |
|
"learning_rate": 7.083717099223192e-06, |
|
"loss": 13.7774, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 0.4946564885496183, |
|
"grad_norm": 2053853.5565662459, |
|
"learning_rate": 7.047131260673214e-06, |
|
"loss": 13.8225, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 0.49669211195928753, |
|
"grad_norm": 4077586.3030170733, |
|
"learning_rate": 7.010413265301888e-06, |
|
"loss": 13.7911, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 0.49872773536895676, |
|
"grad_norm": 2640760.978026206, |
|
"learning_rate": 6.97356548351586e-06, |
|
"loss": 13.8096, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.500763358778626, |
|
"grad_norm": 3012267.667522946, |
|
"learning_rate": 6.936590294100414e-06, |
|
"loss": 13.7508, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 0.5027989821882952, |
|
"grad_norm": 2736053.147326933, |
|
"learning_rate": 6.899490084065897e-06, |
|
"loss": 13.8161, |
|
"step": 247 |
|
}, |
|
{ |
|
"epoch": 0.5048346055979643, |
|
"grad_norm": 3879858.467910511, |
|
"learning_rate": 6.862267248493624e-06, |
|
"loss": 13.8145, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 0.5068702290076336, |
|
"grad_norm": 2410669.072572543, |
|
"learning_rate": 6.824924190381257e-06, |
|
"loss": 13.7883, |
|
"step": 249 |
|
}, |
|
{ |
|
"epoch": 0.5089058524173028, |
|
"grad_norm": 2792170.4070734098, |
|
"learning_rate": 6.7874633204876705e-06, |
|
"loss": 13.83, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.510941475826972, |
|
"grad_norm": 3054954.4878360187, |
|
"learning_rate": 6.7498870571773275e-06, |
|
"loss": 13.8138, |
|
"step": 251 |
|
}, |
|
{ |
|
"epoch": 0.5129770992366413, |
|
"grad_norm": 13118018.692801312, |
|
"learning_rate": 6.712197826264154e-06, |
|
"loss": 13.7877, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 0.5150127226463105, |
|
"grad_norm": 2274862.944795723, |
|
"learning_rate": 6.674398060854931e-06, |
|
"loss": 13.788, |
|
"step": 253 |
|
}, |
|
{ |
|
"epoch": 0.5170483460559796, |
|
"grad_norm": 6493219.085795618, |
|
"learning_rate": 6.636490201192229e-06, |
|
"loss": 13.8056, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 0.5190839694656488, |
|
"grad_norm": 2210871.447412882, |
|
"learning_rate": 6.5984766944968636e-06, |
|
"loss": 13.7964, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.5211195928753181, |
|
"grad_norm": 2535457.1929967045, |
|
"learning_rate": 6.560359994809916e-06, |
|
"loss": 13.8025, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 0.5231552162849873, |
|
"grad_norm": 3487433.3872100264, |
|
"learning_rate": 6.522142562834307e-06, |
|
"loss": 13.7752, |
|
"step": 257 |
|
}, |
|
{ |
|
"epoch": 0.5251908396946565, |
|
"grad_norm": 2350043.7437156746, |
|
"learning_rate": 6.483826865775941e-06, |
|
"loss": 13.7891, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 0.5272264631043257, |
|
"grad_norm": 3776676.8394483137, |
|
"learning_rate": 6.445415377184427e-06, |
|
"loss": 13.8172, |
|
"step": 259 |
|
}, |
|
{ |
|
"epoch": 0.5292620865139949, |
|
"grad_norm": 3888987.2901650295, |
|
"learning_rate": 6.4069105767933944e-06, |
|
"loss": 13.7623, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.5312977099236641, |
|
"grad_norm": 3340457.7579122144, |
|
"learning_rate": 6.368314950360416e-06, |
|
"loss": 13.8065, |
|
"step": 261 |
|
}, |
|
{ |
|
"epoch": 0.5333333333333333, |
|
"grad_norm": 5982292.313514162, |
|
"learning_rate": 6.3296309895065215e-06, |
|
"loss": 13.7812, |
|
"step": 262 |
|
}, |
|
{ |
|
"epoch": 0.5353689567430026, |
|
"grad_norm": 3219916.0058936477, |
|
"learning_rate": 6.290861191555359e-06, |
|
"loss": 13.745, |
|
"step": 263 |
|
}, |
|
{ |
|
"epoch": 0.5374045801526718, |
|
"grad_norm": 11720301.310935492, |
|
"learning_rate": 6.252008059371968e-06, |
|
"loss": 13.8253, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 0.539440203562341, |
|
"grad_norm": 3118271.2300748057, |
|
"learning_rate": 6.213074101201202e-06, |
|
"loss": 13.7763, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.5414758269720101, |
|
"grad_norm": 1697945.554145853, |
|
"learning_rate": 6.174061830505801e-06, |
|
"loss": 13.7883, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 0.5435114503816794, |
|
"grad_norm": 2857758.0379691618, |
|
"learning_rate": 6.1349737658041385e-06, |
|
"loss": 13.7939, |
|
"step": 267 |
|
}, |
|
{ |
|
"epoch": 0.5455470737913486, |
|
"grad_norm": 1961975.7441584785, |
|
"learning_rate": 6.095812430507627e-06, |
|
"loss": 13.7989, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 0.5475826972010178, |
|
"grad_norm": 4193880.83293653, |
|
"learning_rate": 6.056580352757813e-06, |
|
"loss": 13.8241, |
|
"step": 269 |
|
}, |
|
{ |
|
"epoch": 0.549618320610687, |
|
"grad_norm": 3312744.2202664735, |
|
"learning_rate": 6.0172800652631706e-06, |
|
"loss": 13.7733, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.5516539440203563, |
|
"grad_norm": 7858171.4628072195, |
|
"learning_rate": 5.977914105135594e-06, |
|
"loss": 13.8072, |
|
"step": 271 |
|
}, |
|
{ |
|
"epoch": 0.5536895674300254, |
|
"grad_norm": 3941663.415453141, |
|
"learning_rate": 5.938485013726612e-06, |
|
"loss": 13.8103, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 0.5557251908396946, |
|
"grad_norm": 3610433.10118229, |
|
"learning_rate": 5.898995336463326e-06, |
|
"loss": 13.807, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 0.5577608142493639, |
|
"grad_norm": 4164601.1328435126, |
|
"learning_rate": 5.859447622684084e-06, |
|
"loss": 13.815, |
|
"step": 274 |
|
}, |
|
{ |
|
"epoch": 0.5597964376590331, |
|
"grad_norm": 2548966.8285937863, |
|
"learning_rate": 5.819844425473899e-06, |
|
"loss": 13.7778, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.5618320610687023, |
|
"grad_norm": 4078882.101176789, |
|
"learning_rate": 5.780188301499636e-06, |
|
"loss": 13.8193, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 0.5638676844783715, |
|
"grad_norm": 2517308.8894206337, |
|
"learning_rate": 5.740481810844952e-06, |
|
"loss": 13.8063, |
|
"step": 277 |
|
}, |
|
{ |
|
"epoch": 0.5659033078880407, |
|
"grad_norm": 2769966.422515691, |
|
"learning_rate": 5.700727516845038e-06, |
|
"loss": 13.8094, |
|
"step": 278 |
|
}, |
|
{ |
|
"epoch": 0.5679389312977099, |
|
"grad_norm": 3505593.62357941, |
|
"learning_rate": 5.660927985921122e-06, |
|
"loss": 13.8098, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 0.5699745547073791, |
|
"grad_norm": 2349437.1104983217, |
|
"learning_rate": 5.621085787414799e-06, |
|
"loss": 13.799, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.5720101781170484, |
|
"grad_norm": 3132576.359530331, |
|
"learning_rate": 5.581203493422161e-06, |
|
"loss": 13.7891, |
|
"step": 281 |
|
}, |
|
{ |
|
"epoch": 0.5740458015267176, |
|
"grad_norm": 4404000.913374095, |
|
"learning_rate": 5.541283678627742e-06, |
|
"loss": 13.7851, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 0.5760814249363868, |
|
"grad_norm": 3276477.3200832414, |
|
"learning_rate": 5.501328920138314e-06, |
|
"loss": 13.8194, |
|
"step": 283 |
|
}, |
|
{ |
|
"epoch": 0.5781170483460559, |
|
"grad_norm": 3546838.8556743674, |
|
"learning_rate": 5.46134179731651e-06, |
|
"loss": 13.8107, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 0.5801526717557252, |
|
"grad_norm": 2115264.4674158324, |
|
"learning_rate": 5.421324891614312e-06, |
|
"loss": 13.7984, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.5821882951653944, |
|
"grad_norm": 2235578.204459906, |
|
"learning_rate": 5.3812807864063946e-06, |
|
"loss": 13.8009, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 0.5842239185750636, |
|
"grad_norm": 4052228.371342962, |
|
"learning_rate": 5.341212066823356e-06, |
|
"loss": 13.7885, |
|
"step": 287 |
|
}, |
|
{ |
|
"epoch": 0.5862595419847328, |
|
"grad_norm": 2755225.8421591343, |
|
"learning_rate": 5.3011213195848245e-06, |
|
"loss": 13.7845, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 0.5882951653944021, |
|
"grad_norm": 3206915.4484688323, |
|
"learning_rate": 5.26101113283247e-06, |
|
"loss": 13.8352, |
|
"step": 289 |
|
}, |
|
{ |
|
"epoch": 0.5903307888040712, |
|
"grad_norm": 8291421.864476618, |
|
"learning_rate": 5.220884095962924e-06, |
|
"loss": 13.7882, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.5923664122137404, |
|
"grad_norm": 4520092.054349328, |
|
"learning_rate": 5.1807427994606065e-06, |
|
"loss": 13.7911, |
|
"step": 291 |
|
}, |
|
{ |
|
"epoch": 0.5944020356234097, |
|
"grad_norm": 1988928.2728027685, |
|
"learning_rate": 5.140589834730503e-06, |
|
"loss": 13.8379, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 0.5964376590330789, |
|
"grad_norm": 4465132.927934143, |
|
"learning_rate": 5.100427793930862e-06, |
|
"loss": 13.7948, |
|
"step": 293 |
|
}, |
|
{ |
|
"epoch": 0.5984732824427481, |
|
"grad_norm": 2286780.168218436, |
|
"learning_rate": 5.06025926980586e-06, |
|
"loss": 13.8048, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 0.6005089058524173, |
|
"grad_norm": 2724184.449634601, |
|
"learning_rate": 5.0200868555182155e-06, |
|
"loss": 13.7999, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.6025445292620865, |
|
"grad_norm": 2576691.827404387, |
|
"learning_rate": 4.979913144481785e-06, |
|
"loss": 13.8064, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 0.6045801526717557, |
|
"grad_norm": 2744547.316751667, |
|
"learning_rate": 4.939740730194141e-06, |
|
"loss": 13.7882, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 0.6066157760814249, |
|
"grad_norm": 2335607.7276287796, |
|
"learning_rate": 4.899572206069138e-06, |
|
"loss": 13.8027, |
|
"step": 298 |
|
}, |
|
{ |
|
"epoch": 0.6086513994910941, |
|
"grad_norm": 2599599.7384826983, |
|
"learning_rate": 4.8594101652694996e-06, |
|
"loss": 13.8156, |
|
"step": 299 |
|
}, |
|
{ |
|
"epoch": 0.6106870229007634, |
|
"grad_norm": 3536926.667490122, |
|
"learning_rate": 4.819257200539394e-06, |
|
"loss": 13.7784, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.6127226463104326, |
|
"grad_norm": 3302834.606287667, |
|
"learning_rate": 4.779115904037079e-06, |
|
"loss": 13.8263, |
|
"step": 301 |
|
}, |
|
{ |
|
"epoch": 0.6147582697201018, |
|
"grad_norm": 3042014.762667294, |
|
"learning_rate": 4.738988867167531e-06, |
|
"loss": 13.7901, |
|
"step": 302 |
|
}, |
|
{ |
|
"epoch": 0.616793893129771, |
|
"grad_norm": 2828614.249275787, |
|
"learning_rate": 4.698878680415176e-06, |
|
"loss": 13.8186, |
|
"step": 303 |
|
}, |
|
{ |
|
"epoch": 0.6188295165394402, |
|
"grad_norm": 3470485.7180142673, |
|
"learning_rate": 4.6587879331766465e-06, |
|
"loss": 13.8252, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 0.6208651399491094, |
|
"grad_norm": 2142989.454887267, |
|
"learning_rate": 4.618719213593605e-06, |
|
"loss": 13.8266, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.6229007633587786, |
|
"grad_norm": 3599418.075481026, |
|
"learning_rate": 4.5786751083856895e-06, |
|
"loss": 13.7994, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 0.6249363867684479, |
|
"grad_norm": 2468894.215654323, |
|
"learning_rate": 4.53865820268349e-06, |
|
"loss": 13.7975, |
|
"step": 307 |
|
}, |
|
{ |
|
"epoch": 0.6269720101781171, |
|
"grad_norm": 3514891.7530831015, |
|
"learning_rate": 4.498671079861686e-06, |
|
"loss": 13.8089, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 0.6290076335877862, |
|
"grad_norm": 2860617.0108131613, |
|
"learning_rate": 4.4587163213722595e-06, |
|
"loss": 13.8118, |
|
"step": 309 |
|
}, |
|
{ |
|
"epoch": 0.6310432569974554, |
|
"grad_norm": 3631959.0225475803, |
|
"learning_rate": 4.41879650657784e-06, |
|
"loss": 13.7789, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.6330788804071247, |
|
"grad_norm": 16505260.927290315, |
|
"learning_rate": 4.3789142125852015e-06, |
|
"loss": 13.7988, |
|
"step": 311 |
|
}, |
|
{ |
|
"epoch": 0.6351145038167939, |
|
"grad_norm": 2464199.3044430655, |
|
"learning_rate": 4.339072014078879e-06, |
|
"loss": 13.8382, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 0.6371501272264631, |
|
"grad_norm": 2003547.7230704648, |
|
"learning_rate": 4.299272483154963e-06, |
|
"loss": 13.8229, |
|
"step": 313 |
|
}, |
|
{ |
|
"epoch": 0.6391857506361324, |
|
"grad_norm": 2815342.948838401, |
|
"learning_rate": 4.259518189155049e-06, |
|
"loss": 13.8061, |
|
"step": 314 |
|
}, |
|
{ |
|
"epoch": 0.6412213740458015, |
|
"grad_norm": 3015740.318214435, |
|
"learning_rate": 4.219811698500365e-06, |
|
"loss": 13.8036, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.6432569974554707, |
|
"grad_norm": 3876924.4779627738, |
|
"learning_rate": 4.1801555745261025e-06, |
|
"loss": 13.7914, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 0.6452926208651399, |
|
"grad_norm": 3172609.6206479096, |
|
"learning_rate": 4.140552377315918e-06, |
|
"loss": 13.8449, |
|
"step": 317 |
|
}, |
|
{ |
|
"epoch": 0.6473282442748092, |
|
"grad_norm": 4902114.509930776, |
|
"learning_rate": 4.101004663536675e-06, |
|
"loss": 13.7976, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 0.6493638676844784, |
|
"grad_norm": 2691295.2268303274, |
|
"learning_rate": 4.061514986273391e-06, |
|
"loss": 13.8136, |
|
"step": 319 |
|
}, |
|
{ |
|
"epoch": 0.6513994910941476, |
|
"grad_norm": 3460007.077345259, |
|
"learning_rate": 4.022085894864408e-06, |
|
"loss": 13.7403, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.6534351145038167, |
|
"grad_norm": 2367194.347019021, |
|
"learning_rate": 3.982719934736832e-06, |
|
"loss": 13.7971, |
|
"step": 321 |
|
}, |
|
{ |
|
"epoch": 0.655470737913486, |
|
"grad_norm": 15501881.45680894, |
|
"learning_rate": 3.943419647242189e-06, |
|
"loss": 13.8015, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 0.6575063613231552, |
|
"grad_norm": 2840321.3616006514, |
|
"learning_rate": 3.904187569492373e-06, |
|
"loss": 13.8104, |
|
"step": 323 |
|
}, |
|
{ |
|
"epoch": 0.6595419847328244, |
|
"grad_norm": 2030054.0765901625, |
|
"learning_rate": 3.865026234195863e-06, |
|
"loss": 13.7948, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 0.6615776081424937, |
|
"grad_norm": 2852479.1215876606, |
|
"learning_rate": 3.8259381694942e-06, |
|
"loss": 13.7901, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.6636132315521629, |
|
"grad_norm": 3456741.004275556, |
|
"learning_rate": 3.786925898798801e-06, |
|
"loss": 13.7857, |
|
"step": 326 |
|
}, |
|
{ |
|
"epoch": 0.665648854961832, |
|
"grad_norm": 2066561.8194525177, |
|
"learning_rate": 3.7479919406280334e-06, |
|
"loss": 13.8063, |
|
"step": 327 |
|
}, |
|
{ |
|
"epoch": 0.6676844783715012, |
|
"grad_norm": 3433587.7234547967, |
|
"learning_rate": 3.709138808444641e-06, |
|
"loss": 13.8119, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 0.6697201017811705, |
|
"grad_norm": 3578864.1748913922, |
|
"learning_rate": 3.6703690104934806e-06, |
|
"loss": 13.7908, |
|
"step": 329 |
|
}, |
|
{ |
|
"epoch": 0.6717557251908397, |
|
"grad_norm": 4610701.366359627, |
|
"learning_rate": 3.6316850496395863e-06, |
|
"loss": 13.7847, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.6737913486005089, |
|
"grad_norm": 2286947.6993929525, |
|
"learning_rate": 3.5930894232066072e-06, |
|
"loss": 13.8097, |
|
"step": 331 |
|
}, |
|
{ |
|
"epoch": 0.6758269720101782, |
|
"grad_norm": 2505249.1545348107, |
|
"learning_rate": 3.5545846228155743e-06, |
|
"loss": 13.7801, |
|
"step": 332 |
|
}, |
|
{ |
|
"epoch": 0.6778625954198473, |
|
"grad_norm": 5630898.8033584, |
|
"learning_rate": 3.516173134224059e-06, |
|
"loss": 13.8135, |
|
"step": 333 |
|
}, |
|
{ |
|
"epoch": 0.6798982188295165, |
|
"grad_norm": 5219141.786739386, |
|
"learning_rate": 3.477857437165694e-06, |
|
"loss": 13.8255, |
|
"step": 334 |
|
}, |
|
{ |
|
"epoch": 0.6819338422391857, |
|
"grad_norm": 2124438.8087317753, |
|
"learning_rate": 3.4396400051900846e-06, |
|
"loss": 13.7879, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.683969465648855, |
|
"grad_norm": 2105149.4046267755, |
|
"learning_rate": 3.401523305503139e-06, |
|
"loss": 13.808, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 0.6860050890585242, |
|
"grad_norm": 2809609.693441461, |
|
"learning_rate": 3.3635097988077724e-06, |
|
"loss": 13.8112, |
|
"step": 337 |
|
}, |
|
{ |
|
"epoch": 0.6880407124681934, |
|
"grad_norm": 2457213.1931723696, |
|
"learning_rate": 3.3256019391450696e-06, |
|
"loss": 13.8489, |
|
"step": 338 |
|
}, |
|
{ |
|
"epoch": 0.6900763358778625, |
|
"grad_norm": 12558459.530425403, |
|
"learning_rate": 3.287802173735848e-06, |
|
"loss": 13.8277, |
|
"step": 339 |
|
}, |
|
{ |
|
"epoch": 0.6921119592875318, |
|
"grad_norm": 6967431.723878883, |
|
"learning_rate": 3.250112942822673e-06, |
|
"loss": 13.8185, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.694147582697201, |
|
"grad_norm": 3411907.5422053095, |
|
"learning_rate": 3.212536679512332e-06, |
|
"loss": 13.787, |
|
"step": 341 |
|
}, |
|
{ |
|
"epoch": 0.6961832061068702, |
|
"grad_norm": 2429502.145664427, |
|
"learning_rate": 3.1750758096187446e-06, |
|
"loss": 13.8105, |
|
"step": 342 |
|
}, |
|
{ |
|
"epoch": 0.6982188295165395, |
|
"grad_norm": 2327547.2445986923, |
|
"learning_rate": 3.137732751506376e-06, |
|
"loss": 13.7772, |
|
"step": 343 |
|
}, |
|
{ |
|
"epoch": 0.7002544529262087, |
|
"grad_norm": 3569398.8894632743, |
|
"learning_rate": 3.1005099159341044e-06, |
|
"loss": 13.7938, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 0.7022900763358778, |
|
"grad_norm": 3211149.457873932, |
|
"learning_rate": 3.0634097058995877e-06, |
|
"loss": 13.7915, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.704325699745547, |
|
"grad_norm": 3830655.2384712566, |
|
"learning_rate": 3.0264345164841426e-06, |
|
"loss": 13.8191, |
|
"step": 346 |
|
}, |
|
{ |
|
"epoch": 0.7063613231552163, |
|
"grad_norm": 3823712.125917996, |
|
"learning_rate": 2.989586734698113e-06, |
|
"loss": 13.8023, |
|
"step": 347 |
|
}, |
|
{ |
|
"epoch": 0.7083969465648855, |
|
"grad_norm": 32196514.721627507, |
|
"learning_rate": 2.9528687393267865e-06, |
|
"loss": 13.7764, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 0.7104325699745547, |
|
"grad_norm": 1882821.5554029653, |
|
"learning_rate": 2.9162829007768103e-06, |
|
"loss": 13.8351, |
|
"step": 349 |
|
}, |
|
{ |
|
"epoch": 0.712468193384224, |
|
"grad_norm": 3350119.682457626, |
|
"learning_rate": 2.879831580923176e-06, |
|
"loss": 13.7957, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.7145038167938931, |
|
"grad_norm": 3937737.379258324, |
|
"learning_rate": 2.843517132956742e-06, |
|
"loss": 13.8002, |
|
"step": 351 |
|
}, |
|
{ |
|
"epoch": 0.7165394402035623, |
|
"grad_norm": 3146762.068649268, |
|
"learning_rate": 2.8073419012323154e-06, |
|
"loss": 13.7632, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 0.7185750636132315, |
|
"grad_norm": 2767069.333419986, |
|
"learning_rate": 2.771308221117309e-06, |
|
"loss": 13.7933, |
|
"step": 353 |
|
}, |
|
{ |
|
"epoch": 0.7206106870229008, |
|
"grad_norm": 4373321.192999916, |
|
"learning_rate": 2.7354184188409773e-06, |
|
"loss": 13.798, |
|
"step": 354 |
|
}, |
|
{ |
|
"epoch": 0.72264631043257, |
|
"grad_norm": 3450348.839131381, |
|
"learning_rate": 2.6996748113442397e-06, |
|
"loss": 13.8177, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.7246819338422392, |
|
"grad_norm": 5591120.72637471, |
|
"learning_rate": 2.66407970613011e-06, |
|
"loss": 13.7984, |
|
"step": 356 |
|
}, |
|
{ |
|
"epoch": 0.7267175572519083, |
|
"grad_norm": 1703861.5304970315, |
|
"learning_rate": 2.6286354011147252e-06, |
|
"loss": 13.8147, |
|
"step": 357 |
|
}, |
|
{ |
|
"epoch": 0.7287531806615776, |
|
"grad_norm": 3465138.9565325286, |
|
"learning_rate": 2.593344184479003e-06, |
|
"loss": 13.8088, |
|
"step": 358 |
|
}, |
|
{ |
|
"epoch": 0.7307888040712468, |
|
"grad_norm": 4545416.100109938, |
|
"learning_rate": 2.5582083345209217e-06, |
|
"loss": 13.8249, |
|
"step": 359 |
|
}, |
|
{ |
|
"epoch": 0.732824427480916, |
|
"grad_norm": 3516332.5584949586, |
|
"learning_rate": 2.5232301195084395e-06, |
|
"loss": 13.8055, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.7348600508905853, |
|
"grad_norm": 2312331.751163826, |
|
"learning_rate": 2.488411797533064e-06, |
|
"loss": 13.8223, |
|
"step": 361 |
|
}, |
|
{ |
|
"epoch": 0.7368956743002545, |
|
"grad_norm": 2685597.665166646, |
|
"learning_rate": 2.4537556163640726e-06, |
|
"loss": 13.8293, |
|
"step": 362 |
|
}, |
|
{ |
|
"epoch": 0.7389312977099237, |
|
"grad_norm": 2522967.966999191, |
|
"learning_rate": 2.4192638133034074e-06, |
|
"loss": 13.7902, |
|
"step": 363 |
|
}, |
|
{ |
|
"epoch": 0.7409669211195928, |
|
"grad_norm": 2195261.840643841, |
|
"learning_rate": 2.384938615041238e-06, |
|
"loss": 13.782, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 0.7430025445292621, |
|
"grad_norm": 6598560.135727499, |
|
"learning_rate": 2.350782237512215e-06, |
|
"loss": 13.8163, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.7450381679389313, |
|
"grad_norm": 2786370.46056762, |
|
"learning_rate": 2.316796885752415e-06, |
|
"loss": 13.7821, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 0.7470737913486005, |
|
"grad_norm": 2943271.811966078, |
|
"learning_rate": 2.2829847537569904e-06, |
|
"loss": 13.7909, |
|
"step": 367 |
|
}, |
|
{ |
|
"epoch": 0.7491094147582698, |
|
"grad_norm": 2462160.442139116, |
|
"learning_rate": 2.2493480243385298e-06, |
|
"loss": 13.814, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 0.751145038167939, |
|
"grad_norm": 2225343.862719593, |
|
"learning_rate": 2.2158888689861434e-06, |
|
"loss": 13.8084, |
|
"step": 369 |
|
}, |
|
{ |
|
"epoch": 0.7531806615776081, |
|
"grad_norm": 2960178.644234803, |
|
"learning_rate": 2.182609447725279e-06, |
|
"loss": 13.8266, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.7552162849872773, |
|
"grad_norm": 2914387.1052595368, |
|
"learning_rate": 2.149511908978275e-06, |
|
"loss": 13.8007, |
|
"step": 371 |
|
}, |
|
{ |
|
"epoch": 0.7572519083969466, |
|
"grad_norm": 3028649.837352563, |
|
"learning_rate": 2.1165983894256647e-06, |
|
"loss": 13.7907, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 0.7592875318066158, |
|
"grad_norm": 2648884.5001803297, |
|
"learning_rate": 2.0838710138682412e-06, |
|
"loss": 13.8052, |
|
"step": 373 |
|
}, |
|
{ |
|
"epoch": 0.761323155216285, |
|
"grad_norm": 3498649.6090777554, |
|
"learning_rate": 2.051331895089882e-06, |
|
"loss": 13.8098, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 0.7633587786259542, |
|
"grad_norm": 23098352.355775494, |
|
"learning_rate": 2.0189831337211573e-06, |
|
"loss": 13.8393, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.7653944020356234, |
|
"grad_norm": 2461084.0552417845, |
|
"learning_rate": 1.9868268181037186e-06, |
|
"loss": 13.7867, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 0.7674300254452926, |
|
"grad_norm": 3735300.3296117457, |
|
"learning_rate": 1.9548650241554812e-06, |
|
"loss": 13.804, |
|
"step": 377 |
|
}, |
|
{ |
|
"epoch": 0.7694656488549618, |
|
"grad_norm": 3008756.34059785, |
|
"learning_rate": 1.923099815236608e-06, |
|
"loss": 13.7861, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 0.771501272264631, |
|
"grad_norm": 7710036.601809266, |
|
"learning_rate": 1.8915332420163074e-06, |
|
"loss": 13.7991, |
|
"step": 379 |
|
}, |
|
{ |
|
"epoch": 0.7735368956743003, |
|
"grad_norm": 3540790.6907704584, |
|
"learning_rate": 1.8601673423404449e-06, |
|
"loss": 13.7907, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.7755725190839695, |
|
"grad_norm": 2333150.5964701963, |
|
"learning_rate": 1.8290041410999893e-06, |
|
"loss": 13.8226, |
|
"step": 381 |
|
}, |
|
{ |
|
"epoch": 0.7776081424936386, |
|
"grad_norm": 3965290.2964736833, |
|
"learning_rate": 1.798045650100289e-06, |
|
"loss": 13.7884, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 0.7796437659033079, |
|
"grad_norm": 2968777.429909849, |
|
"learning_rate": 1.7672938679311957e-06, |
|
"loss": 13.7859, |
|
"step": 383 |
|
}, |
|
{ |
|
"epoch": 0.7816793893129771, |
|
"grad_norm": 5317126.450461407, |
|
"learning_rate": 1.736750779838044e-06, |
|
"loss": 13.8526, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 0.7837150127226463, |
|
"grad_norm": 3055487.063269149, |
|
"learning_rate": 1.7064183575934856e-06, |
|
"loss": 13.8009, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.7857506361323155, |
|
"grad_norm": 16905669.825724535, |
|
"learning_rate": 1.676298559370202e-06, |
|
"loss": 13.7807, |
|
"step": 386 |
|
}, |
|
{ |
|
"epoch": 0.7877862595419848, |
|
"grad_norm": 5137867.698262086, |
|
"learning_rate": 1.6463933296144863e-06, |
|
"loss": 13.8198, |
|
"step": 387 |
|
}, |
|
{ |
|
"epoch": 0.7898218829516539, |
|
"grad_norm": 3398913.1351794973, |
|
"learning_rate": 1.6167045989207185e-06, |
|
"loss": 13.7712, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 0.7918575063613231, |
|
"grad_norm": 3925212.2891858686, |
|
"learning_rate": 1.5872342839067305e-06, |
|
"loss": 13.807, |
|
"step": 389 |
|
}, |
|
{ |
|
"epoch": 0.7938931297709924, |
|
"grad_norm": 2924712.3948877575, |
|
"learning_rate": 1.5579842870900746e-06, |
|
"loss": 13.8241, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.7959287531806616, |
|
"grad_norm": 5390343.102311131, |
|
"learning_rate": 1.5289564967652033e-06, |
|
"loss": 13.8485, |
|
"step": 391 |
|
}, |
|
{ |
|
"epoch": 0.7979643765903308, |
|
"grad_norm": 3074687.848582143, |
|
"learning_rate": 1.5001527868815702e-06, |
|
"loss": 13.8125, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 3184498.416088956, |
|
"learning_rate": 1.4715750169226417e-06, |
|
"loss": 13.766, |
|
"step": 393 |
|
}, |
|
{ |
|
"epoch": 0.8020356234096692, |
|
"grad_norm": 2441964.563817395, |
|
"learning_rate": 1.4432250317858675e-06, |
|
"loss": 13.7805, |
|
"step": 394 |
|
}, |
|
{ |
|
"epoch": 0.8040712468193384, |
|
"grad_norm": 2091387.9994893048, |
|
"learning_rate": 1.4151046616635727e-06, |
|
"loss": 13.7952, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.8061068702290076, |
|
"grad_norm": 3508560.4814663967, |
|
"learning_rate": 1.3872157219248045e-06, |
|
"loss": 13.7889, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 0.8081424936386769, |
|
"grad_norm": 3962771.0203186534, |
|
"learning_rate": 1.3595600129981469e-06, |
|
"loss": 13.7979, |
|
"step": 397 |
|
}, |
|
{ |
|
"epoch": 0.8101781170483461, |
|
"grad_norm": 2731620.334807845, |
|
"learning_rate": 1.3321393202554739e-06, |
|
"loss": 13.7976, |
|
"step": 398 |
|
}, |
|
{ |
|
"epoch": 0.8122137404580153, |
|
"grad_norm": 3197496.0139092538, |
|
"learning_rate": 1.3049554138967052e-06, |
|
"loss": 13.7901, |
|
"step": 399 |
|
}, |
|
{ |
|
"epoch": 0.8142493638676844, |
|
"grad_norm": 7014943.992062835, |
|
"learning_rate": 1.278010048835523e-06, |
|
"loss": 13.7998, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.8162849872773537, |
|
"grad_norm": 3316247.1212317836, |
|
"learning_rate": 1.2513049645860759e-06, |
|
"loss": 13.765, |
|
"step": 401 |
|
}, |
|
{ |
|
"epoch": 0.8183206106870229, |
|
"grad_norm": 2783094.2613453907, |
|
"learning_rate": 1.224841885150691e-06, |
|
"loss": 13.8445, |
|
"step": 402 |
|
}, |
|
{ |
|
"epoch": 0.8203562340966921, |
|
"grad_norm": 3355566.32780499, |
|
"learning_rate": 1.1986225189085627e-06, |
|
"loss": 13.8081, |
|
"step": 403 |
|
}, |
|
{ |
|
"epoch": 0.8223918575063613, |
|
"grad_norm": 2212096.023617605, |
|
"learning_rate": 1.172648558505477e-06, |
|
"loss": 13.7965, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 0.8244274809160306, |
|
"grad_norm": 1744005.9134531072, |
|
"learning_rate": 1.1469216807445348e-06, |
|
"loss": 13.827, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.8264631043256997, |
|
"grad_norm": 5478757.4739052905, |
|
"learning_rate": 1.1214435464779006e-06, |
|
"loss": 13.8068, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 0.8284987277353689, |
|
"grad_norm": 10845370.007264948, |
|
"learning_rate": 1.0962158004995893e-06, |
|
"loss": 13.8192, |
|
"step": 407 |
|
}, |
|
{ |
|
"epoch": 0.8305343511450382, |
|
"grad_norm": 6268218.580873969, |
|
"learning_rate": 1.0712400714392723e-06, |
|
"loss": 13.7939, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 0.8325699745547074, |
|
"grad_norm": 4049672.137206165, |
|
"learning_rate": 1.0465179716571467e-06, |
|
"loss": 13.7897, |
|
"step": 409 |
|
}, |
|
{ |
|
"epoch": 0.8346055979643766, |
|
"grad_norm": 5672727.971393141, |
|
"learning_rate": 1.0220510971398473e-06, |
|
"loss": 13.8234, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.8366412213740458, |
|
"grad_norm": 2826950.9583422337, |
|
"learning_rate": 9.978410273974015e-07, |
|
"loss": 13.7928, |
|
"step": 411 |
|
}, |
|
{ |
|
"epoch": 0.838676844783715, |
|
"grad_norm": 2758814.4659312656, |
|
"learning_rate": 9.738893253612808e-07, |
|
"loss": 13.802, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 0.8407124681933842, |
|
"grad_norm": 2475383.7033035085, |
|
"learning_rate": 9.50197537283481e-07, |
|
"loss": 13.7978, |
|
"step": 413 |
|
}, |
|
{ |
|
"epoch": 0.8427480916030534, |
|
"grad_norm": 3247115.9631803534, |
|
"learning_rate": 9.267671926367166e-07, |
|
"loss": 13.826, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 0.8447837150127226, |
|
"grad_norm": 2633292.367379474, |
|
"learning_rate": 9.035998040156801e-07, |
|
"loss": 13.7931, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.8468193384223919, |
|
"grad_norm": 2515821.989602333, |
|
"learning_rate": 8.806968670393801e-07, |
|
"loss": 13.7718, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 0.8488549618320611, |
|
"grad_norm": 2652660.850368756, |
|
"learning_rate": 8.580598602546109e-07, |
|
"loss": 13.8055, |
|
"step": 417 |
|
}, |
|
{ |
|
"epoch": 0.8508905852417303, |
|
"grad_norm": 2999838.4744108086, |
|
"learning_rate": 8.356902450404792e-07, |
|
"loss": 13.817, |
|
"step": 418 |
|
}, |
|
{ |
|
"epoch": 0.8529262086513995, |
|
"grad_norm": 2359848.6979157086, |
|
"learning_rate": 8.135894655140758e-07, |
|
"loss": 13.79, |
|
"step": 419 |
|
}, |
|
{ |
|
"epoch": 0.8549618320610687, |
|
"grad_norm": 3156996.0591459703, |
|
"learning_rate": 7.91758948437249e-07, |
|
"loss": 13.7984, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.8569974554707379, |
|
"grad_norm": 3762598.8987431657, |
|
"learning_rate": 7.702001031244816e-07, |
|
"loss": 13.8018, |
|
"step": 421 |
|
}, |
|
{ |
|
"epoch": 0.8590330788804071, |
|
"grad_norm": 1976772.2357703648, |
|
"learning_rate": 7.489143213519301e-07, |
|
"loss": 13.8201, |
|
"step": 422 |
|
}, |
|
{ |
|
"epoch": 0.8610687022900764, |
|
"grad_norm": 2785725.350606155, |
|
"learning_rate": 7.279029772675572e-07, |
|
"loss": 13.7565, |
|
"step": 423 |
|
}, |
|
{ |
|
"epoch": 0.8631043256997456, |
|
"grad_norm": 2620438.7904132027, |
|
"learning_rate": 7.071674273024353e-07, |
|
"loss": 13.793, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 0.8651399491094147, |
|
"grad_norm": 2646276.309564974, |
|
"learning_rate": 6.86709010083172e-07, |
|
"loss": 13.8229, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.867175572519084, |
|
"grad_norm": 2628696.346363584, |
|
"learning_rate": 6.665290463454882e-07, |
|
"loss": 13.7809, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 0.8692111959287532, |
|
"grad_norm": 3600670.0350286104, |
|
"learning_rate": 6.466288388489689e-07, |
|
"loss": 13.8044, |
|
"step": 427 |
|
}, |
|
{ |
|
"epoch": 0.8712468193384224, |
|
"grad_norm": 2947142.074939093, |
|
"learning_rate": 6.270096722929442e-07, |
|
"loss": 13.8006, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 0.8732824427480916, |
|
"grad_norm": 2029930.0771920187, |
|
"learning_rate": 6.076728132335669e-07, |
|
"loss": 13.796, |
|
"step": 429 |
|
}, |
|
{ |
|
"epoch": 0.8753180661577609, |
|
"grad_norm": 2143832.4235748462, |
|
"learning_rate": 5.886195100020408e-07, |
|
"loss": 13.7854, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.87735368956743, |
|
"grad_norm": 13943126.109787703, |
|
"learning_rate": 5.698509926240275e-07, |
|
"loss": 13.7955, |
|
"step": 431 |
|
}, |
|
{ |
|
"epoch": 0.8793893129770992, |
|
"grad_norm": 3462678.627784003, |
|
"learning_rate": 5.513684727402529e-07, |
|
"loss": 13.8238, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 0.8814249363867684, |
|
"grad_norm": 2958565.3339085556, |
|
"learning_rate": 5.331731435282705e-07, |
|
"loss": 13.8227, |
|
"step": 433 |
|
}, |
|
{ |
|
"epoch": 0.8834605597964377, |
|
"grad_norm": 2315016.094193514, |
|
"learning_rate": 5.152661796254505e-07, |
|
"loss": 13.7815, |
|
"step": 434 |
|
}, |
|
{ |
|
"epoch": 0.8854961832061069, |
|
"grad_norm": 3124561.491445178, |
|
"learning_rate": 4.976487370531352e-07, |
|
"loss": 13.7972, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.8875318066157761, |
|
"grad_norm": 2569217.587769061, |
|
"learning_rate": 4.803219531420128e-07, |
|
"loss": 13.7759, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 0.8895674300254452, |
|
"grad_norm": 5211670.405327593, |
|
"learning_rate": 4.6328694645870254e-07, |
|
"loss": 13.7982, |
|
"step": 437 |
|
}, |
|
{ |
|
"epoch": 0.8916030534351145, |
|
"grad_norm": 2303871.1225886643, |
|
"learning_rate": 4.46544816733529e-07, |
|
"loss": 13.7919, |
|
"step": 438 |
|
}, |
|
{ |
|
"epoch": 0.8936386768447837, |
|
"grad_norm": 3417955.106120249, |
|
"learning_rate": 4.3009664478954384e-07, |
|
"loss": 13.8388, |
|
"step": 439 |
|
}, |
|
{ |
|
"epoch": 0.8956743002544529, |
|
"grad_norm": 3491337.261636375, |
|
"learning_rate": 4.139434924727359e-07, |
|
"loss": 13.7903, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.8977099236641222, |
|
"grad_norm": 2598696.463432398, |
|
"learning_rate": 3.9808640258348686e-07, |
|
"loss": 13.7935, |
|
"step": 441 |
|
}, |
|
{ |
|
"epoch": 0.8997455470737914, |
|
"grad_norm": 2406731.2012268724, |
|
"learning_rate": 3.825263988092587e-07, |
|
"loss": 13.8001, |
|
"step": 442 |
|
}, |
|
{ |
|
"epoch": 0.9017811704834605, |
|
"grad_norm": 1850343.550069675, |
|
"learning_rate": 3.672644856584928e-07, |
|
"loss": 13.8345, |
|
"step": 443 |
|
}, |
|
{ |
|
"epoch": 0.9038167938931297, |
|
"grad_norm": 2877460.3830578206, |
|
"learning_rate": 3.523016483957742e-07, |
|
"loss": 13.8395, |
|
"step": 444 |
|
}, |
|
{ |
|
"epoch": 0.905852417302799, |
|
"grad_norm": 3386724.5492421445, |
|
"learning_rate": 3.3763885297822153e-07, |
|
"loss": 13.8028, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.9078880407124682, |
|
"grad_norm": 2849485.569450586, |
|
"learning_rate": 3.2327704599312283e-07, |
|
"loss": 13.8038, |
|
"step": 446 |
|
}, |
|
{ |
|
"epoch": 0.9099236641221374, |
|
"grad_norm": 3175459.533705238, |
|
"learning_rate": 3.0921715459683753e-07, |
|
"loss": 13.8303, |
|
"step": 447 |
|
}, |
|
{ |
|
"epoch": 0.9119592875318067, |
|
"grad_norm": 22724636.64948581, |
|
"learning_rate": 2.95460086454929e-07, |
|
"loss": 13.772, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 0.9139949109414758, |
|
"grad_norm": 2467166.6966894856, |
|
"learning_rate": 2.820067296835799e-07, |
|
"loss": 13.7719, |
|
"step": 449 |
|
}, |
|
{ |
|
"epoch": 0.916030534351145, |
|
"grad_norm": 3412055.676564019, |
|
"learning_rate": 2.688579527922514e-07, |
|
"loss": 13.7956, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.9180661577608142, |
|
"grad_norm": 2620791.7542605917, |
|
"learning_rate": 2.560146046276135e-07, |
|
"loss": 13.7717, |
|
"step": 451 |
|
}, |
|
{ |
|
"epoch": 0.9201017811704835, |
|
"grad_norm": 2077929.3418459287, |
|
"learning_rate": 2.4347751431875453e-07, |
|
"loss": 13.7763, |
|
"step": 452 |
|
}, |
|
{ |
|
"epoch": 0.9221374045801527, |
|
"grad_norm": 2370550.2619939703, |
|
"learning_rate": 2.3124749122364286e-07, |
|
"loss": 13.7964, |
|
"step": 453 |
|
}, |
|
{ |
|
"epoch": 0.9241730279898219, |
|
"grad_norm": 5750923.606982284, |
|
"learning_rate": 2.1932532487688784e-07, |
|
"loss": 13.791, |
|
"step": 454 |
|
}, |
|
{ |
|
"epoch": 0.926208651399491, |
|
"grad_norm": 3460175.4976370293, |
|
"learning_rate": 2.0771178493876387e-07, |
|
"loss": 13.79, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.9282442748091603, |
|
"grad_norm": 5901311.706134819, |
|
"learning_rate": 1.964076211455246e-07, |
|
"loss": 13.7797, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 0.9302798982188295, |
|
"grad_norm": 5105079.708966371, |
|
"learning_rate": 1.8541356326100436e-07, |
|
"loss": 13.7849, |
|
"step": 457 |
|
}, |
|
{ |
|
"epoch": 0.9323155216284987, |
|
"grad_norm": 4198563.584774799, |
|
"learning_rate": 1.7473032102949983e-07, |
|
"loss": 13.8415, |
|
"step": 458 |
|
}, |
|
{ |
|
"epoch": 0.934351145038168, |
|
"grad_norm": 3844225.7229981595, |
|
"learning_rate": 1.6435858412996275e-07, |
|
"loss": 13.766, |
|
"step": 459 |
|
}, |
|
{ |
|
"epoch": 0.9363867684478372, |
|
"grad_norm": 3854801.945640343, |
|
"learning_rate": 1.542990221314644e-07, |
|
"loss": 13.7996, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.9384223918575063, |
|
"grad_norm": 4666453.104767151, |
|
"learning_rate": 1.445522844499775e-07, |
|
"loss": 13.8, |
|
"step": 461 |
|
}, |
|
{ |
|
"epoch": 0.9404580152671755, |
|
"grad_norm": 5472437.376052735, |
|
"learning_rate": 1.3511900030644954e-07, |
|
"loss": 13.7599, |
|
"step": 462 |
|
}, |
|
{ |
|
"epoch": 0.9424936386768448, |
|
"grad_norm": 4682435.578567764, |
|
"learning_rate": 1.2599977868618052e-07, |
|
"loss": 13.7934, |
|
"step": 463 |
|
}, |
|
{ |
|
"epoch": 0.944529262086514, |
|
"grad_norm": 9272421.276272465, |
|
"learning_rate": 1.1719520829951203e-07, |
|
"loss": 13.8146, |
|
"step": 464 |
|
}, |
|
{ |
|
"epoch": 0.9465648854961832, |
|
"grad_norm": 4195416.415355079, |
|
"learning_rate": 1.087058575438199e-07, |
|
"loss": 13.8253, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.9486005089058525, |
|
"grad_norm": 3012392.88106662, |
|
"learning_rate": 1.0053227446681912e-07, |
|
"loss": 13.8254, |
|
"step": 466 |
|
}, |
|
{ |
|
"epoch": 0.9506361323155216, |
|
"grad_norm": 4716265.36093074, |
|
"learning_rate": 9.267498673118547e-08, |
|
"loss": 13.8287, |
|
"step": 467 |
|
}, |
|
{ |
|
"epoch": 0.9526717557251908, |
|
"grad_norm": 3046833.346747879, |
|
"learning_rate": 8.513450158049109e-08, |
|
"loss": 13.7954, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 0.95470737913486, |
|
"grad_norm": 3919156.6955382572, |
|
"learning_rate": 7.791130580645623e-08, |
|
"loss": 13.7727, |
|
"step": 469 |
|
}, |
|
{ |
|
"epoch": 0.9567430025445293, |
|
"grad_norm": 2642703.3691420523, |
|
"learning_rate": 7.100586571752444e-08, |
|
"loss": 13.8211, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.9587786259541985, |
|
"grad_norm": 2860021.0276084486, |
|
"learning_rate": 6.441862710876102e-08, |
|
"loss": 13.7861, |
|
"step": 471 |
|
}, |
|
{ |
|
"epoch": 0.9608142493638677, |
|
"grad_norm": 2389117.180384712, |
|
"learning_rate": 5.815001523307162e-08, |
|
"loss": 13.7998, |
|
"step": 472 |
|
}, |
|
{ |
|
"epoch": 0.9628498727735368, |
|
"grad_norm": 4343380.201960487, |
|
"learning_rate": 5.220043477374759e-08, |
|
"loss": 13.7891, |
|
"step": 473 |
|
}, |
|
{ |
|
"epoch": 0.9648854961832061, |
|
"grad_norm": 5212705.489252775, |
|
"learning_rate": 4.657026981834623e-08, |
|
"loss": 13.8108, |
|
"step": 474 |
|
}, |
|
{ |
|
"epoch": 0.9669211195928753, |
|
"grad_norm": 2520458.4461040264, |
|
"learning_rate": 4.125988383388957e-08, |
|
"loss": 13.8067, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.9689567430025445, |
|
"grad_norm": 4182021.7452560714, |
|
"learning_rate": 3.626961964340203e-08, |
|
"loss": 13.7897, |
|
"step": 476 |
|
}, |
|
{ |
|
"epoch": 0.9709923664122138, |
|
"grad_norm": 10159149.969018588, |
|
"learning_rate": 3.159979940378088e-08, |
|
"loss": 13.8046, |
|
"step": 477 |
|
}, |
|
{ |
|
"epoch": 0.973027989821883, |
|
"grad_norm": 2510949.593741673, |
|
"learning_rate": 2.725072458499567e-08, |
|
"loss": 13.8275, |
|
"step": 478 |
|
}, |
|
{ |
|
"epoch": 0.9750636132315522, |
|
"grad_norm": 2900958.891211133, |
|
"learning_rate": 2.3222675950627106e-08, |
|
"loss": 13.7972, |
|
"step": 479 |
|
}, |
|
{ |
|
"epoch": 0.9770992366412213, |
|
"grad_norm": 3768468.1026937226, |
|
"learning_rate": 1.9515913539743247e-08, |
|
"loss": 13.8208, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.9791348600508906, |
|
"grad_norm": 2563277.62892743, |
|
"learning_rate": 1.613067665010959e-08, |
|
"loss": 13.8265, |
|
"step": 481 |
|
}, |
|
{ |
|
"epoch": 0.9811704834605598, |
|
"grad_norm": 23583570.860715404, |
|
"learning_rate": 1.3067183822742525e-08, |
|
"loss": 13.8262, |
|
"step": 482 |
|
}, |
|
{ |
|
"epoch": 0.983206106870229, |
|
"grad_norm": 8371515.456262343, |
|
"learning_rate": 1.0325632827801745e-08, |
|
"loss": 13.8356, |
|
"step": 483 |
|
}, |
|
{ |
|
"epoch": 0.9852417302798983, |
|
"grad_norm": 4260314.378446997, |
|
"learning_rate": 7.906200651819907e-09, |
|
"loss": 13.8054, |
|
"step": 484 |
|
}, |
|
{ |
|
"epoch": 0.9872773536895675, |
|
"grad_norm": 2235986.274852917, |
|
"learning_rate": 5.809043486279531e-09, |
|
"loss": 13.7764, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 0.9893129770992366, |
|
"grad_norm": 2261687.826321509, |
|
"learning_rate": 4.034296717527752e-09, |
|
"loss": 13.8129, |
|
"step": 486 |
|
}, |
|
{ |
|
"epoch": 0.9913486005089058, |
|
"grad_norm": 5923746.692619356, |
|
"learning_rate": 2.5820749180388573e-09, |
|
"loss": 13.843, |
|
"step": 487 |
|
}, |
|
{ |
|
"epoch": 0.9933842239185751, |
|
"grad_norm": 2729193.3221547497, |
|
"learning_rate": 1.4524718390140913e-09, |
|
"loss": 13.8395, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 0.9954198473282443, |
|
"grad_norm": 2703798.169864237, |
|
"learning_rate": 6.455604043331676e-10, |
|
"loss": 13.8165, |
|
"step": 489 |
|
}, |
|
{ |
|
"epoch": 0.9974554707379135, |
|
"grad_norm": 3350867.6861675973, |
|
"learning_rate": 1.6139270584358823e-10, |
|
"loss": 13.8103, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.9994910941475827, |
|
"grad_norm": 1851326.2112600075, |
|
"learning_rate": 0.0, |
|
"loss": 13.8163, |
|
"step": 491 |
|
}, |
|
{ |
|
"epoch": 0.9994910941475827, |
|
"eval_loss": 13.805337905883789, |
|
"eval_runtime": 382.78, |
|
"eval_samples_per_second": 2.283, |
|
"eval_steps_per_second": 0.572, |
|
"step": 491 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 491, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 100344791040000.0, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|