{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.0, |
|
"eval_steps": 500, |
|
"global_step": 346, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.005780346820809248, |
|
"grad_norm": 12.858534195216157, |
|
"learning_rate": 1.111111111111111e-07, |
|
"loss": 1.0276, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.011560693641618497, |
|
"grad_norm": 12.232356830857812, |
|
"learning_rate": 2.222222222222222e-07, |
|
"loss": 1.0802, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.017341040462427744, |
|
"grad_norm": 12.065559434019754, |
|
"learning_rate": 3.333333333333333e-07, |
|
"loss": 1.0239, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.023121387283236993, |
|
"grad_norm": 11.287800396105506, |
|
"learning_rate": 4.444444444444444e-07, |
|
"loss": 1.0354, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.028901734104046242, |
|
"grad_norm": 12.118961503435317, |
|
"learning_rate": 5.555555555555555e-07, |
|
"loss": 1.0642, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.03468208092485549, |
|
"grad_norm": 10.614304274386905, |
|
"learning_rate": 6.666666666666666e-07, |
|
"loss": 1.0397, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.04046242774566474, |
|
"grad_norm": 10.029891779857477, |
|
"learning_rate": 7.777777777777778e-07, |
|
"loss": 1.0264, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.046242774566473986, |
|
"grad_norm": 8.67060643578771, |
|
"learning_rate": 8.888888888888888e-07, |
|
"loss": 0.9512, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.05202312138728324, |
|
"grad_norm": 7.757378710626247, |
|
"learning_rate": 1e-06, |
|
"loss": 0.9608, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.057803468208092484, |
|
"grad_norm": 7.142309385955806, |
|
"learning_rate": 1.111111111111111e-06, |
|
"loss": 0.9715, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.06358381502890173, |
|
"grad_norm": 6.115644634164, |
|
"learning_rate": 1.2222222222222223e-06, |
|
"loss": 0.9309, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.06936416184971098, |
|
"grad_norm": 6.526598892938256, |
|
"learning_rate": 1.3333333333333332e-06, |
|
"loss": 0.9119, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.07514450867052024, |
|
"grad_norm": 5.701678407172686, |
|
"learning_rate": 1.4444444444444443e-06, |
|
"loss": 0.9509, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.08092485549132948, |
|
"grad_norm": 4.636147369080921, |
|
"learning_rate": 1.5555555555555556e-06, |
|
"loss": 0.9168, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.08670520231213873, |
|
"grad_norm": 4.795029784944483, |
|
"learning_rate": 1.6666666666666667e-06, |
|
"loss": 0.8854, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.09248554913294797, |
|
"grad_norm": 4.272500187460561, |
|
"learning_rate": 1.7777777777777775e-06, |
|
"loss": 0.8752, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.09826589595375723, |
|
"grad_norm": 4.381820648769374, |
|
"learning_rate": 1.8888888888888888e-06, |
|
"loss": 0.8719, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.10404624277456648, |
|
"grad_norm": 4.2844649887933635, |
|
"learning_rate": 2e-06, |
|
"loss": 0.8252, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.10982658959537572, |
|
"grad_norm": 4.672757325891935, |
|
"learning_rate": 1.9999541310559686e-06, |
|
"loss": 0.7784, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.11560693641618497, |
|
"grad_norm": 4.140790454113861, |
|
"learning_rate": 1.9998165284317942e-06, |
|
"loss": 0.7805, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.12138728323699421, |
|
"grad_norm": 3.806744515631364, |
|
"learning_rate": 1.999587204750851e-06, |
|
"loss": 0.775, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.12716763005780346, |
|
"grad_norm": 4.3316680977985635, |
|
"learning_rate": 1.99926618105081e-06, |
|
"loss": 0.757, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.1329479768786127, |
|
"grad_norm": 6.658906531282003, |
|
"learning_rate": 1.9988534867817065e-06, |
|
"loss": 0.7762, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.13872832369942195, |
|
"grad_norm": 3.2840929759235498, |
|
"learning_rate": 1.998349159803241e-06, |
|
"loss": 0.7427, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.14450867052023122, |
|
"grad_norm": 3.1908462214509865, |
|
"learning_rate": 1.9977532463813065e-06, |
|
"loss": 0.7354, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.15028901734104047, |
|
"grad_norm": 3.3673488290162124, |
|
"learning_rate": 1.9970658011837403e-06, |
|
"loss": 0.6922, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.15606936416184972, |
|
"grad_norm": 3.682299563499068, |
|
"learning_rate": 1.9962868872753143e-06, |
|
"loss": 0.7066, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.16184971098265896, |
|
"grad_norm": 3.039028308153685, |
|
"learning_rate": 1.9954165761119447e-06, |
|
"loss": 0.6644, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.1676300578034682, |
|
"grad_norm": 3.0874127013971147, |
|
"learning_rate": 1.99445494753414e-06, |
|
"loss": 0.6828, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.17341040462427745, |
|
"grad_norm": 3.0558701865961884, |
|
"learning_rate": 1.9934020897596747e-06, |
|
"loss": 0.6854, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.1791907514450867, |
|
"grad_norm": 2.6355155421023206, |
|
"learning_rate": 1.992258099375498e-06, |
|
"loss": 0.7024, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.18497109826589594, |
|
"grad_norm": 2.671560023832056, |
|
"learning_rate": 1.991023081328871e-06, |
|
"loss": 0.6726, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.1907514450867052, |
|
"grad_norm": 2.5109346773716648, |
|
"learning_rate": 1.9896971489177416e-06, |
|
"loss": 0.6953, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.19653179190751446, |
|
"grad_norm": 2.671094772757736, |
|
"learning_rate": 1.9882804237803485e-06, |
|
"loss": 0.6749, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.2023121387283237, |
|
"grad_norm": 2.55280503478005, |
|
"learning_rate": 1.986773035884064e-06, |
|
"loss": 0.6313, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.20809248554913296, |
|
"grad_norm": 2.997693445850087, |
|
"learning_rate": 1.98517512351347e-06, |
|
"loss": 0.6912, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.2138728323699422, |
|
"grad_norm": 2.594837372299819, |
|
"learning_rate": 1.9834868332576726e-06, |
|
"loss": 0.7095, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.21965317919075145, |
|
"grad_norm": 2.248796606760258, |
|
"learning_rate": 1.981708319996855e-06, |
|
"loss": 0.6278, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.2254335260115607, |
|
"grad_norm": 2.4139138839205456, |
|
"learning_rate": 1.9798397468880667e-06, |
|
"loss": 0.6601, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.23121387283236994, |
|
"grad_norm": 5.055501773819069, |
|
"learning_rate": 1.977881285350259e-06, |
|
"loss": 0.6615, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.23699421965317918, |
|
"grad_norm": 2.3632268984701055, |
|
"learning_rate": 1.975833115048557e-06, |
|
"loss": 0.6483, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.24277456647398843, |
|
"grad_norm": 2.830666537018327, |
|
"learning_rate": 1.973695423877779e-06, |
|
"loss": 0.6755, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.24855491329479767, |
|
"grad_norm": 5.0327630976034765, |
|
"learning_rate": 1.9714684079451977e-06, |
|
"loss": 0.6932, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.2543352601156069, |
|
"grad_norm": 2.5324019369470454, |
|
"learning_rate": 1.9691522715525517e-06, |
|
"loss": 0.6662, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.26011560693641617, |
|
"grad_norm": 4.5913801929017515, |
|
"learning_rate": 1.9667472271773023e-06, |
|
"loss": 0.642, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.2658959537572254, |
|
"grad_norm": 2.5796270825002288, |
|
"learning_rate": 1.964253495453141e-06, |
|
"loss": 0.6501, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.27167630057803466, |
|
"grad_norm": 3.180271404782544, |
|
"learning_rate": 1.9616713051497493e-06, |
|
"loss": 0.6556, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.2774566473988439, |
|
"grad_norm": 2.4116071032049335, |
|
"learning_rate": 1.959000893151813e-06, |
|
"loss": 0.6506, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.2832369942196532, |
|
"grad_norm": 2.483272208248862, |
|
"learning_rate": 1.9562425044372884e-06, |
|
"loss": 0.6329, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.28901734104046245, |
|
"grad_norm": 2.3308296183451187, |
|
"learning_rate": 1.9533963920549303e-06, |
|
"loss": 0.6388, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.2947976878612717, |
|
"grad_norm": 2.622711013779675, |
|
"learning_rate": 1.950462817101079e-06, |
|
"loss": 0.6474, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.30057803468208094, |
|
"grad_norm": 2.1446274232932554, |
|
"learning_rate": 1.947442048695704e-06, |
|
"loss": 0.6592, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.3063583815028902, |
|
"grad_norm": 3.3168218759230275, |
|
"learning_rate": 1.9443343639577202e-06, |
|
"loss": 0.6504, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.31213872832369943, |
|
"grad_norm": 2.256777697180995, |
|
"learning_rate": 1.9411400479795615e-06, |
|
"loss": 0.6297, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.3179190751445087, |
|
"grad_norm": 2.1765404003111746, |
|
"learning_rate": 1.93785939380103e-06, |
|
"loss": 0.6336, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.3236994219653179, |
|
"grad_norm": 2.2217137619742906, |
|
"learning_rate": 1.934492702382411e-06, |
|
"loss": 0.6076, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.32947976878612717, |
|
"grad_norm": 5.281605039359051, |
|
"learning_rate": 1.931040282576865e-06, |
|
"loss": 0.6413, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.3352601156069364, |
|
"grad_norm": 2.681122616975899, |
|
"learning_rate": 1.927502451102095e-06, |
|
"loss": 0.6327, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.34104046242774566, |
|
"grad_norm": 2.3609007898823515, |
|
"learning_rate": 1.9238795325112867e-06, |
|
"loss": 0.6184, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.3468208092485549, |
|
"grad_norm": 2.6838363995230226, |
|
"learning_rate": 1.9201718591633418e-06, |
|
"loss": 0.6032, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.35260115606936415, |
|
"grad_norm": 2.4540324898863677, |
|
"learning_rate": 1.9163797711923823e-06, |
|
"loss": 0.5896, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.3583815028901734, |
|
"grad_norm": 2.298112606896863, |
|
"learning_rate": 1.91250361647655e-06, |
|
"loss": 0.6412, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.36416184971098264, |
|
"grad_norm": 2.2321684695854094, |
|
"learning_rate": 1.9085437506060924e-06, |
|
"loss": 0.5847, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.3699421965317919, |
|
"grad_norm": 2.1730547121243453, |
|
"learning_rate": 1.9045005368507417e-06, |
|
"loss": 0.6202, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.37572254335260113, |
|
"grad_norm": 2.648301208745142, |
|
"learning_rate": 1.9003743461263883e-06, |
|
"loss": 0.6246, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.3815028901734104, |
|
"grad_norm": 2.6270984399658377, |
|
"learning_rate": 1.8961655569610556e-06, |
|
"loss": 0.6336, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.3872832369942196, |
|
"grad_norm": 2.10356408342345, |
|
"learning_rate": 1.8918745554601724e-06, |
|
"loss": 0.6125, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.3930635838150289, |
|
"grad_norm": 2.0797656753425504, |
|
"learning_rate": 1.8875017352711545e-06, |
|
"loss": 0.6071, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.3988439306358382, |
|
"grad_norm": 2.3796052834987647, |
|
"learning_rate": 1.8830474975472903e-06, |
|
"loss": 0.6139, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.4046242774566474, |
|
"grad_norm": 2.195962808705189, |
|
"learning_rate": 1.8785122509109423e-06, |
|
"loss": 0.6376, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.41040462427745666, |
|
"grad_norm": 3.372968127914077, |
|
"learning_rate": 1.8738964114160583e-06, |
|
"loss": 0.6012, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.4161849710982659, |
|
"grad_norm": 2.4690022952436745, |
|
"learning_rate": 1.8692004025100051e-06, |
|
"loss": 0.6102, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.42196531791907516, |
|
"grad_norm": 2.9541703010934683, |
|
"learning_rate": 1.8644246549947224e-06, |
|
"loss": 0.5802, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.4277456647398844, |
|
"grad_norm": 2.219684411723982, |
|
"learning_rate": 1.859569606987201e-06, |
|
"loss": 0.6019, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.43352601156069365, |
|
"grad_norm": 3.2503821713287238, |
|
"learning_rate": 1.8546357038792918e-06, |
|
"loss": 0.6044, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.4393063583815029, |
|
"grad_norm": 2.3126944705292267, |
|
"learning_rate": 1.8496233982968455e-06, |
|
"loss": 0.6194, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.44508670520231214, |
|
"grad_norm": 2.2966676588642807, |
|
"learning_rate": 1.8445331500581904e-06, |
|
"loss": 0.6529, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.4508670520231214, |
|
"grad_norm": 2.2129730164894132, |
|
"learning_rate": 1.83936542613195e-06, |
|
"loss": 0.6032, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.45664739884393063, |
|
"grad_norm": 2.4969392606595764, |
|
"learning_rate": 1.8341207005942032e-06, |
|
"loss": 0.6165, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.4624277456647399, |
|
"grad_norm": 3.5760934505103146, |
|
"learning_rate": 1.8287994545849945e-06, |
|
"loss": 0.6135, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.4682080924855491, |
|
"grad_norm": 2.5392579698029802, |
|
"learning_rate": 1.8234021762641945e-06, |
|
"loss": 0.6132, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.47398843930635837, |
|
"grad_norm": 2.3783180990515156, |
|
"learning_rate": 1.8179293607667177e-06, |
|
"loss": 0.6026, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.4797687861271676, |
|
"grad_norm": 2.3017213740362674, |
|
"learning_rate": 1.8123815101570995e-06, |
|
"loss": 0.6068, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.48554913294797686, |
|
"grad_norm": 3.136830371172065, |
|
"learning_rate": 1.806759133383438e-06, |
|
"loss": 0.6198, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.4913294797687861, |
|
"grad_norm": 2.1335031297446356, |
|
"learning_rate": 1.8010627462307046e-06, |
|
"loss": 0.6317, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.49710982658959535, |
|
"grad_norm": 2.375395675829993, |
|
"learning_rate": 1.7952928712734265e-06, |
|
"loss": 0.5952, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.5028901734104047, |
|
"grad_norm": 2.338668259539757, |
|
"learning_rate": 1.789450037827746e-06, |
|
"loss": 0.6005, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.5086705202312138, |
|
"grad_norm": 2.0920820695913602, |
|
"learning_rate": 1.783534781902864e-06, |
|
"loss": 0.6108, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.5144508670520231, |
|
"grad_norm": 2.6397590095844516, |
|
"learning_rate": 1.7775476461518666e-06, |
|
"loss": 0.597, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.5202312138728323, |
|
"grad_norm": 2.2731273093251465, |
|
"learning_rate": 1.771489179821943e-06, |
|
"loss": 0.6205, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.5260115606936416, |
|
"grad_norm": 2.8578178406983854, |
|
"learning_rate": 1.765359938703999e-06, |
|
"loss": 0.624, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.5317919075144508, |
|
"grad_norm": 2.1683178371524385, |
|
"learning_rate": 1.7591604850816704e-06, |
|
"loss": 0.6008, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.5375722543352601, |
|
"grad_norm": 2.0931424578479216, |
|
"learning_rate": 1.7528913876797397e-06, |
|
"loss": 0.5781, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.5433526011560693, |
|
"grad_norm": 2.046087261006992, |
|
"learning_rate": 1.7465532216119624e-06, |
|
"loss": 0.5942, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.5491329479768786, |
|
"grad_norm": 2.0926363828893666, |
|
"learning_rate": 1.740146568328308e-06, |
|
"loss": 0.5845, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.5549132947976878, |
|
"grad_norm": 2.2826438145157253, |
|
"learning_rate": 1.7336720155616185e-06, |
|
"loss": 0.5771, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.5606936416184971, |
|
"grad_norm": 2.1729270459605075, |
|
"learning_rate": 1.7271301572736903e-06, |
|
"loss": 0.6072, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.5664739884393064, |
|
"grad_norm": 2.1958221501396804, |
|
"learning_rate": 1.7205215936007869e-06, |
|
"loss": 0.5795, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.5722543352601156, |
|
"grad_norm": 2.474781726279538, |
|
"learning_rate": 1.713846930798583e-06, |
|
"loss": 0.5904, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.5780346820809249, |
|
"grad_norm": 2.7388227639736753, |
|
"learning_rate": 1.7071067811865474e-06, |
|
"loss": 0.5781, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.5838150289017341, |
|
"grad_norm": 2.0078080924277963, |
|
"learning_rate": 1.700301763091771e-06, |
|
"loss": 0.6035, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.5895953757225434, |
|
"grad_norm": 2.057681142337808, |
|
"learning_rate": 1.6934325007922417e-06, |
|
"loss": 0.5993, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.5953757225433526, |
|
"grad_norm": 2.129426765761644, |
|
"learning_rate": 1.6864996244595755e-06, |
|
"loss": 0.556, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.6011560693641619, |
|
"grad_norm": 2.135875027874883, |
|
"learning_rate": 1.6795037701012055e-06, |
|
"loss": 0.6003, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.6069364161849711, |
|
"grad_norm": 2.254631204567563, |
|
"learning_rate": 1.6724455795020357e-06, |
|
"loss": 0.5819, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.6127167630057804, |
|
"grad_norm": 2.369367886126561, |
|
"learning_rate": 1.665325700165565e-06, |
|
"loss": 0.5825, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.6184971098265896, |
|
"grad_norm": 3.8018094119209382, |
|
"learning_rate": 1.6581447852544877e-06, |
|
"loss": 0.5584, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.6242774566473989, |
|
"grad_norm": 2.2913674882572663, |
|
"learning_rate": 1.6509034935307714e-06, |
|
"loss": 0.6143, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.630057803468208, |
|
"grad_norm": 3.297808364709142, |
|
"learning_rate": 1.6436024892952253e-06, |
|
"loss": 0.567, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.6358381502890174, |
|
"grad_norm": 2.1684588873033563, |
|
"learning_rate": 1.6362424423265597e-06, |
|
"loss": 0.5895, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.6416184971098265, |
|
"grad_norm": 2.1187621296515484, |
|
"learning_rate": 1.6288240278199393e-06, |
|
"loss": 0.5775, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.6473988439306358, |
|
"grad_norm": 2.0285848144538057, |
|
"learning_rate": 1.6213479263250432e-06, |
|
"loss": 0.5953, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.653179190751445, |
|
"grad_norm": 2.3252936492887484, |
|
"learning_rate": 1.6138148236836337e-06, |
|
"loss": 0.5773, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 0.6589595375722543, |
|
"grad_norm": 3.325919273645213, |
|
"learning_rate": 1.606225410966638e-06, |
|
"loss": 0.5909, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.6647398843930635, |
|
"grad_norm": 2.269916885150113, |
|
"learning_rate": 1.5985803844107502e-06, |
|
"loss": 0.5843, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.6705202312138728, |
|
"grad_norm": 2.3232673917342397, |
|
"learning_rate": 1.5908804453545606e-06, |
|
"loss": 0.5978, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.6763005780346821, |
|
"grad_norm": 3.4581168744539714, |
|
"learning_rate": 1.5831263001742165e-06, |
|
"loss": 0.5804, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.6820809248554913, |
|
"grad_norm": 2.0671980165005728, |
|
"learning_rate": 1.5753186602186206e-06, |
|
"loss": 0.5725, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.6878612716763006, |
|
"grad_norm": 2.3838726654032674, |
|
"learning_rate": 1.5674582417441731e-06, |
|
"loss": 0.5808, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 0.6936416184971098, |
|
"grad_norm": 2.2314237741431664, |
|
"learning_rate": 1.559545765849064e-06, |
|
"loss": 0.5965, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.6994219653179191, |
|
"grad_norm": 2.846135318582479, |
|
"learning_rate": 1.5515819584071214e-06, |
|
"loss": 0.5988, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 0.7052023121387283, |
|
"grad_norm": 2.0012935495630853, |
|
"learning_rate": 1.5435675500012212e-06, |
|
"loss": 0.5819, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.7109826589595376, |
|
"grad_norm": 2.217041365110391, |
|
"learning_rate": 1.535503275856264e-06, |
|
"loss": 0.5976, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 0.7167630057803468, |
|
"grad_norm": 2.032603573737414, |
|
"learning_rate": 1.5273898757717292e-06, |
|
"loss": 0.6074, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.7225433526011561, |
|
"grad_norm": 2.3350139620743864, |
|
"learning_rate": 1.5192280940538055e-06, |
|
"loss": 0.5844, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.7283236994219653, |
|
"grad_norm": 2.714512688497629, |
|
"learning_rate": 1.5110186794471103e-06, |
|
"loss": 0.5884, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.7341040462427746, |
|
"grad_norm": 2.6037265515423984, |
|
"learning_rate": 1.502762385066002e-06, |
|
"loss": 0.5895, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 0.7398843930635838, |
|
"grad_norm": 2.2313177421020667, |
|
"learning_rate": 1.49445996832549e-06, |
|
"loss": 0.5997, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.7456647398843931, |
|
"grad_norm": 1.964807059869446, |
|
"learning_rate": 1.4861121908717526e-06, |
|
"loss": 0.5605, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.7514450867052023, |
|
"grad_norm": 2.3890635979269703, |
|
"learning_rate": 1.4777198185122628e-06, |
|
"loss": 0.5816, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.7572254335260116, |
|
"grad_norm": 2.3155649540352687, |
|
"learning_rate": 1.469283621145537e-06, |
|
"loss": 0.5742, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 0.7630057803468208, |
|
"grad_norm": 2.224388579727507, |
|
"learning_rate": 1.4608043726905049e-06, |
|
"loss": 0.6063, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.7687861271676301, |
|
"grad_norm": 2.049353631849029, |
|
"learning_rate": 1.4522828510155121e-06, |
|
"loss": 0.588, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 0.7745664739884393, |
|
"grad_norm": 2.3198548514734147, |
|
"learning_rate": 1.4437198378669597e-06, |
|
"loss": 0.5607, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.7803468208092486, |
|
"grad_norm": 2.0249418082784727, |
|
"learning_rate": 1.4351161187975902e-06, |
|
"loss": 0.5691, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.7861271676300579, |
|
"grad_norm": 2.3694044016660745, |
|
"learning_rate": 1.4264724830944197e-06, |
|
"loss": 0.5925, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.791907514450867, |
|
"grad_norm": 2.249961753553928, |
|
"learning_rate": 1.4177897237063335e-06, |
|
"loss": 0.5661, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 0.7976878612716763, |
|
"grad_norm": 2.4659041973808735, |
|
"learning_rate": 1.40906863717134e-06, |
|
"loss": 0.5408, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.8034682080924855, |
|
"grad_norm": 2.0311840956249068, |
|
"learning_rate": 1.4003100235434998e-06, |
|
"loss": 0.5382, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 0.8092485549132948, |
|
"grad_norm": 2.176204821196535, |
|
"learning_rate": 1.391514686319529e-06, |
|
"loss": 0.5819, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.815028901734104, |
|
"grad_norm": 2.5319891016136324, |
|
"learning_rate": 1.3826834323650898e-06, |
|
"loss": 0.5612, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 0.8208092485549133, |
|
"grad_norm": 2.2126657534234933, |
|
"learning_rate": 1.3738170718407686e-06, |
|
"loss": 0.561, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.8265895953757225, |
|
"grad_norm": 2.039076902248122, |
|
"learning_rate": 1.3649164181277553e-06, |
|
"loss": 0.5701, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 0.8323699421965318, |
|
"grad_norm": 1.967704448699255, |
|
"learning_rate": 1.3559822877532232e-06, |
|
"loss": 0.5919, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.838150289017341, |
|
"grad_norm": 2.3220611845701677, |
|
"learning_rate": 1.3470155003154248e-06, |
|
"loss": 0.5737, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.8439306358381503, |
|
"grad_norm": 2.0704795325160252, |
|
"learning_rate": 1.3380168784085026e-06, |
|
"loss": 0.5594, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.8497109826589595, |
|
"grad_norm": 2.1453836439669374, |
|
"learning_rate": 1.3289872475470256e-06, |
|
"loss": 0.5531, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 0.8554913294797688, |
|
"grad_norm": 2.2895757955477274, |
|
"learning_rate": 1.3199274360902588e-06, |
|
"loss": 0.6007, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.861271676300578, |
|
"grad_norm": 5.3794532392535634, |
|
"learning_rate": 1.310838275166172e-06, |
|
"loss": 0.5744, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 0.8670520231213873, |
|
"grad_norm": 2.3811778937325054, |
|
"learning_rate": 1.3017205985951924e-06, |
|
"loss": 0.5676, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.8728323699421965, |
|
"grad_norm": 2.106851655179906, |
|
"learning_rate": 1.2925752428137125e-06, |
|
"loss": 0.6148, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 0.8786127167630058, |
|
"grad_norm": 2.0537876367376664, |
|
"learning_rate": 1.2834030467973571e-06, |
|
"loss": 0.5762, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.884393063583815, |
|
"grad_norm": 2.2071876047592727, |
|
"learning_rate": 1.274204851984018e-06, |
|
"loss": 0.5758, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 0.8901734104046243, |
|
"grad_norm": 2.13960824519687, |
|
"learning_rate": 1.264981502196662e-06, |
|
"loss": 0.5445, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 0.8959537572254336, |
|
"grad_norm": 2.4673552744509157, |
|
"learning_rate": 1.255733843565918e-06, |
|
"loss": 0.6071, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.9017341040462428, |
|
"grad_norm": 1.9871480766650433, |
|
"learning_rate": 1.2464627244524593e-06, |
|
"loss": 0.576, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.9075144508670521, |
|
"grad_norm": 2.19749359656742, |
|
"learning_rate": 1.237168995369173e-06, |
|
"loss": 0.5799, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 0.9132947976878613, |
|
"grad_norm": 2.1193685610704476, |
|
"learning_rate": 1.2278535089031377e-06, |
|
"loss": 0.5879, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 0.9190751445086706, |
|
"grad_norm": 2.1184302830201744, |
|
"learning_rate": 1.2185171196374078e-06, |
|
"loss": 0.5372, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 0.9248554913294798, |
|
"grad_norm": 2.0407022944958033, |
|
"learning_rate": 1.2091606840726167e-06, |
|
"loss": 0.585, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.930635838150289, |
|
"grad_norm": 2.168905043199937, |
|
"learning_rate": 1.1997850605484032e-06, |
|
"loss": 0.5604, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 0.9364161849710982, |
|
"grad_norm": 2.138142429910041, |
|
"learning_rate": 1.1903911091646684e-06, |
|
"loss": 0.593, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.9421965317919075, |
|
"grad_norm": 2.4892163650092516, |
|
"learning_rate": 1.1809796917026728e-06, |
|
"loss": 0.6056, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 0.9479768786127167, |
|
"grad_norm": 1.9576416066436553, |
|
"learning_rate": 1.1715516715459784e-06, |
|
"loss": 0.564, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.953757225433526, |
|
"grad_norm": 2.046605869264703, |
|
"learning_rate": 1.1621079136012425e-06, |
|
"loss": 0.5769, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.9595375722543352, |
|
"grad_norm": 2.094005375428543, |
|
"learning_rate": 1.1526492842188744e-06, |
|
"loss": 0.5847, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 0.9653179190751445, |
|
"grad_norm": 2.070280693183011, |
|
"learning_rate": 1.143176651113558e-06, |
|
"loss": 0.5564, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 0.9710982658959537, |
|
"grad_norm": 2.2148708162128212, |
|
"learning_rate": 1.1336908832846483e-06, |
|
"loss": 0.5263, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.976878612716763, |
|
"grad_norm": 2.1683895469622496, |
|
"learning_rate": 1.124192850936453e-06, |
|
"loss": 0.5375, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 0.9826589595375722, |
|
"grad_norm": 2.0595174642305216, |
|
"learning_rate": 1.1146834253984005e-06, |
|
"loss": 0.5859, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.9884393063583815, |
|
"grad_norm": 2.4501932894172653, |
|
"learning_rate": 1.1051634790451058e-06, |
|
"loss": 0.5525, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 0.9942196531791907, |
|
"grad_norm": 2.073182251777239, |
|
"learning_rate": 1.0956338852163423e-06, |
|
"loss": 0.5797, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 2.1339124056307996, |
|
"learning_rate": 1.0860955181369217e-06, |
|
"loss": 0.5445, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 1.0057803468208093, |
|
"grad_norm": 1.889514155050403, |
|
"learning_rate": 1.076549252836496e-06, |
|
"loss": 0.4816, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 1.0115606936416186, |
|
"grad_norm": 1.9256918662534441, |
|
"learning_rate": 1.0669959650692818e-06, |
|
"loss": 0.5073, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 1.0173410404624277, |
|
"grad_norm": 1.9262060377414805, |
|
"learning_rate": 1.0574365312337234e-06, |
|
"loss": 0.5085, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 1.023121387283237, |
|
"grad_norm": 2.123630125385134, |
|
"learning_rate": 1.047871828292092e-06, |
|
"loss": 0.4866, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 1.0289017341040463, |
|
"grad_norm": 2.188774815332412, |
|
"learning_rate": 1.0383027336900353e-06, |
|
"loss": 0.511, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 1.0346820809248556, |
|
"grad_norm": 2.0846346019353064, |
|
"learning_rate": 1.028730125276083e-06, |
|
"loss": 0.4731, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 1.0404624277456647, |
|
"grad_norm": 2.0139582535739042, |
|
"learning_rate": 1.0191548812211142e-06, |
|
"loss": 0.4332, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 1.046242774566474, |
|
"grad_norm": 2.2271331676676516, |
|
"learning_rate": 1.0095778799377959e-06, |
|
"loss": 0.4548, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 1.0520231213872833, |
|
"grad_norm": 2.143584429924543, |
|
"learning_rate": 1e-06, |
|
"loss": 0.5114, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 1.0578034682080926, |
|
"grad_norm": 1.9376604838146982, |
|
"learning_rate": 9.904221200622043e-07, |
|
"loss": 0.4628, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 1.0635838150289016, |
|
"grad_norm": 1.9520446252559986, |
|
"learning_rate": 9.80845118778886e-07, |
|
"loss": 0.4619, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 1.069364161849711, |
|
"grad_norm": 1.8153622741750448, |
|
"learning_rate": 9.71269874723917e-07, |
|
"loss": 0.4859, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 1.0751445086705202, |
|
"grad_norm": 2.0497117157639395, |
|
"learning_rate": 9.616972663099646e-07, |
|
"loss": 0.4682, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 1.0809248554913296, |
|
"grad_norm": 1.9422240692097994, |
|
"learning_rate": 9.521281717079081e-07, |
|
"loss": 0.4862, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 1.0867052023121386, |
|
"grad_norm": 2.0311048893088515, |
|
"learning_rate": 9.425634687662766e-07, |
|
"loss": 0.5078, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 1.092485549132948, |
|
"grad_norm": 2.573871459192162, |
|
"learning_rate": 9.330040349307183e-07, |
|
"loss": 0.4822, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 1.0982658959537572, |
|
"grad_norm": 2.179513115768778, |
|
"learning_rate": 9.234507471635042e-07, |
|
"loss": 0.4986, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 1.1040462427745665, |
|
"grad_norm": 2.2820802238060143, |
|
"learning_rate": 9.139044818630783e-07, |
|
"loss": 0.4922, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 1.1098265895953756, |
|
"grad_norm": 2.0023850906436405, |
|
"learning_rate": 9.043661147836578e-07, |
|
"loss": 0.4494, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 1.115606936416185, |
|
"grad_norm": 1.9343529979889897, |
|
"learning_rate": 8.948365209548941e-07, |
|
"loss": 0.4656, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 1.1213872832369942, |
|
"grad_norm": 2.2579648984673053, |
|
"learning_rate": 8.853165746015995e-07, |
|
"loss": 0.4793, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 1.1271676300578035, |
|
"grad_norm": 2.4825890947622167, |
|
"learning_rate": 8.758071490635468e-07, |
|
"loss": 0.473, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 1.1329479768786128, |
|
"grad_norm": 2.543399308768905, |
|
"learning_rate": 8.663091167153514e-07, |
|
"loss": 0.5026, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 1.138728323699422, |
|
"grad_norm": 2.196355049811149, |
|
"learning_rate": 8.568233488864419e-07, |
|
"loss": 0.4909, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 1.1445086705202312, |
|
"grad_norm": 1.8400038174670734, |
|
"learning_rate": 8.473507157811254e-07, |
|
"loss": 0.4852, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 1.1502890173410405, |
|
"grad_norm": 2.0651998028564305, |
|
"learning_rate": 8.378920863987575e-07, |
|
"loss": 0.4798, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 1.1560693641618498, |
|
"grad_norm": 1.7845307873709155, |
|
"learning_rate": 8.284483284540216e-07, |
|
"loss": 0.4613, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.1618497109826589, |
|
"grad_norm": 1.895202511289734, |
|
"learning_rate": 8.190203082973271e-07, |
|
"loss": 0.5084, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 1.1676300578034682, |
|
"grad_norm": 2.051181108200922, |
|
"learning_rate": 8.096088908353315e-07, |
|
"loss": 0.4672, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 1.1734104046242775, |
|
"grad_norm": 2.1934950530726445, |
|
"learning_rate": 8.002149394515972e-07, |
|
"loss": 0.4895, |
|
"step": 203 |
|
}, |
|
{ |
|
"epoch": 1.1791907514450868, |
|
"grad_norm": 2.3380870740629587, |
|
"learning_rate": 7.908393159273836e-07, |
|
"loss": 0.4666, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 1.1849710982658959, |
|
"grad_norm": 1.9114359910024667, |
|
"learning_rate": 7.814828803625925e-07, |
|
"loss": 0.4974, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 1.1907514450867052, |
|
"grad_norm": 2.054987235601307, |
|
"learning_rate": 7.721464910968626e-07, |
|
"loss": 0.4499, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 1.1965317919075145, |
|
"grad_norm": 2.0694031895214904, |
|
"learning_rate": 7.628310046308272e-07, |
|
"loss": 0.4853, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 1.2023121387283238, |
|
"grad_norm": 1.9497478503918264, |
|
"learning_rate": 7.53537275547541e-07, |
|
"loss": 0.4701, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 1.208092485549133, |
|
"grad_norm": 2.0471759991586578, |
|
"learning_rate": 7.442661564340822e-07, |
|
"loss": 0.4672, |
|
"step": 209 |
|
}, |
|
{ |
|
"epoch": 1.2138728323699421, |
|
"grad_norm": 2.1249362738674336, |
|
"learning_rate": 7.350184978033385e-07, |
|
"loss": 0.4671, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 1.2196531791907514, |
|
"grad_norm": 2.1197465337178047, |
|
"learning_rate": 7.257951480159819e-07, |
|
"loss": 0.462, |
|
"step": 211 |
|
}, |
|
{ |
|
"epoch": 1.2254335260115607, |
|
"grad_norm": 1.851706914708072, |
|
"learning_rate": 7.165969532026429e-07, |
|
"loss": 0.4583, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 1.2312138728323698, |
|
"grad_norm": 2.0504382329015627, |
|
"learning_rate": 7.074247571862877e-07, |
|
"loss": 0.4698, |
|
"step": 213 |
|
}, |
|
{ |
|
"epoch": 1.2369942196531791, |
|
"grad_norm": 1.9545030444177458, |
|
"learning_rate": 6.982794014048077e-07, |
|
"loss": 0.4832, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 1.2427745664739884, |
|
"grad_norm": 1.8828619878855422, |
|
"learning_rate": 6.891617248338282e-07, |
|
"loss": 0.4664, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 1.2485549132947977, |
|
"grad_norm": 1.8683663335668868, |
|
"learning_rate": 6.800725639097411e-07, |
|
"loss": 0.4735, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 1.254335260115607, |
|
"grad_norm": 2.0704005138553505, |
|
"learning_rate": 6.710127524529745e-07, |
|
"loss": 0.5023, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 1.260115606936416, |
|
"grad_norm": 2.272926759535781, |
|
"learning_rate": 6.619831215914973e-07, |
|
"loss": 0.4699, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 1.2658959537572254, |
|
"grad_norm": 2.091914123638361, |
|
"learning_rate": 6.52984499684575e-07, |
|
"loss": 0.4573, |
|
"step": 219 |
|
}, |
|
{ |
|
"epoch": 1.2716763005780347, |
|
"grad_norm": 1.995494161392343, |
|
"learning_rate": 6.440177122467768e-07, |
|
"loss": 0.4873, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 1.2774566473988438, |
|
"grad_norm": 2.041258798930975, |
|
"learning_rate": 6.350835818722449e-07, |
|
"loss": 0.4936, |
|
"step": 221 |
|
}, |
|
{ |
|
"epoch": 1.2832369942196533, |
|
"grad_norm": 1.9625875782578102, |
|
"learning_rate": 6.261829281592312e-07, |
|
"loss": 0.4748, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 1.2890173410404624, |
|
"grad_norm": 1.9356789263214738, |
|
"learning_rate": 6.173165676349102e-07, |
|
"loss": 0.4803, |
|
"step": 223 |
|
}, |
|
{ |
|
"epoch": 1.2947976878612717, |
|
"grad_norm": 1.942573776135112, |
|
"learning_rate": 6.084853136804711e-07, |
|
"loss": 0.4635, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 1.300578034682081, |
|
"grad_norm": 2.02929712164466, |
|
"learning_rate": 5.996899764565005e-07, |
|
"loss": 0.476, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 1.30635838150289, |
|
"grad_norm": 2.082626277063179, |
|
"learning_rate": 5.9093136282866e-07, |
|
"loss": 0.4856, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 1.3121387283236994, |
|
"grad_norm": 2.1016956249931775, |
|
"learning_rate": 5.822102762936666e-07, |
|
"loss": 0.506, |
|
"step": 227 |
|
}, |
|
{ |
|
"epoch": 1.3179190751445087, |
|
"grad_norm": 1.9312802366426565, |
|
"learning_rate": 5.735275169055803e-07, |
|
"loss": 0.4749, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 1.323699421965318, |
|
"grad_norm": 2.061206641650247, |
|
"learning_rate": 5.648838812024099e-07, |
|
"loss": 0.4512, |
|
"step": 229 |
|
}, |
|
{ |
|
"epoch": 1.3294797687861273, |
|
"grad_norm": 1.9814472946523185, |
|
"learning_rate": 5.562801621330402e-07, |
|
"loss": 0.4776, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 1.3352601156069364, |
|
"grad_norm": 1.8543611294874782, |
|
"learning_rate": 5.477171489844881e-07, |
|
"loss": 0.5093, |
|
"step": 231 |
|
}, |
|
{ |
|
"epoch": 1.3410404624277457, |
|
"grad_norm": 1.9906348566872387, |
|
"learning_rate": 5.391956273094951e-07, |
|
"loss": 0.4852, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 1.346820809248555, |
|
"grad_norm": 1.9315896688508982, |
|
"learning_rate": 5.307163788544629e-07, |
|
"loss": 0.4608, |
|
"step": 233 |
|
}, |
|
{ |
|
"epoch": 1.352601156069364, |
|
"grad_norm": 1.8959836891458495, |
|
"learning_rate": 5.222801814877369e-07, |
|
"loss": 0.449, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 1.3583815028901733, |
|
"grad_norm": 1.8907180611172163, |
|
"learning_rate": 5.138878091282471e-07, |
|
"loss": 0.4458, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 1.3641618497109826, |
|
"grad_norm": 1.9420465209074669, |
|
"learning_rate": 5.055400316745095e-07, |
|
"loss": 0.4756, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 1.369942196531792, |
|
"grad_norm": 1.94702430879285, |
|
"learning_rate": 4.972376149339978e-07, |
|
"loss": 0.457, |
|
"step": 237 |
|
}, |
|
{ |
|
"epoch": 1.3757225433526012, |
|
"grad_norm": 2.5499359496821277, |
|
"learning_rate": 4.889813205528894e-07, |
|
"loss": 0.4758, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 1.3815028901734103, |
|
"grad_norm": 1.9308947290061484, |
|
"learning_rate": 4.807719059461942e-07, |
|
"loss": 0.4611, |
|
"step": 239 |
|
}, |
|
{ |
|
"epoch": 1.3872832369942196, |
|
"grad_norm": 1.9505629087051528, |
|
"learning_rate": 4.7261012422827074e-07, |
|
"loss": 0.4719, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 1.393063583815029, |
|
"grad_norm": 2.267217020333816, |
|
"learning_rate": 4.6449672414373597e-07, |
|
"loss": 0.4802, |
|
"step": 241 |
|
}, |
|
{ |
|
"epoch": 1.3988439306358382, |
|
"grad_norm": 2.152359255572803, |
|
"learning_rate": 4.5643244999877896e-07, |
|
"loss": 0.4635, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 1.4046242774566475, |
|
"grad_norm": 1.9757352489131026, |
|
"learning_rate": 4.4841804159287857e-07, |
|
"loss": 0.4716, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 1.4104046242774566, |
|
"grad_norm": 2.0536715397019942, |
|
"learning_rate": 4.40454234150936e-07, |
|
"loss": 0.473, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 1.416184971098266, |
|
"grad_norm": 1.9534035655257582, |
|
"learning_rate": 4.3254175825582693e-07, |
|
"loss": 0.4803, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 1.4219653179190752, |
|
"grad_norm": 1.9057679988988923, |
|
"learning_rate": 4.246813397813794e-07, |
|
"loss": 0.4601, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 1.4277456647398843, |
|
"grad_norm": 1.9037622302779358, |
|
"learning_rate": 4.1687369982578346e-07, |
|
"loss": 0.4527, |
|
"step": 247 |
|
}, |
|
{ |
|
"epoch": 1.4335260115606936, |
|
"grad_norm": 2.9426991633061945, |
|
"learning_rate": 4.0911955464543976e-07, |
|
"loss": 0.4833, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 1.439306358381503, |
|
"grad_norm": 1.9189468581058093, |
|
"learning_rate": 4.014196155892502e-07, |
|
"loss": 0.4573, |
|
"step": 249 |
|
}, |
|
{ |
|
"epoch": 1.4450867052023122, |
|
"grad_norm": 2.0139556098301186, |
|
"learning_rate": 3.9377458903336223e-07, |
|
"loss": 0.4758, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 1.4508670520231215, |
|
"grad_norm": 2.0450079177733276, |
|
"learning_rate": 3.861851763163665e-07, |
|
"loss": 0.4663, |
|
"step": 251 |
|
}, |
|
{ |
|
"epoch": 1.4566473988439306, |
|
"grad_norm": 3.137157219646671, |
|
"learning_rate": 3.786520736749571e-07, |
|
"loss": 0.4744, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 1.4624277456647399, |
|
"grad_norm": 2.107997689195662, |
|
"learning_rate": 3.71175972180061e-07, |
|
"loss": 0.4675, |
|
"step": 253 |
|
}, |
|
{ |
|
"epoch": 1.4682080924855492, |
|
"grad_norm": 2.0169649469031823, |
|
"learning_rate": 3.6375755767344043e-07, |
|
"loss": 0.4654, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 1.4739884393063583, |
|
"grad_norm": 1.9965328414982166, |
|
"learning_rate": 3.563975107047747e-07, |
|
"loss": 0.4788, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 1.4797687861271676, |
|
"grad_norm": 1.909053888926535, |
|
"learning_rate": 3.4909650646922894e-07, |
|
"loss": 0.4864, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 1.4855491329479769, |
|
"grad_norm": 2.1284220971629098, |
|
"learning_rate": 3.4185521474551247e-07, |
|
"loss": 0.464, |
|
"step": 257 |
|
}, |
|
{ |
|
"epoch": 1.4913294797687862, |
|
"grad_norm": 2.033246742298473, |
|
"learning_rate": 3.3467429983443476e-07, |
|
"loss": 0.469, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 1.4971098265895955, |
|
"grad_norm": 2.1479521556785355, |
|
"learning_rate": 3.2755442049796425e-07, |
|
"loss": 0.4889, |
|
"step": 259 |
|
}, |
|
{ |
|
"epoch": 1.5028901734104045, |
|
"grad_norm": 2.0332714192782615, |
|
"learning_rate": 3.204962298987944e-07, |
|
"loss": 0.4686, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 1.5086705202312138, |
|
"grad_norm": 1.9047792343443468, |
|
"learning_rate": 3.135003755404244e-07, |
|
"loss": 0.4657, |
|
"step": 261 |
|
}, |
|
{ |
|
"epoch": 1.5144508670520231, |
|
"grad_norm": 1.9650730838682373, |
|
"learning_rate": 3.065674992077584e-07, |
|
"loss": 0.4754, |
|
"step": 262 |
|
}, |
|
{ |
|
"epoch": 1.5202312138728322, |
|
"grad_norm": 2.419000097894264, |
|
"learning_rate": 2.9969823690822904e-07, |
|
"loss": 0.4646, |
|
"step": 263 |
|
}, |
|
{ |
|
"epoch": 1.5260115606936417, |
|
"grad_norm": 1.971230314254081, |
|
"learning_rate": 2.9289321881345254e-07, |
|
"loss": 0.4667, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 1.5317919075144508, |
|
"grad_norm": 2.121737526252635, |
|
"learning_rate": 2.861530692014169e-07, |
|
"loss": 0.4674, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 1.5375722543352601, |
|
"grad_norm": 2.0795424904002515, |
|
"learning_rate": 2.7947840639921303e-07, |
|
"loss": 0.5152, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 1.5433526011560694, |
|
"grad_norm": 1.9716533778945817, |
|
"learning_rate": 2.728698427263096e-07, |
|
"loss": 0.4774, |
|
"step": 267 |
|
}, |
|
{ |
|
"epoch": 1.5491329479768785, |
|
"grad_norm": 1.8977607875089808, |
|
"learning_rate": 2.6632798443838145e-07, |
|
"loss": 0.4514, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 1.5549132947976878, |
|
"grad_norm": 1.9253571622286325, |
|
"learning_rate": 2.598534316716917e-07, |
|
"loss": 0.4607, |
|
"step": 269 |
|
}, |
|
{ |
|
"epoch": 1.560693641618497, |
|
"grad_norm": 2.078466033180376, |
|
"learning_rate": 2.534467783880373e-07, |
|
"loss": 0.4883, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 1.5664739884393064, |
|
"grad_norm": 2.5971424603146436, |
|
"learning_rate": 2.4710861232026013e-07, |
|
"loss": 0.4746, |
|
"step": 271 |
|
}, |
|
{ |
|
"epoch": 1.5722543352601157, |
|
"grad_norm": 1.9666452852090528, |
|
"learning_rate": 2.408395149183294e-07, |
|
"loss": 0.5058, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 1.5780346820809248, |
|
"grad_norm": 2.035908272574928, |
|
"learning_rate": 2.346400612960009e-07, |
|
"loss": 0.4849, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 1.583815028901734, |
|
"grad_norm": 2.0288243942210378, |
|
"learning_rate": 2.28510820178057e-07, |
|
"loss": 0.4778, |
|
"step": 274 |
|
}, |
|
{ |
|
"epoch": 1.5895953757225434, |
|
"grad_norm": 1.9735647171840094, |
|
"learning_rate": 2.2245235384813332e-07, |
|
"loss": 0.4851, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 1.5953757225433525, |
|
"grad_norm": 2.0907181002682877, |
|
"learning_rate": 2.164652180971358e-07, |
|
"loss": 0.4604, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 1.601156069364162, |
|
"grad_norm": 2.020880323505046, |
|
"learning_rate": 2.1054996217225385e-07, |
|
"loss": 0.4629, |
|
"step": 277 |
|
}, |
|
{ |
|
"epoch": 1.606936416184971, |
|
"grad_norm": 2.259358632099597, |
|
"learning_rate": 2.0470712872657348e-07, |
|
"loss": 0.4806, |
|
"step": 278 |
|
}, |
|
{ |
|
"epoch": 1.6127167630057804, |
|
"grad_norm": 2.8709064155831023, |
|
"learning_rate": 1.9893725376929504e-07, |
|
"loss": 0.4324, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 1.6184971098265897, |
|
"grad_norm": 2.073098194342015, |
|
"learning_rate": 1.9324086661656168e-07, |
|
"loss": 0.4731, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 1.6242774566473988, |
|
"grad_norm": 2.1188843044480965, |
|
"learning_rate": 1.8761848984290062e-07, |
|
"loss": 0.4616, |
|
"step": 281 |
|
}, |
|
{ |
|
"epoch": 1.630057803468208, |
|
"grad_norm": 1.9759976408035527, |
|
"learning_rate": 1.8207063923328235e-07, |
|
"loss": 0.481, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 1.6358381502890174, |
|
"grad_norm": 2.046719438470121, |
|
"learning_rate": 1.7659782373580555e-07, |
|
"loss": 0.4666, |
|
"step": 283 |
|
}, |
|
{ |
|
"epoch": 1.6416184971098264, |
|
"grad_norm": 1.9283494713604754, |
|
"learning_rate": 1.712005454150055e-07, |
|
"loss": 0.4581, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 1.647398843930636, |
|
"grad_norm": 1.9357329743637328, |
|
"learning_rate": 1.658792994057968e-07, |
|
"loss": 0.4426, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 1.653179190751445, |
|
"grad_norm": 1.942353186463166, |
|
"learning_rate": 1.6063457386805003e-07, |
|
"loss": 0.4805, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 1.6589595375722543, |
|
"grad_norm": 2.1127349981346053, |
|
"learning_rate": 1.554668499418097e-07, |
|
"loss": 0.4699, |
|
"step": 287 |
|
}, |
|
{ |
|
"epoch": 1.6647398843930636, |
|
"grad_norm": 1.947713066714842, |
|
"learning_rate": 1.503766017031547e-07, |
|
"loss": 0.4709, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 1.6705202312138727, |
|
"grad_norm": 1.931903633238329, |
|
"learning_rate": 1.4536429612070843e-07, |
|
"loss": 0.4887, |
|
"step": 289 |
|
}, |
|
{ |
|
"epoch": 1.6763005780346822, |
|
"grad_norm": 1.9911200450774031, |
|
"learning_rate": 1.4043039301279903e-07, |
|
"loss": 0.4904, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 1.6820809248554913, |
|
"grad_norm": 1.9439202742202357, |
|
"learning_rate": 1.3557534500527768e-07, |
|
"loss": 0.4531, |
|
"step": 291 |
|
}, |
|
{ |
|
"epoch": 1.6878612716763006, |
|
"grad_norm": 1.9164172510841055, |
|
"learning_rate": 1.3079959748999493e-07, |
|
"loss": 0.4563, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 1.69364161849711, |
|
"grad_norm": 2.0311554173466666, |
|
"learning_rate": 1.2610358858394188e-07, |
|
"loss": 0.4828, |
|
"step": 293 |
|
}, |
|
{ |
|
"epoch": 1.699421965317919, |
|
"grad_norm": 1.9998690205184846, |
|
"learning_rate": 1.2148774908905778e-07, |
|
"loss": 0.4786, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 1.7052023121387283, |
|
"grad_norm": 4.169828381438711, |
|
"learning_rate": 1.169525024527096e-07, |
|
"loss": 0.4677, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 1.7109826589595376, |
|
"grad_norm": 1.8989428779004813, |
|
"learning_rate": 1.1249826472884571e-07, |
|
"loss": 0.4401, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 1.7167630057803467, |
|
"grad_norm": 2.1129726622598093, |
|
"learning_rate": 1.0812544453982764e-07, |
|
"loss": 0.4903, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 1.7225433526011562, |
|
"grad_norm": 2.0629596593806934, |
|
"learning_rate": 1.038344430389445e-07, |
|
"loss": 0.4925, |
|
"step": 298 |
|
}, |
|
{ |
|
"epoch": 1.7283236994219653, |
|
"grad_norm": 2.0320095664676665, |
|
"learning_rate": 9.962565387361166e-08, |
|
"loss": 0.4614, |
|
"step": 299 |
|
}, |
|
{ |
|
"epoch": 1.7341040462427746, |
|
"grad_norm": 3.0540554132959183, |
|
"learning_rate": 9.549946314925839e-08, |
|
"loss": 0.4964, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.739884393063584, |
|
"grad_norm": 2.016687599667707, |
|
"learning_rate": 9.145624939390761e-08, |
|
"loss": 0.4527, |
|
"step": 301 |
|
}, |
|
{ |
|
"epoch": 1.745664739884393, |
|
"grad_norm": 1.8452228441209821, |
|
"learning_rate": 8.749638352345001e-08, |
|
"loss": 0.4878, |
|
"step": 302 |
|
}, |
|
{ |
|
"epoch": 1.7514450867052023, |
|
"grad_norm": 2.385887245808352, |
|
"learning_rate": 8.362022880761776e-08, |
|
"loss": 0.4974, |
|
"step": 303 |
|
}, |
|
{ |
|
"epoch": 1.7572254335260116, |
|
"grad_norm": 1.9832314558835376, |
|
"learning_rate": 7.982814083665823e-08, |
|
"loss": 0.4668, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 1.7630057803468207, |
|
"grad_norm": 2.564651988633928, |
|
"learning_rate": 7.612046748871326e-08, |
|
"loss": 0.4604, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 1.7687861271676302, |
|
"grad_norm": 2.135369579624982, |
|
"learning_rate": 7.249754889790538e-08, |
|
"loss": 0.4981, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 1.7745664739884393, |
|
"grad_norm": 2.167361327770021, |
|
"learning_rate": 6.895971742313467e-08, |
|
"loss": 0.4484, |
|
"step": 307 |
|
}, |
|
{ |
|
"epoch": 1.7803468208092486, |
|
"grad_norm": 1.824815893708037, |
|
"learning_rate": 6.550729761758899e-08, |
|
"loss": 0.467, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 1.7861271676300579, |
|
"grad_norm": 2.1085663211118777, |
|
"learning_rate": 6.21406061989701e-08, |
|
"loss": 0.4705, |
|
"step": 309 |
|
}, |
|
{ |
|
"epoch": 1.791907514450867, |
|
"grad_norm": 2.00560428209146, |
|
"learning_rate": 5.885995202043847e-08, |
|
"loss": 0.4708, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 1.7976878612716765, |
|
"grad_norm": 2.076853013886715, |
|
"learning_rate": 5.5665636042279696e-08, |
|
"loss": 0.4676, |
|
"step": 311 |
|
}, |
|
{ |
|
"epoch": 1.8034682080924855, |
|
"grad_norm": 1.9453450448573704, |
|
"learning_rate": 5.2557951304295747e-08, |
|
"loss": 0.4671, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 1.8092485549132948, |
|
"grad_norm": 2.3993736624613256, |
|
"learning_rate": 4.953718289892106e-08, |
|
"loss": 0.4652, |
|
"step": 313 |
|
}, |
|
{ |
|
"epoch": 1.8150289017341041, |
|
"grad_norm": 1.930078423666107, |
|
"learning_rate": 4.6603607945069456e-08, |
|
"loss": 0.5143, |
|
"step": 314 |
|
}, |
|
{ |
|
"epoch": 1.8208092485549132, |
|
"grad_norm": 1.9424714850195004, |
|
"learning_rate": 4.375749556271169e-08, |
|
"loss": 0.4978, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 1.8265895953757225, |
|
"grad_norm": 1.9534413743723884, |
|
"learning_rate": 4.099910684818697e-08, |
|
"loss": 0.46, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 1.8323699421965318, |
|
"grad_norm": 2.0016352335992575, |
|
"learning_rate": 3.8328694850250475e-08, |
|
"loss": 0.4765, |
|
"step": 317 |
|
}, |
|
{ |
|
"epoch": 1.838150289017341, |
|
"grad_norm": 2.009683412643769, |
|
"learning_rate": 3.574650454685901e-08, |
|
"loss": 0.4611, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 1.8439306358381504, |
|
"grad_norm": 1.9761898032699974, |
|
"learning_rate": 3.325277282269756e-08, |
|
"loss": 0.4924, |
|
"step": 319 |
|
}, |
|
{ |
|
"epoch": 1.8497109826589595, |
|
"grad_norm": 1.9213759865585416, |
|
"learning_rate": 3.08477284474481e-08, |
|
"loss": 0.4635, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 1.8554913294797688, |
|
"grad_norm": 2.1643446222196188, |
|
"learning_rate": 2.8531592054802157e-08, |
|
"loss": 0.49, |
|
"step": 321 |
|
}, |
|
{ |
|
"epoch": 1.861271676300578, |
|
"grad_norm": 1.930410206836703, |
|
"learning_rate": 2.6304576122221034e-08, |
|
"loss": 0.4694, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 1.8670520231213872, |
|
"grad_norm": 2.012454921101362, |
|
"learning_rate": 2.4166884951442702e-08, |
|
"loss": 0.4542, |
|
"step": 323 |
|
}, |
|
{ |
|
"epoch": 1.8728323699421965, |
|
"grad_norm": 2.211103638969066, |
|
"learning_rate": 2.211871464974091e-08, |
|
"loss": 0.4755, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 1.8786127167630058, |
|
"grad_norm": 2.1913424628785356, |
|
"learning_rate": 2.0160253111933145e-08, |
|
"loss": 0.4583, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 1.8843930635838149, |
|
"grad_norm": 2.407050555682028, |
|
"learning_rate": 1.8291680003145073e-08, |
|
"loss": 0.4787, |
|
"step": 326 |
|
}, |
|
{ |
|
"epoch": 1.8901734104046244, |
|
"grad_norm": 1.9694723993714849, |
|
"learning_rate": 1.6513166742327168e-08, |
|
"loss": 0.476, |
|
"step": 327 |
|
}, |
|
{ |
|
"epoch": 1.8959537572254335, |
|
"grad_norm": 2.0948624133751363, |
|
"learning_rate": 1.482487648653008e-08, |
|
"loss": 0.477, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 1.9017341040462428, |
|
"grad_norm": 1.9404754923970793, |
|
"learning_rate": 1.3226964115936045e-08, |
|
"loss": 0.4892, |
|
"step": 329 |
|
}, |
|
{ |
|
"epoch": 1.907514450867052, |
|
"grad_norm": 2.6043671145247234, |
|
"learning_rate": 1.1719576219651584e-08, |
|
"loss": 0.4509, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 1.9132947976878611, |
|
"grad_norm": 2.4502507284918273, |
|
"learning_rate": 1.0302851082258367e-08, |
|
"loss": 0.4524, |
|
"step": 331 |
|
}, |
|
{ |
|
"epoch": 1.9190751445086707, |
|
"grad_norm": 1.9921214924857538, |
|
"learning_rate": 8.97691867112882e-09, |
|
"loss": 0.4883, |
|
"step": 332 |
|
}, |
|
{ |
|
"epoch": 1.9248554913294798, |
|
"grad_norm": 1.9469866747763405, |
|
"learning_rate": 7.741900624501974e-09, |
|
"loss": 0.4591, |
|
"step": 333 |
|
}, |
|
{ |
|
"epoch": 1.930635838150289, |
|
"grad_norm": 2.0065329448073155, |
|
"learning_rate": 6.5979102403249664e-09, |
|
"loss": 0.4609, |
|
"step": 334 |
|
}, |
|
{ |
|
"epoch": 1.9364161849710984, |
|
"grad_norm": 5.000176973794001, |
|
"learning_rate": 5.54505246585979e-09, |
|
"loss": 0.4556, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 1.9421965317919074, |
|
"grad_norm": 2.1392791718201787, |
|
"learning_rate": 4.583423888055105e-09, |
|
"loss": 0.4574, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 1.9479768786127167, |
|
"grad_norm": 1.8931847358388805, |
|
"learning_rate": 3.713112724685663e-09, |
|
"loss": 0.4902, |
|
"step": 337 |
|
}, |
|
{ |
|
"epoch": 1.953757225433526, |
|
"grad_norm": 1.905567476983522, |
|
"learning_rate": 2.934198816259559e-09, |
|
"loss": 0.4923, |
|
"step": 338 |
|
}, |
|
{ |
|
"epoch": 1.9595375722543351, |
|
"grad_norm": 2.01728565941551, |
|
"learning_rate": 2.246753618693753e-09, |
|
"loss": 0.4533, |
|
"step": 339 |
|
}, |
|
{ |
|
"epoch": 1.9653179190751446, |
|
"grad_norm": 1.7782643184255449, |
|
"learning_rate": 1.6508401967588736e-09, |
|
"loss": 0.4655, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 1.9710982658959537, |
|
"grad_norm": 2.154854344890582, |
|
"learning_rate": 1.146513218293621e-09, |
|
"loss": 0.4473, |
|
"step": 341 |
|
}, |
|
{ |
|
"epoch": 1.976878612716763, |
|
"grad_norm": 2.1149088823103908, |
|
"learning_rate": 7.338189491900015e-10, |
|
"loss": 0.4753, |
|
"step": 342 |
|
}, |
|
{ |
|
"epoch": 1.9826589595375723, |
|
"grad_norm": 2.06214678310316, |
|
"learning_rate": 4.1279524914861194e-10, |
|
"loss": 0.4521, |
|
"step": 343 |
|
}, |
|
{ |
|
"epoch": 1.9884393063583814, |
|
"grad_norm": 2.0644533457335825, |
|
"learning_rate": 1.834715682056398e-10, |
|
"loss": 0.459, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 1.9942196531791907, |
|
"grad_norm": 2.137977836744415, |
|
"learning_rate": 4.586894403146857e-11, |
|
"loss": 0.5149, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 1.7708373617580409, |
|
"learning_rate": 0.0, |
|
"loss": 0.3975, |
|
"step": 346 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"step": 346, |
|
"total_flos": 1102036773634048.0, |
|
"train_loss": 0.5610263916109338, |
|
"train_runtime": 4562.3378, |
|
"train_samples_per_second": 4.828, |
|
"train_steps_per_second": 0.076 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 346, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 100, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1102036773634048.0, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |