{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.9836065573770494, |
|
"eval_steps": 500, |
|
"global_step": 426, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.00702576112412178, |
|
"grad_norm": 0.14412136375904083, |
|
"learning_rate": 2.3255813953488374e-07, |
|
"loss": 0.7429, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.01405152224824356, |
|
"grad_norm": 0.12180829793214798, |
|
"learning_rate": 4.651162790697675e-07, |
|
"loss": 0.6834, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.02107728337236534, |
|
"grad_norm": 0.13547690212726593, |
|
"learning_rate": 6.976744186046513e-07, |
|
"loss": 0.7261, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.02810304449648712, |
|
"grad_norm": 0.15328572690486908, |
|
"learning_rate": 9.30232558139535e-07, |
|
"loss": 0.7679, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.0351288056206089, |
|
"grad_norm": 0.11058026552200317, |
|
"learning_rate": 1.1627906976744188e-06, |
|
"loss": 0.642, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.04215456674473068, |
|
"grad_norm": 0.1449006348848343, |
|
"learning_rate": 1.3953488372093025e-06, |
|
"loss": 0.7518, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.04918032786885246, |
|
"grad_norm": 0.14010243117809296, |
|
"learning_rate": 1.6279069767441862e-06, |
|
"loss": 0.734, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.05620608899297424, |
|
"grad_norm": 0.11249666661024094, |
|
"learning_rate": 1.86046511627907e-06, |
|
"loss": 0.6573, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.06323185011709602, |
|
"grad_norm": 0.12850059568881989, |
|
"learning_rate": 2.0930232558139536e-06, |
|
"loss": 0.6856, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.0702576112412178, |
|
"grad_norm": 0.09605734795331955, |
|
"learning_rate": 2.3255813953488376e-06, |
|
"loss": 0.6657, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.07728337236533958, |
|
"grad_norm": 0.09640911221504211, |
|
"learning_rate": 2.558139534883721e-06, |
|
"loss": 0.7243, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.08430913348946135, |
|
"grad_norm": 0.09078703820705414, |
|
"learning_rate": 2.790697674418605e-06, |
|
"loss": 0.7013, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.09133489461358314, |
|
"grad_norm": 0.09065765142440796, |
|
"learning_rate": 3.0232558139534885e-06, |
|
"loss": 0.6744, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.09836065573770492, |
|
"grad_norm": 0.06950970739126205, |
|
"learning_rate": 3.2558139534883724e-06, |
|
"loss": 0.6745, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.1053864168618267, |
|
"grad_norm": 0.07783018797636032, |
|
"learning_rate": 3.4883720930232564e-06, |
|
"loss": 0.6294, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.11241217798594848, |
|
"grad_norm": 0.0778842344880104, |
|
"learning_rate": 3.72093023255814e-06, |
|
"loss": 0.609, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.11943793911007025, |
|
"grad_norm": 0.07574747502803802, |
|
"learning_rate": 3.953488372093024e-06, |
|
"loss": 0.5765, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.12646370023419204, |
|
"grad_norm": 0.07703674584627151, |
|
"learning_rate": 4.186046511627907e-06, |
|
"loss": 0.6541, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.13348946135831383, |
|
"grad_norm": 0.06555178016424179, |
|
"learning_rate": 4.418604651162791e-06, |
|
"loss": 0.5956, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.1405152224824356, |
|
"grad_norm": 0.08275623619556427, |
|
"learning_rate": 4.651162790697675e-06, |
|
"loss": 0.7029, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.14754098360655737, |
|
"grad_norm": 0.0675705149769783, |
|
"learning_rate": 4.883720930232559e-06, |
|
"loss": 0.5811, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.15456674473067916, |
|
"grad_norm": 0.07332336902618408, |
|
"learning_rate": 5.116279069767442e-06, |
|
"loss": 0.6043, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.16159250585480095, |
|
"grad_norm": 0.0802861675620079, |
|
"learning_rate": 5.348837209302326e-06, |
|
"loss": 0.6869, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.1686182669789227, |
|
"grad_norm": 0.06729163229465485, |
|
"learning_rate": 5.58139534883721e-06, |
|
"loss": 0.576, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.1756440281030445, |
|
"grad_norm": 0.06569469720125198, |
|
"learning_rate": 5.8139534883720935e-06, |
|
"loss": 0.6487, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.18266978922716628, |
|
"grad_norm": 0.06604806333780289, |
|
"learning_rate": 6.046511627906977e-06, |
|
"loss": 0.585, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.18969555035128804, |
|
"grad_norm": 0.06028835102915764, |
|
"learning_rate": 6.279069767441861e-06, |
|
"loss": 0.606, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.19672131147540983, |
|
"grad_norm": 0.05667126551270485, |
|
"learning_rate": 6.511627906976745e-06, |
|
"loss": 0.5471, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.20374707259953162, |
|
"grad_norm": 0.06196806579828262, |
|
"learning_rate": 6.744186046511628e-06, |
|
"loss": 0.5737, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.2107728337236534, |
|
"grad_norm": 0.058749496936798096, |
|
"learning_rate": 6.976744186046513e-06, |
|
"loss": 0.5741, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.21779859484777517, |
|
"grad_norm": 0.059330519288778305, |
|
"learning_rate": 7.209302325581395e-06, |
|
"loss": 0.5934, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.22482435597189696, |
|
"grad_norm": 0.0617949403822422, |
|
"learning_rate": 7.44186046511628e-06, |
|
"loss": 0.5195, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.23185011709601874, |
|
"grad_norm": 0.0625167116522789, |
|
"learning_rate": 7.674418604651164e-06, |
|
"loss": 0.5721, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.2388758782201405, |
|
"grad_norm": 0.0701775774359703, |
|
"learning_rate": 7.906976744186048e-06, |
|
"loss": 0.6073, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.2459016393442623, |
|
"grad_norm": 0.06266116350889206, |
|
"learning_rate": 8.139534883720931e-06, |
|
"loss": 0.556, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.2529274004683841, |
|
"grad_norm": 0.06269484013319016, |
|
"learning_rate": 8.372093023255815e-06, |
|
"loss": 0.5486, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.25995316159250587, |
|
"grad_norm": 0.05984916910529137, |
|
"learning_rate": 8.604651162790698e-06, |
|
"loss": 0.5585, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.26697892271662765, |
|
"grad_norm": 0.0559409074485302, |
|
"learning_rate": 8.837209302325582e-06, |
|
"loss": 0.4833, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.27400468384074944, |
|
"grad_norm": 0.056493211537599564, |
|
"learning_rate": 9.069767441860465e-06, |
|
"loss": 0.4957, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.2810304449648712, |
|
"grad_norm": 0.0535312257707119, |
|
"learning_rate": 9.30232558139535e-06, |
|
"loss": 0.5069, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.28805620608899296, |
|
"grad_norm": 0.05835776776075363, |
|
"learning_rate": 9.534883720930234e-06, |
|
"loss": 0.5302, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.29508196721311475, |
|
"grad_norm": 0.06827262043952942, |
|
"learning_rate": 9.767441860465117e-06, |
|
"loss": 0.5361, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.30210772833723654, |
|
"grad_norm": 0.06111191585659981, |
|
"learning_rate": 1e-05, |
|
"loss": 0.5522, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.3091334894613583, |
|
"grad_norm": 0.05481801554560661, |
|
"learning_rate": 9.99983179466314e-06, |
|
"loss": 0.4842, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.3161592505854801, |
|
"grad_norm": 0.06906605511903763, |
|
"learning_rate": 9.999327189969768e-06, |
|
"loss": 0.4833, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.3231850117096019, |
|
"grad_norm": 0.05364421010017395, |
|
"learning_rate": 9.998486219870769e-06, |
|
"loss": 0.5137, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.33021077283372363, |
|
"grad_norm": 0.06280182301998138, |
|
"learning_rate": 9.997308940948405e-06, |
|
"loss": 0.5609, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.3372365339578454, |
|
"grad_norm": 0.0569726936519146, |
|
"learning_rate": 9.995795432412513e-06, |
|
"loss": 0.5416, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.3442622950819672, |
|
"grad_norm": 0.06173605099320412, |
|
"learning_rate": 9.993945796095183e-06, |
|
"loss": 0.5162, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.351288056206089, |
|
"grad_norm": 0.05827682837843895, |
|
"learning_rate": 9.991760156443892e-06, |
|
"loss": 0.4773, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.3583138173302108, |
|
"grad_norm": 0.06012466922402382, |
|
"learning_rate": 9.989238660513141e-06, |
|
"loss": 0.5915, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.36533957845433257, |
|
"grad_norm": 0.05899444967508316, |
|
"learning_rate": 9.98638147795456e-06, |
|
"loss": 0.5253, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.37236533957845436, |
|
"grad_norm": 0.05364922806620598, |
|
"learning_rate": 9.983188801005492e-06, |
|
"loss": 0.486, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.3793911007025761, |
|
"grad_norm": 0.05417551472783089, |
|
"learning_rate": 9.979660844476056e-06, |
|
"loss": 0.5291, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.3864168618266979, |
|
"grad_norm": 0.062476933002471924, |
|
"learning_rate": 9.975797845734699e-06, |
|
"loss": 0.548, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.39344262295081966, |
|
"grad_norm": 0.06160496175289154, |
|
"learning_rate": 9.971600064692222e-06, |
|
"loss": 0.4919, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.40046838407494145, |
|
"grad_norm": 0.054618533700704575, |
|
"learning_rate": 9.967067783784297e-06, |
|
"loss": 0.5071, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.40749414519906324, |
|
"grad_norm": 0.059714607894420624, |
|
"learning_rate": 9.962201307952455e-06, |
|
"loss": 0.5347, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.41451990632318503, |
|
"grad_norm": 0.06326267123222351, |
|
"learning_rate": 9.957000964623585e-06, |
|
"loss": 0.5288, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.4215456674473068, |
|
"grad_norm": 0.05612269043922424, |
|
"learning_rate": 9.951467103687879e-06, |
|
"loss": 0.4878, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.42857142857142855, |
|
"grad_norm": 0.04823453351855278, |
|
"learning_rate": 9.945600097475322e-06, |
|
"loss": 0.4699, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.43559718969555034, |
|
"grad_norm": 0.057434167712926865, |
|
"learning_rate": 9.939400340730611e-06, |
|
"loss": 0.5389, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.4426229508196721, |
|
"grad_norm": 0.052870023995637894, |
|
"learning_rate": 9.932868250586619e-06, |
|
"loss": 0.4969, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.4496487119437939, |
|
"grad_norm": 0.050160668790340424, |
|
"learning_rate": 9.926004266536314e-06, |
|
"loss": 0.4506, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.4566744730679157, |
|
"grad_norm": 0.054149653762578964, |
|
"learning_rate": 9.918808850403192e-06, |
|
"loss": 0.5076, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.4637002341920375, |
|
"grad_norm": 0.056766077876091, |
|
"learning_rate": 9.911282486310214e-06, |
|
"loss": 0.4922, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.4707259953161593, |
|
"grad_norm": 0.06275495141744614, |
|
"learning_rate": 9.903425680647225e-06, |
|
"loss": 0.5696, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.477751756440281, |
|
"grad_norm": 0.05460723116993904, |
|
"learning_rate": 9.895238962036878e-06, |
|
"loss": 0.441, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.4847775175644028, |
|
"grad_norm": 0.06439048796892166, |
|
"learning_rate": 9.88672288129908e-06, |
|
"loss": 0.5162, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.4918032786885246, |
|
"grad_norm": 0.0556609220802784, |
|
"learning_rate": 9.877878011413924e-06, |
|
"loss": 0.4695, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.49882903981264637, |
|
"grad_norm": 0.0601162388920784, |
|
"learning_rate": 9.868704947483134e-06, |
|
"loss": 0.526, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.5058548009367682, |
|
"grad_norm": 0.05364784598350525, |
|
"learning_rate": 9.859204306690038e-06, |
|
"loss": 0.4963, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.5128805620608899, |
|
"grad_norm": 0.052927836775779724, |
|
"learning_rate": 9.849376728258024e-06, |
|
"loss": 0.514, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.5199063231850117, |
|
"grad_norm": 0.052183471620082855, |
|
"learning_rate": 9.839222873407553e-06, |
|
"loss": 0.484, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.5269320843091335, |
|
"grad_norm": 0.056657999753952026, |
|
"learning_rate": 9.828743425311654e-06, |
|
"loss": 0.4871, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.5339578454332553, |
|
"grad_norm": 0.056676190346479416, |
|
"learning_rate": 9.817939089049964e-06, |
|
"loss": 0.4665, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.5409836065573771, |
|
"grad_norm": 0.05315388739109039, |
|
"learning_rate": 9.806810591561295e-06, |
|
"loss": 0.4489, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.5480093676814989, |
|
"grad_norm": 0.05231834575533867, |
|
"learning_rate": 9.795358681594712e-06, |
|
"loss": 0.4952, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.5550351288056206, |
|
"grad_norm": 0.06096820533275604, |
|
"learning_rate": 9.783584129659162e-06, |
|
"loss": 0.5192, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.5620608899297423, |
|
"grad_norm": 0.07792366296052933, |
|
"learning_rate": 9.771487727971642e-06, |
|
"loss": 0.5, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.5690866510538641, |
|
"grad_norm": 0.05434305965900421, |
|
"learning_rate": 9.759070290403873e-06, |
|
"loss": 0.4835, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.5761124121779859, |
|
"grad_norm": 0.049161121249198914, |
|
"learning_rate": 9.746332652427566e-06, |
|
"loss": 0.4817, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.5831381733021077, |
|
"grad_norm": 0.06948748230934143, |
|
"learning_rate": 9.733275671058195e-06, |
|
"loss": 0.5137, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.5901639344262295, |
|
"grad_norm": 0.05787486582994461, |
|
"learning_rate": 9.71990022479734e-06, |
|
"loss": 0.4925, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.5971896955503513, |
|
"grad_norm": 0.0546451136469841, |
|
"learning_rate": 9.70620721357358e-06, |
|
"loss": 0.4758, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.6042154566744731, |
|
"grad_norm": 0.05614368990063667, |
|
"learning_rate": 9.69219755868194e-06, |
|
"loss": 0.4793, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.6112412177985949, |
|
"grad_norm": 0.055385395884513855, |
|
"learning_rate": 9.677872202721906e-06, |
|
"loss": 0.512, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.6182669789227166, |
|
"grad_norm": 0.05747217312455177, |
|
"learning_rate": 9.663232109534011e-06, |
|
"loss": 0.5597, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.6252927400468384, |
|
"grad_norm": 0.05803421884775162, |
|
"learning_rate": 9.648278264134977e-06, |
|
"loss": 0.4618, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.6323185011709602, |
|
"grad_norm": 0.05690561234951019, |
|
"learning_rate": 9.633011672651443e-06, |
|
"loss": 0.4276, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.639344262295082, |
|
"grad_norm": 0.05551106855273247, |
|
"learning_rate": 9.617433362252277e-06, |
|
"loss": 0.5031, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.6463700234192038, |
|
"grad_norm": 0.04912833124399185, |
|
"learning_rate": 9.601544381079457e-06, |
|
"loss": 0.4666, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.6533957845433255, |
|
"grad_norm": 0.0519762858748436, |
|
"learning_rate": 9.585345798177557e-06, |
|
"loss": 0.522, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.6604215456674473, |
|
"grad_norm": 0.06260818988084793, |
|
"learning_rate": 9.56883870342181e-06, |
|
"loss": 0.4578, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.667447306791569, |
|
"grad_norm": 0.06875967979431152, |
|
"learning_rate": 9.552024207444794e-06, |
|
"loss": 0.4448, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.6744730679156908, |
|
"grad_norm": 0.058077067136764526, |
|
"learning_rate": 9.534903441561693e-06, |
|
"loss": 0.5177, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.6814988290398126, |
|
"grad_norm": 0.06001827120780945, |
|
"learning_rate": 9.517477557694182e-06, |
|
"loss": 0.5171, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.6885245901639344, |
|
"grad_norm": 0.05816899985074997, |
|
"learning_rate": 9.499747728292928e-06, |
|
"loss": 0.5271, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.6955503512880562, |
|
"grad_norm": 0.054729413241147995, |
|
"learning_rate": 9.481715146258699e-06, |
|
"loss": 0.446, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.702576112412178, |
|
"grad_norm": 0.055416759103536606, |
|
"learning_rate": 9.463381024862116e-06, |
|
"loss": 0.5345, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.7096018735362998, |
|
"grad_norm": 0.06506048887968063, |
|
"learning_rate": 9.444746597662e-06, |
|
"loss": 0.5212, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.7166276346604216, |
|
"grad_norm": 0.052193962037563324, |
|
"learning_rate": 9.425813118422393e-06, |
|
"loss": 0.4809, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.7236533957845434, |
|
"grad_norm": 0.056404754519462585, |
|
"learning_rate": 9.406581861028199e-06, |
|
"loss": 0.5527, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.7306791569086651, |
|
"grad_norm": 0.05873854085803032, |
|
"learning_rate": 9.387054119399466e-06, |
|
"loss": 0.4389, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.7377049180327869, |
|
"grad_norm": 0.05391126498579979, |
|
"learning_rate": 9.36723120740434e-06, |
|
"loss": 0.4684, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.7447306791569087, |
|
"grad_norm": 0.06326638162136078, |
|
"learning_rate": 9.347114458770656e-06, |
|
"loss": 0.4912, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.7517564402810304, |
|
"grad_norm": 0.05993535369634628, |
|
"learning_rate": 9.326705226996207e-06, |
|
"loss": 0.4747, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.7587822014051522, |
|
"grad_norm": 0.06042395904660225, |
|
"learning_rate": 9.306004885257675e-06, |
|
"loss": 0.477, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.765807962529274, |
|
"grad_norm": 0.059888120740652084, |
|
"learning_rate": 9.28501482631824e-06, |
|
"loss": 0.4948, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.7728337236533958, |
|
"grad_norm": 0.05696633458137512, |
|
"learning_rate": 9.26373646243388e-06, |
|
"loss": 0.5053, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.7798594847775175, |
|
"grad_norm": 0.05638626217842102, |
|
"learning_rate": 9.242171225258336e-06, |
|
"loss": 0.4918, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.7868852459016393, |
|
"grad_norm": 0.05654750391840935, |
|
"learning_rate": 9.220320565746806e-06, |
|
"loss": 0.4604, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.7939110070257611, |
|
"grad_norm": 0.04910074546933174, |
|
"learning_rate": 9.198185954058305e-06, |
|
"loss": 0.486, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 0.8009367681498829, |
|
"grad_norm": 0.05476020276546478, |
|
"learning_rate": 9.175768879456759e-06, |
|
"loss": 0.4701, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.8079625292740047, |
|
"grad_norm": 0.05878998339176178, |
|
"learning_rate": 9.153070850210803e-06, |
|
"loss": 0.4583, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.8149882903981265, |
|
"grad_norm": 0.050011828541755676, |
|
"learning_rate": 9.130093393492302e-06, |
|
"loss": 0.441, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.8220140515222483, |
|
"grad_norm": 0.0516488291323185, |
|
"learning_rate": 9.106838055273589e-06, |
|
"loss": 0.4663, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.8290398126463701, |
|
"grad_norm": 0.058606114238500595, |
|
"learning_rate": 9.083306400223465e-06, |
|
"loss": 0.5017, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.8360655737704918, |
|
"grad_norm": 0.05311114713549614, |
|
"learning_rate": 9.059500011601919e-06, |
|
"loss": 0.4615, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 0.8430913348946136, |
|
"grad_norm": 0.05270574986934662, |
|
"learning_rate": 9.035420491153596e-06, |
|
"loss": 0.4469, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.8501170960187353, |
|
"grad_norm": 0.05161169916391373, |
|
"learning_rate": 9.011069459000035e-06, |
|
"loss": 0.4882, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 0.8571428571428571, |
|
"grad_norm": 0.0560365691781044, |
|
"learning_rate": 8.986448553530665e-06, |
|
"loss": 0.4454, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.8641686182669789, |
|
"grad_norm": 0.050510723143815994, |
|
"learning_rate": 8.961559431292562e-06, |
|
"loss": 0.4535, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 0.8711943793911007, |
|
"grad_norm": 0.05732515826821327, |
|
"learning_rate": 8.936403766879003e-06, |
|
"loss": 0.4369, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.8782201405152225, |
|
"grad_norm": 0.05295110121369362, |
|
"learning_rate": 8.910983252816794e-06, |
|
"loss": 0.4286, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.8852459016393442, |
|
"grad_norm": 0.050234604626894, |
|
"learning_rate": 8.885299599452381e-06, |
|
"loss": 0.4578, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.892271662763466, |
|
"grad_norm": 0.06070755422115326, |
|
"learning_rate": 8.859354534836797e-06, |
|
"loss": 0.4594, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 0.8992974238875878, |
|
"grad_norm": 0.05537045747041702, |
|
"learning_rate": 8.833149804609372e-06, |
|
"loss": 0.425, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.9063231850117096, |
|
"grad_norm": 0.051650241017341614, |
|
"learning_rate": 8.806687171880298e-06, |
|
"loss": 0.4714, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.9133489461358314, |
|
"grad_norm": 0.051121823489665985, |
|
"learning_rate": 8.779968417111991e-06, |
|
"loss": 0.4549, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.9203747072599532, |
|
"grad_norm": 0.056628625839948654, |
|
"learning_rate": 8.752995337999316e-06, |
|
"loss": 0.5337, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 0.927400468384075, |
|
"grad_norm": 0.05635831505060196, |
|
"learning_rate": 8.725769749348612e-06, |
|
"loss": 0.4747, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.9344262295081968, |
|
"grad_norm": 0.055249571800231934, |
|
"learning_rate": 8.698293482955605e-06, |
|
"loss": 0.4773, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 0.9414519906323185, |
|
"grad_norm": 0.056761760264635086, |
|
"learning_rate": 8.670568387482153e-06, |
|
"loss": 0.4751, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.9484777517564403, |
|
"grad_norm": 0.06461669504642487, |
|
"learning_rate": 8.642596328331864e-06, |
|
"loss": 0.4715, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.955503512880562, |
|
"grad_norm": 0.054414063692092896, |
|
"learning_rate": 8.614379187524593e-06, |
|
"loss": 0.4489, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.9625292740046838, |
|
"grad_norm": 0.053149014711380005, |
|
"learning_rate": 8.585918863569806e-06, |
|
"loss": 0.4493, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 0.9695550351288056, |
|
"grad_norm": 0.052316196262836456, |
|
"learning_rate": 8.55721727133886e-06, |
|
"loss": 0.4691, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.9765807962529274, |
|
"grad_norm": 0.052749183028936386, |
|
"learning_rate": 8.528276341936146e-06, |
|
"loss": 0.4877, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 0.9836065573770492, |
|
"grad_norm": 0.05662725865840912, |
|
"learning_rate": 8.499098022569177e-06, |
|
"loss": 0.4579, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.990632318501171, |
|
"grad_norm": 0.05714595317840576, |
|
"learning_rate": 8.469684276417568e-06, |
|
"loss": 0.5069, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 0.9976580796252927, |
|
"grad_norm": 0.05158586427569389, |
|
"learning_rate": 8.440037082500953e-06, |
|
"loss": 0.4544, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 0.05158586427569389, |
|
"learning_rate": 8.410158435545825e-06, |
|
"loss": 0.5067, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 1.0070257611241218, |
|
"grad_norm": 0.10291286557912827, |
|
"learning_rate": 8.380050345851338e-06, |
|
"loss": 0.3535, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 1.0140515222482436, |
|
"grad_norm": 0.047048892825841904, |
|
"learning_rate": 8.349714839154035e-06, |
|
"loss": 0.3635, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 1.0210772833723654, |
|
"grad_norm": 0.052344005554914474, |
|
"learning_rate": 8.319153956491567e-06, |
|
"loss": 0.3643, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 1.0281030444964872, |
|
"grad_norm": 0.050871554762125015, |
|
"learning_rate": 8.288369754065362e-06, |
|
"loss": 0.3487, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 1.035128805620609, |
|
"grad_norm": 0.04831859841942787, |
|
"learning_rate": 8.257364303102275e-06, |
|
"loss": 0.3836, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 1.0421545667447307, |
|
"grad_norm": 0.05533618479967117, |
|
"learning_rate": 8.226139689715233e-06, |
|
"loss": 0.3699, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 1.0491803278688525, |
|
"grad_norm": 0.05083422362804413, |
|
"learning_rate": 8.19469801476288e-06, |
|
"loss": 0.4104, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 1.0562060889929743, |
|
"grad_norm": 0.05335497856140137, |
|
"learning_rate": 8.16304139370823e-06, |
|
"loss": 0.3209, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 1.063231850117096, |
|
"grad_norm": 0.054009512066841125, |
|
"learning_rate": 8.131171956476328e-06, |
|
"loss": 0.3853, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 1.0702576112412179, |
|
"grad_norm": 0.049055177718400955, |
|
"learning_rate": 8.09909184731094e-06, |
|
"loss": 0.3542, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 1.0772833723653397, |
|
"grad_norm": 0.06105168163776398, |
|
"learning_rate": 8.066803224630295e-06, |
|
"loss": 0.3527, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 1.0843091334894615, |
|
"grad_norm": 0.05668722093105316, |
|
"learning_rate": 8.034308260881854e-06, |
|
"loss": 0.3725, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 1.0913348946135832, |
|
"grad_norm": 0.05870070680975914, |
|
"learning_rate": 8.00160914239615e-06, |
|
"loss": 0.3502, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 1.098360655737705, |
|
"grad_norm": 0.05561830475926399, |
|
"learning_rate": 7.968708069239672e-06, |
|
"loss": 0.4132, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 1.1053864168618266, |
|
"grad_norm": 0.05985680967569351, |
|
"learning_rate": 7.935607255066867e-06, |
|
"loss": 0.387, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 1.1124121779859484, |
|
"grad_norm": 0.05309848487377167, |
|
"learning_rate": 7.902308926971166e-06, |
|
"loss": 0.3512, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 1.1194379391100702, |
|
"grad_norm": 0.057192280888557434, |
|
"learning_rate": 7.868815325335168e-06, |
|
"loss": 0.3755, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 1.126463700234192, |
|
"grad_norm": 0.06615495681762695, |
|
"learning_rate": 7.835128703679896e-06, |
|
"loss": 0.3666, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 1.1334894613583137, |
|
"grad_norm": 0.05387312173843384, |
|
"learning_rate": 7.801251328513164e-06, |
|
"loss": 0.3669, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 1.1405152224824355, |
|
"grad_norm": 0.06274469196796417, |
|
"learning_rate": 7.767185479177092e-06, |
|
"loss": 0.3513, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 1.1475409836065573, |
|
"grad_norm": 0.06123442202806473, |
|
"learning_rate": 7.732933447694748e-06, |
|
"loss": 0.3789, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 1.154566744730679, |
|
"grad_norm": 0.05250508710741997, |
|
"learning_rate": 7.698497538615928e-06, |
|
"loss": 0.337, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 1.161592505854801, |
|
"grad_norm": 0.05276589095592499, |
|
"learning_rate": 7.663880068862106e-06, |
|
"loss": 0.3281, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 1.1686182669789227, |
|
"grad_norm": 0.05875389277935028, |
|
"learning_rate": 7.629083367570547e-06, |
|
"loss": 0.3786, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 1.1756440281030445, |
|
"grad_norm": 0.06692057847976685, |
|
"learning_rate": 7.594109775937595e-06, |
|
"loss": 0.3656, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 1.1826697892271663, |
|
"grad_norm": 0.05434219911694527, |
|
"learning_rate": 7.558961647061156e-06, |
|
"loss": 0.3733, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 1.189695550351288, |
|
"grad_norm": 0.06000547111034393, |
|
"learning_rate": 7.5236413457823745e-06, |
|
"loss": 0.3174, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 1.1967213114754098, |
|
"grad_norm": 0.05446619912981987, |
|
"learning_rate": 7.488151248526518e-06, |
|
"loss": 0.3304, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 1.2037470725995316, |
|
"grad_norm": 0.05152672156691551, |
|
"learning_rate": 7.452493743143092e-06, |
|
"loss": 0.3353, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 1.2107728337236534, |
|
"grad_norm": 0.06070106849074364, |
|
"learning_rate": 7.416671228745181e-06, |
|
"loss": 0.4031, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 1.2177985948477752, |
|
"grad_norm": 0.05588310956954956, |
|
"learning_rate": 7.380686115548024e-06, |
|
"loss": 0.3465, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 1.224824355971897, |
|
"grad_norm": 0.05709127336740494, |
|
"learning_rate": 7.344540824706855e-06, |
|
"loss": 0.3529, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 1.2318501170960188, |
|
"grad_norm": 0.05777303874492645, |
|
"learning_rate": 7.3082377881540025e-06, |
|
"loss": 0.3622, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 1.2388758782201406, |
|
"grad_norm": 0.05582602322101593, |
|
"learning_rate": 7.271779448435265e-06, |
|
"loss": 0.3663, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 1.2459016393442623, |
|
"grad_norm": 0.060280896723270416, |
|
"learning_rate": 7.235168258545569e-06, |
|
"loss": 0.3681, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 1.2529274004683841, |
|
"grad_norm": 0.059871841222047806, |
|
"learning_rate": 7.198406681763925e-06, |
|
"loss": 0.3706, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 1.259953161592506, |
|
"grad_norm": 0.06064627692103386, |
|
"learning_rate": 7.161497191487693e-06, |
|
"loss": 0.379, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 1.2669789227166277, |
|
"grad_norm": 0.04873732104897499, |
|
"learning_rate": 7.124442271066174e-06, |
|
"loss": 0.3558, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 1.2740046838407495, |
|
"grad_norm": 0.06199304386973381, |
|
"learning_rate": 7.087244413633516e-06, |
|
"loss": 0.3575, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 1.281030444964871, |
|
"grad_norm": 0.058650556951761246, |
|
"learning_rate": 7.049906121940974e-06, |
|
"loss": 0.3685, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 1.288056206088993, |
|
"grad_norm": 0.06006557121872902, |
|
"learning_rate": 7.012429908188523e-06, |
|
"loss": 0.345, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 1.2950819672131146, |
|
"grad_norm": 0.059959061443805695, |
|
"learning_rate": 6.9748182938558225e-06, |
|
"loss": 0.3252, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 1.3021077283372366, |
|
"grad_norm": 0.05382518842816353, |
|
"learning_rate": 6.937073809532581e-06, |
|
"loss": 0.4011, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 1.3091334894613582, |
|
"grad_norm": 0.05975024029612541, |
|
"learning_rate": 6.899198994748274e-06, |
|
"loss": 0.3351, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 1.3161592505854802, |
|
"grad_norm": 0.05733481049537659, |
|
"learning_rate": 6.861196397801297e-06, |
|
"loss": 0.309, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 1.3231850117096018, |
|
"grad_norm": 0.051539335399866104, |
|
"learning_rate": 6.823068575587496e-06, |
|
"loss": 0.3394, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 1.3302107728337236, |
|
"grad_norm": 0.05479830130934715, |
|
"learning_rate": 6.784818093428144e-06, |
|
"loss": 0.3243, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 1.3372365339578454, |
|
"grad_norm": 0.05627552792429924, |
|
"learning_rate": 6.746447524897335e-06, |
|
"loss": 0.3534, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 1.3442622950819672, |
|
"grad_norm": 0.05136909708380699, |
|
"learning_rate": 6.70795945164883e-06, |
|
"loss": 0.36, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 1.351288056206089, |
|
"grad_norm": 0.06510506570339203, |
|
"learning_rate": 6.6693564632423626e-06, |
|
"loss": 0.3617, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 1.3583138173302107, |
|
"grad_norm": 0.06433955579996109, |
|
"learning_rate": 6.630641156969397e-06, |
|
"loss": 0.334, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 1.3653395784543325, |
|
"grad_norm": 0.05501256510615349, |
|
"learning_rate": 6.591816137678388e-06, |
|
"loss": 0.3502, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 1.3723653395784543, |
|
"grad_norm": 0.06037002056837082, |
|
"learning_rate": 6.552884017599517e-06, |
|
"loss": 0.3673, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 1.379391100702576, |
|
"grad_norm": 0.06693354994058609, |
|
"learning_rate": 6.513847416168929e-06, |
|
"loss": 0.3842, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 1.3864168618266979, |
|
"grad_norm": 0.06090663745999336, |
|
"learning_rate": 6.474708959852504e-06, |
|
"loss": 0.31, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 1.3934426229508197, |
|
"grad_norm": 0.05243751406669617, |
|
"learning_rate": 6.435471281969133e-06, |
|
"loss": 0.329, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 1.4004683840749415, |
|
"grad_norm": 0.057668376713991165, |
|
"learning_rate": 6.396137022513545e-06, |
|
"loss": 0.3504, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.4074941451990632, |
|
"grad_norm": 0.05978507921099663, |
|
"learning_rate": 6.3567088279786885e-06, |
|
"loss": 0.3664, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 1.414519906323185, |
|
"grad_norm": 0.0557040311396122, |
|
"learning_rate": 6.317189351177657e-06, |
|
"loss": 0.3667, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 1.4215456674473068, |
|
"grad_norm": 0.06627603620290756, |
|
"learning_rate": 6.277581251065217e-06, |
|
"loss": 0.3627, |
|
"step": 203 |
|
}, |
|
{ |
|
"epoch": 1.4285714285714286, |
|
"grad_norm": 0.05487382784485817, |
|
"learning_rate": 6.237887192558894e-06, |
|
"loss": 0.3806, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 1.4355971896955504, |
|
"grad_norm": 0.06525876373052597, |
|
"learning_rate": 6.198109846359682e-06, |
|
"loss": 0.3483, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 1.4426229508196722, |
|
"grad_norm": 0.061048902571201324, |
|
"learning_rate": 6.15825188877235e-06, |
|
"loss": 0.4125, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 1.449648711943794, |
|
"grad_norm": 0.057914849370718, |
|
"learning_rate": 6.118316001525368e-06, |
|
"loss": 0.3748, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 1.4566744730679158, |
|
"grad_norm": 0.0596294067800045, |
|
"learning_rate": 6.078304871590485e-06, |
|
"loss": 0.3302, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 1.4637002341920375, |
|
"grad_norm": 0.056739531457424164, |
|
"learning_rate": 6.038221191001935e-06, |
|
"loss": 0.3529, |
|
"step": 209 |
|
}, |
|
{ |
|
"epoch": 1.4707259953161593, |
|
"grad_norm": 0.050509966909885406, |
|
"learning_rate": 5.998067656675318e-06, |
|
"loss": 0.3776, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 1.4777517564402811, |
|
"grad_norm": 0.05876694247126579, |
|
"learning_rate": 5.95784697022614e-06, |
|
"loss": 0.3552, |
|
"step": 211 |
|
}, |
|
{ |
|
"epoch": 1.4847775175644027, |
|
"grad_norm": 0.05890351161360741, |
|
"learning_rate": 5.917561837788046e-06, |
|
"loss": 0.3556, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 1.4918032786885247, |
|
"grad_norm": 0.05058097094297409, |
|
"learning_rate": 5.877214969830746e-06, |
|
"loss": 0.3184, |
|
"step": 213 |
|
}, |
|
{ |
|
"epoch": 1.4988290398126463, |
|
"grad_norm": 0.057646844536066055, |
|
"learning_rate": 5.836809080977644e-06, |
|
"loss": 0.3577, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 1.5058548009367683, |
|
"grad_norm": 0.05617011711001396, |
|
"learning_rate": 5.7963468898232026e-06, |
|
"loss": 0.351, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 1.5128805620608898, |
|
"grad_norm": 0.07923811674118042, |
|
"learning_rate": 5.755831118750016e-06, |
|
"loss": 0.3816, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 1.5199063231850118, |
|
"grad_norm": 0.05769532546401024, |
|
"learning_rate": 5.715264493745652e-06, |
|
"loss": 0.355, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 1.5269320843091334, |
|
"grad_norm": 0.05954836681485176, |
|
"learning_rate": 5.6746497442192425e-06, |
|
"loss": 0.3514, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 1.5339578454332554, |
|
"grad_norm": 0.056089069694280624, |
|
"learning_rate": 5.633989602817837e-06, |
|
"loss": 0.3369, |
|
"step": 219 |
|
}, |
|
{ |
|
"epoch": 1.540983606557377, |
|
"grad_norm": 0.0603008046746254, |
|
"learning_rate": 5.593286805242549e-06, |
|
"loss": 0.3669, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 1.548009367681499, |
|
"grad_norm": 0.06486702710390091, |
|
"learning_rate": 5.552544090064487e-06, |
|
"loss": 0.3657, |
|
"step": 221 |
|
}, |
|
{ |
|
"epoch": 1.5550351288056206, |
|
"grad_norm": 0.06320104748010635, |
|
"learning_rate": 5.5117641985405055e-06, |
|
"loss": 0.3354, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 1.5620608899297423, |
|
"grad_norm": 0.05017773061990738, |
|
"learning_rate": 5.47094987442876e-06, |
|
"loss": 0.3442, |
|
"step": 223 |
|
}, |
|
{ |
|
"epoch": 1.5690866510538641, |
|
"grad_norm": 0.05650470405817032, |
|
"learning_rate": 5.430103863804107e-06, |
|
"loss": 0.3522, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 1.576112412177986, |
|
"grad_norm": 0.061710115522146225, |
|
"learning_rate": 5.389228914873334e-06, |
|
"loss": 0.3705, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 1.5831381733021077, |
|
"grad_norm": 0.04975937306880951, |
|
"learning_rate": 5.348327777790262e-06, |
|
"loss": 0.3351, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 1.5901639344262295, |
|
"grad_norm": 0.054747324436903, |
|
"learning_rate": 5.307403204470711e-06, |
|
"loss": 0.3588, |
|
"step": 227 |
|
}, |
|
{ |
|
"epoch": 1.5971896955503513, |
|
"grad_norm": 0.06445419788360596, |
|
"learning_rate": 5.266457948407336e-06, |
|
"loss": 0.3728, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 1.604215456674473, |
|
"grad_norm": 0.0565023347735405, |
|
"learning_rate": 5.2254947644843735e-06, |
|
"loss": 0.3523, |
|
"step": 229 |
|
}, |
|
{ |
|
"epoch": 1.6112412177985949, |
|
"grad_norm": 0.05139541998505592, |
|
"learning_rate": 5.18451640879228e-06, |
|
"loss": 0.3392, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 1.6182669789227166, |
|
"grad_norm": 0.05746756121516228, |
|
"learning_rate": 5.14352563844231e-06, |
|
"loss": 0.2981, |
|
"step": 231 |
|
}, |
|
{ |
|
"epoch": 1.6252927400468384, |
|
"grad_norm": 0.05003352463245392, |
|
"learning_rate": 5.1025252113809945e-06, |
|
"loss": 0.3195, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 1.6323185011709602, |
|
"grad_norm": 0.05454004183411598, |
|
"learning_rate": 5.061517886204592e-06, |
|
"loss": 0.3319, |
|
"step": 233 |
|
}, |
|
{ |
|
"epoch": 1.639344262295082, |
|
"grad_norm": 0.054113056510686874, |
|
"learning_rate": 5.02050642197348e-06, |
|
"loss": 0.3514, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 1.6463700234192038, |
|
"grad_norm": 0.06152534484863281, |
|
"learning_rate": 4.979493578026523e-06, |
|
"loss": 0.3455, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 1.6533957845433256, |
|
"grad_norm": 0.05544520169496536, |
|
"learning_rate": 4.9384821137954106e-06, |
|
"loss": 0.3751, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 1.6604215456674472, |
|
"grad_norm": 0.07202091068029404, |
|
"learning_rate": 4.897474788619007e-06, |
|
"loss": 0.3418, |
|
"step": 237 |
|
}, |
|
{ |
|
"epoch": 1.6674473067915692, |
|
"grad_norm": 0.059407323598861694, |
|
"learning_rate": 4.856474361557692e-06, |
|
"loss": 0.3178, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 1.6744730679156907, |
|
"grad_norm": 0.051997967064380646, |
|
"learning_rate": 4.815483591207721e-06, |
|
"loss": 0.3754, |
|
"step": 239 |
|
}, |
|
{ |
|
"epoch": 1.6814988290398127, |
|
"grad_norm": 0.05671803653240204, |
|
"learning_rate": 4.774505235515628e-06, |
|
"loss": 0.3784, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 1.6885245901639343, |
|
"grad_norm": 0.056249409914016724, |
|
"learning_rate": 4.733542051592665e-06, |
|
"loss": 0.3327, |
|
"step": 241 |
|
}, |
|
{ |
|
"epoch": 1.6955503512880563, |
|
"grad_norm": 0.05896645039319992, |
|
"learning_rate": 4.69259679552929e-06, |
|
"loss": 0.3588, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 1.7025761124121779, |
|
"grad_norm": 0.070353664457798, |
|
"learning_rate": 4.651672222209738e-06, |
|
"loss": 0.3816, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 1.7096018735362999, |
|
"grad_norm": 0.05775173380970955, |
|
"learning_rate": 4.6107710851266695e-06, |
|
"loss": 0.3384, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 1.7166276346604215, |
|
"grad_norm": 0.05771046131849289, |
|
"learning_rate": 4.5698961361958955e-06, |
|
"loss": 0.3377, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 1.7236533957845435, |
|
"grad_norm": 0.055200010538101196, |
|
"learning_rate": 4.529050125571241e-06, |
|
"loss": 0.3456, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 1.730679156908665, |
|
"grad_norm": 0.05298285186290741, |
|
"learning_rate": 4.488235801459495e-06, |
|
"loss": 0.3166, |
|
"step": 247 |
|
}, |
|
{ |
|
"epoch": 1.737704918032787, |
|
"grad_norm": 0.05973465368151665, |
|
"learning_rate": 4.447455909935513e-06, |
|
"loss": 0.3711, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 1.7447306791569086, |
|
"grad_norm": 0.07082070410251617, |
|
"learning_rate": 4.4067131947574515e-06, |
|
"loss": 0.335, |
|
"step": 249 |
|
}, |
|
{ |
|
"epoch": 1.7517564402810304, |
|
"grad_norm": 0.05250892415642738, |
|
"learning_rate": 4.3660103971821635e-06, |
|
"loss": 0.3443, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 1.7587822014051522, |
|
"grad_norm": 0.06379300355911255, |
|
"learning_rate": 4.3253502557807575e-06, |
|
"loss": 0.3399, |
|
"step": 251 |
|
}, |
|
{ |
|
"epoch": 1.765807962529274, |
|
"grad_norm": 0.058025211095809937, |
|
"learning_rate": 4.28473550625435e-06, |
|
"loss": 0.3706, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 1.7728337236533958, |
|
"grad_norm": 0.05636170506477356, |
|
"learning_rate": 4.244168881249986e-06, |
|
"loss": 0.3737, |
|
"step": 253 |
|
}, |
|
{ |
|
"epoch": 1.7798594847775175, |
|
"grad_norm": 0.05882354453206062, |
|
"learning_rate": 4.203653110176798e-06, |
|
"loss": 0.3033, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 1.7868852459016393, |
|
"grad_norm": 0.05535350739955902, |
|
"learning_rate": 4.163190919022357e-06, |
|
"loss": 0.338, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 1.7939110070257611, |
|
"grad_norm": 0.0554145909845829, |
|
"learning_rate": 4.122785030169256e-06, |
|
"loss": 0.371, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 1.800936768149883, |
|
"grad_norm": 0.05324379727244377, |
|
"learning_rate": 4.082438162211955e-06, |
|
"loss": 0.3402, |
|
"step": 257 |
|
}, |
|
{ |
|
"epoch": 1.8079625292740047, |
|
"grad_norm": 0.06222432479262352, |
|
"learning_rate": 4.042153029773861e-06, |
|
"loss": 0.3405, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 1.8149882903981265, |
|
"grad_norm": 0.054615411907434464, |
|
"learning_rate": 4.001932343324683e-06, |
|
"loss": 0.3555, |
|
"step": 259 |
|
}, |
|
{ |
|
"epoch": 1.8220140515222483, |
|
"grad_norm": 0.0694437026977539, |
|
"learning_rate": 3.961778808998066e-06, |
|
"loss": 0.3863, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 1.82903981264637, |
|
"grad_norm": 0.05631214752793312, |
|
"learning_rate": 3.921695128409517e-06, |
|
"loss": 0.38, |
|
"step": 261 |
|
}, |
|
{ |
|
"epoch": 1.8360655737704918, |
|
"grad_norm": 0.05278317630290985, |
|
"learning_rate": 3.8816839984746334e-06, |
|
"loss": 0.3073, |
|
"step": 262 |
|
}, |
|
{ |
|
"epoch": 1.8430913348946136, |
|
"grad_norm": 0.05563074350357056, |
|
"learning_rate": 3.841748111227652e-06, |
|
"loss": 0.3417, |
|
"step": 263 |
|
}, |
|
{ |
|
"epoch": 1.8501170960187352, |
|
"grad_norm": 0.0652734711766243, |
|
"learning_rate": 3.8018901536403198e-06, |
|
"loss": 0.3819, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 1.8571428571428572, |
|
"grad_norm": 0.05986921489238739, |
|
"learning_rate": 3.762112807441108e-06, |
|
"loss": 0.3887, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 1.8641686182669788, |
|
"grad_norm": 0.060439128428697586, |
|
"learning_rate": 3.7224187489347847e-06, |
|
"loss": 0.3564, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 1.8711943793911008, |
|
"grad_norm": 0.06072353571653366, |
|
"learning_rate": 3.682810648822343e-06, |
|
"loss": 0.3868, |
|
"step": 267 |
|
}, |
|
{ |
|
"epoch": 1.8782201405152223, |
|
"grad_norm": 0.053765103220939636, |
|
"learning_rate": 3.6432911720213127e-06, |
|
"loss": 0.3699, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 1.8852459016393444, |
|
"grad_norm": 0.05896330624818802, |
|
"learning_rate": 3.6038629774864563e-06, |
|
"loss": 0.3384, |
|
"step": 269 |
|
}, |
|
{ |
|
"epoch": 1.892271662763466, |
|
"grad_norm": 0.06258895993232727, |
|
"learning_rate": 3.56452871803087e-06, |
|
"loss": 0.3837, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 1.899297423887588, |
|
"grad_norm": 0.06024617701768875, |
|
"learning_rate": 3.525291040147498e-06, |
|
"loss": 0.3078, |
|
"step": 271 |
|
}, |
|
{ |
|
"epoch": 1.9063231850117095, |
|
"grad_norm": 0.0663781389594078, |
|
"learning_rate": 3.486152583831072e-06, |
|
"loss": 0.3401, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 1.9133489461358315, |
|
"grad_norm": 0.0578744150698185, |
|
"learning_rate": 3.447115982400485e-06, |
|
"loss": 0.3572, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 1.920374707259953, |
|
"grad_norm": 0.05523526668548584, |
|
"learning_rate": 3.4081838623216124e-06, |
|
"loss": 0.381, |
|
"step": 274 |
|
}, |
|
{ |
|
"epoch": 1.927400468384075, |
|
"grad_norm": 0.0603664331138134, |
|
"learning_rate": 3.3693588430306035e-06, |
|
"loss": 0.3118, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 1.9344262295081966, |
|
"grad_norm": 0.05586825683712959, |
|
"learning_rate": 3.330643536757638e-06, |
|
"loss": 0.3449, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 1.9414519906323187, |
|
"grad_norm": 0.05573516711592674, |
|
"learning_rate": 3.2920405483511702e-06, |
|
"loss": 0.3405, |
|
"step": 277 |
|
}, |
|
{ |
|
"epoch": 1.9484777517564402, |
|
"grad_norm": 0.058013953268527985, |
|
"learning_rate": 3.253552475102668e-06, |
|
"loss": 0.3462, |
|
"step": 278 |
|
}, |
|
{ |
|
"epoch": 1.955503512880562, |
|
"grad_norm": 0.055303193628787994, |
|
"learning_rate": 3.215181906571858e-06, |
|
"loss": 0.3719, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 1.9625292740046838, |
|
"grad_norm": 0.05314116179943085, |
|
"learning_rate": 3.1769314244125056e-06, |
|
"loss": 0.3472, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 1.9695550351288056, |
|
"grad_norm": 0.06574741005897522, |
|
"learning_rate": 3.1388036021987047e-06, |
|
"loss": 0.3987, |
|
"step": 281 |
|
}, |
|
{ |
|
"epoch": 1.9765807962529274, |
|
"grad_norm": 0.053546082228422165, |
|
"learning_rate": 3.100801005251727e-06, |
|
"loss": 0.3384, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 1.9836065573770492, |
|
"grad_norm": 0.06009920313954353, |
|
"learning_rate": 3.0629261904674206e-06, |
|
"loss": 0.3563, |
|
"step": 283 |
|
}, |
|
{ |
|
"epoch": 1.990632318501171, |
|
"grad_norm": 0.05714387819170952, |
|
"learning_rate": 3.025181706144178e-06, |
|
"loss": 0.3169, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 1.9976580796252927, |
|
"grad_norm": 0.05756373330950737, |
|
"learning_rate": 2.987570091811479e-06, |
|
"loss": 0.3797, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 0.05756373330950737, |
|
"learning_rate": 2.9500938780590276e-06, |
|
"loss": 0.3294, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 2.0070257611241216, |
|
"grad_norm": 0.11346685141324997, |
|
"learning_rate": 2.9127555863664857e-06, |
|
"loss": 0.2824, |
|
"step": 287 |
|
}, |
|
{ |
|
"epoch": 2.0140515222482436, |
|
"grad_norm": 0.11799391359090805, |
|
"learning_rate": 2.8755577289338267e-06, |
|
"loss": 0.2677, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 2.021077283372365, |
|
"grad_norm": 0.0632392093539238, |
|
"learning_rate": 2.838502808512309e-06, |
|
"loss": 0.2369, |
|
"step": 289 |
|
}, |
|
{ |
|
"epoch": 2.028103044496487, |
|
"grad_norm": 0.07154154777526855, |
|
"learning_rate": 2.801593318236078e-06, |
|
"loss": 0.2623, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 2.0351288056206087, |
|
"grad_norm": 0.060281310230493546, |
|
"learning_rate": 2.764831741454432e-06, |
|
"loss": 0.286, |
|
"step": 291 |
|
}, |
|
{ |
|
"epoch": 2.0421545667447307, |
|
"grad_norm": 0.06496189534664154, |
|
"learning_rate": 2.7282205515647348e-06, |
|
"loss": 0.2172, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 2.0491803278688523, |
|
"grad_norm": 0.05627848207950592, |
|
"learning_rate": 2.6917622118459975e-06, |
|
"loss": 0.2247, |
|
"step": 293 |
|
}, |
|
{ |
|
"epoch": 2.0562060889929743, |
|
"grad_norm": 0.061047762632369995, |
|
"learning_rate": 2.655459175293146e-06, |
|
"loss": 0.2094, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 2.063231850117096, |
|
"grad_norm": 0.05773235112428665, |
|
"learning_rate": 2.6193138844519785e-06, |
|
"loss": 0.273, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 2.070257611241218, |
|
"grad_norm": 0.0726071298122406, |
|
"learning_rate": 2.58332877125482e-06, |
|
"loss": 0.2392, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 2.0772833723653394, |
|
"grad_norm": 0.06737970560789108, |
|
"learning_rate": 2.5475062568569077e-06, |
|
"loss": 0.2721, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 2.0843091334894615, |
|
"grad_norm": 0.1003628745675087, |
|
"learning_rate": 2.511848751473485e-06, |
|
"loss": 0.2392, |
|
"step": 298 |
|
}, |
|
{ |
|
"epoch": 2.091334894613583, |
|
"grad_norm": 0.05960209295153618, |
|
"learning_rate": 2.476358654217627e-06, |
|
"loss": 0.2195, |
|
"step": 299 |
|
}, |
|
{ |
|
"epoch": 2.098360655737705, |
|
"grad_norm": 0.07600712776184082, |
|
"learning_rate": 2.4410383529388448e-06, |
|
"loss": 0.2397, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 2.1053864168618266, |
|
"grad_norm": 0.05939944460988045, |
|
"learning_rate": 2.405890224062406e-06, |
|
"loss": 0.2456, |
|
"step": 301 |
|
}, |
|
{ |
|
"epoch": 2.1124121779859486, |
|
"grad_norm": 0.05378476157784462, |
|
"learning_rate": 2.370916632429455e-06, |
|
"loss": 0.2124, |
|
"step": 302 |
|
}, |
|
{ |
|
"epoch": 2.11943793911007, |
|
"grad_norm": 0.07477736473083496, |
|
"learning_rate": 2.336119931137897e-06, |
|
"loss": 0.2426, |
|
"step": 303 |
|
}, |
|
{ |
|
"epoch": 2.126463700234192, |
|
"grad_norm": 0.06331060081720352, |
|
"learning_rate": 2.3015024613840742e-06, |
|
"loss": 0.2446, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 2.1334894613583137, |
|
"grad_norm": 0.059247083961963654, |
|
"learning_rate": 2.2670665523052534e-06, |
|
"loss": 0.2388, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 2.1405152224824358, |
|
"grad_norm": 0.055599454790353775, |
|
"learning_rate": 2.2328145208229096e-06, |
|
"loss": 0.2119, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 2.1475409836065573, |
|
"grad_norm": 0.05521732196211815, |
|
"learning_rate": 2.1987486714868384e-06, |
|
"loss": 0.2212, |
|
"step": 307 |
|
}, |
|
{ |
|
"epoch": 2.1545667447306793, |
|
"grad_norm": 0.0638333410024643, |
|
"learning_rate": 2.164871296320106e-06, |
|
"loss": 0.2423, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 2.161592505854801, |
|
"grad_norm": 0.06265348196029663, |
|
"learning_rate": 2.1311846746648325e-06, |
|
"loss": 0.214, |
|
"step": 309 |
|
}, |
|
{ |
|
"epoch": 2.168618266978923, |
|
"grad_norm": 0.0508870929479599, |
|
"learning_rate": 2.097691073028836e-06, |
|
"loss": 0.2307, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 2.1756440281030445, |
|
"grad_norm": 0.05442043021321297, |
|
"learning_rate": 2.064392744933135e-06, |
|
"loss": 0.2381, |
|
"step": 311 |
|
}, |
|
{ |
|
"epoch": 2.1826697892271665, |
|
"grad_norm": 0.06381048262119293, |
|
"learning_rate": 2.0312919307603286e-06, |
|
"loss": 0.2056, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 2.189695550351288, |
|
"grad_norm": 0.05382630601525307, |
|
"learning_rate": 1.998390857603853e-06, |
|
"loss": 0.2282, |
|
"step": 313 |
|
}, |
|
{ |
|
"epoch": 2.19672131147541, |
|
"grad_norm": 0.06103895604610443, |
|
"learning_rate": 1.965691739118146e-06, |
|
"loss": 0.2176, |
|
"step": 314 |
|
}, |
|
{ |
|
"epoch": 2.2037470725995316, |
|
"grad_norm": 0.07506411522626877, |
|
"learning_rate": 1.9331967753697077e-06, |
|
"loss": 0.2111, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 2.210772833723653, |
|
"grad_norm": 0.061480604112148285, |
|
"learning_rate": 1.9009081526890622e-06, |
|
"loss": 0.2162, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 2.217798594847775, |
|
"grad_norm": 0.0700734481215477, |
|
"learning_rate": 1.8688280435236732e-06, |
|
"loss": 0.2104, |
|
"step": 317 |
|
}, |
|
{ |
|
"epoch": 2.2248243559718968, |
|
"grad_norm": 0.06788410246372223, |
|
"learning_rate": 1.8369586062917693e-06, |
|
"loss": 0.2528, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 2.2318501170960188, |
|
"grad_norm": 0.06086277589201927, |
|
"learning_rate": 1.8053019852371195e-06, |
|
"loss": 0.2433, |
|
"step": 319 |
|
}, |
|
{ |
|
"epoch": 2.2388758782201403, |
|
"grad_norm": 0.06438933312892914, |
|
"learning_rate": 1.7738603102847696e-06, |
|
"loss": 0.2306, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 2.2459016393442623, |
|
"grad_norm": 0.05693851783871651, |
|
"learning_rate": 1.7426356968977265e-06, |
|
"loss": 0.2603, |
|
"step": 321 |
|
}, |
|
{ |
|
"epoch": 2.252927400468384, |
|
"grad_norm": 0.07242682576179504, |
|
"learning_rate": 1.711630245934638e-06, |
|
"loss": 0.2595, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 2.259953161592506, |
|
"grad_norm": 0.057473134249448776, |
|
"learning_rate": 1.6808460435084316e-06, |
|
"loss": 0.2465, |
|
"step": 323 |
|
}, |
|
{ |
|
"epoch": 2.2669789227166275, |
|
"grad_norm": 0.060898784548044205, |
|
"learning_rate": 1.6502851608459668e-06, |
|
"loss": 0.2364, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 2.2740046838407495, |
|
"grad_norm": 0.05470450222492218, |
|
"learning_rate": 1.6199496541486647e-06, |
|
"loss": 0.2162, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 2.281030444964871, |
|
"grad_norm": 0.058283645659685135, |
|
"learning_rate": 1.589841564454176e-06, |
|
"loss": 0.2432, |
|
"step": 326 |
|
}, |
|
{ |
|
"epoch": 2.288056206088993, |
|
"grad_norm": 0.05749303847551346, |
|
"learning_rate": 1.5599629174990482e-06, |
|
"loss": 0.2493, |
|
"step": 327 |
|
}, |
|
{ |
|
"epoch": 2.2950819672131146, |
|
"grad_norm": 0.0628993809223175, |
|
"learning_rate": 1.5303157235824323e-06, |
|
"loss": 0.227, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 2.3021077283372366, |
|
"grad_norm": 0.054379165172576904, |
|
"learning_rate": 1.5009019774308249e-06, |
|
"loss": 0.2256, |
|
"step": 329 |
|
}, |
|
{ |
|
"epoch": 2.309133489461358, |
|
"grad_norm": 0.0579175241291523, |
|
"learning_rate": 1.471723658063856e-06, |
|
"loss": 0.2536, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 2.3161592505854802, |
|
"grad_norm": 0.06285049021244049, |
|
"learning_rate": 1.4427827286611412e-06, |
|
"loss": 0.2514, |
|
"step": 331 |
|
}, |
|
{ |
|
"epoch": 2.323185011709602, |
|
"grad_norm": 0.06163005530834198, |
|
"learning_rate": 1.4140811364301931e-06, |
|
"loss": 0.1979, |
|
"step": 332 |
|
}, |
|
{ |
|
"epoch": 2.330210772833724, |
|
"grad_norm": 0.06147882342338562, |
|
"learning_rate": 1.385620812475409e-06, |
|
"loss": 0.2382, |
|
"step": 333 |
|
}, |
|
{ |
|
"epoch": 2.3372365339578454, |
|
"grad_norm": 0.06549369543790817, |
|
"learning_rate": 1.3574036716681366e-06, |
|
"loss": 0.2688, |
|
"step": 334 |
|
}, |
|
{ |
|
"epoch": 2.3442622950819674, |
|
"grad_norm": 0.05481060966849327, |
|
"learning_rate": 1.3294316125178474e-06, |
|
"loss": 0.2419, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 2.351288056206089, |
|
"grad_norm": 0.059850070625543594, |
|
"learning_rate": 1.301706517044395e-06, |
|
"loss": 0.2359, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 2.358313817330211, |
|
"grad_norm": 0.05873354524374008, |
|
"learning_rate": 1.2742302506513894e-06, |
|
"loss": 0.2394, |
|
"step": 337 |
|
}, |
|
{ |
|
"epoch": 2.3653395784543325, |
|
"grad_norm": 0.06211516261100769, |
|
"learning_rate": 1.247004662000686e-06, |
|
"loss": 0.2564, |
|
"step": 338 |
|
}, |
|
{ |
|
"epoch": 2.3723653395784545, |
|
"grad_norm": 0.06391850858926773, |
|
"learning_rate": 1.2200315828880094e-06, |
|
"loss": 0.241, |
|
"step": 339 |
|
}, |
|
{ |
|
"epoch": 2.379391100702576, |
|
"grad_norm": 0.07204084098339081, |
|
"learning_rate": 1.1933128281197042e-06, |
|
"loss": 0.2272, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 2.3864168618266977, |
|
"grad_norm": 0.06479175388813019, |
|
"learning_rate": 1.166850195390628e-06, |
|
"loss": 0.2684, |
|
"step": 341 |
|
}, |
|
{ |
|
"epoch": 2.3934426229508197, |
|
"grad_norm": 0.06499191373586655, |
|
"learning_rate": 1.1406454651632042e-06, |
|
"loss": 0.2646, |
|
"step": 342 |
|
}, |
|
{ |
|
"epoch": 2.4004683840749417, |
|
"grad_norm": 0.06663113832473755, |
|
"learning_rate": 1.1147004005476192e-06, |
|
"loss": 0.2644, |
|
"step": 343 |
|
}, |
|
{ |
|
"epoch": 2.4074941451990632, |
|
"grad_norm": 0.0672060027718544, |
|
"learning_rate": 1.089016747183208e-06, |
|
"loss": 0.2051, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 2.414519906323185, |
|
"grad_norm": 0.055617500096559525, |
|
"learning_rate": 1.063596233120997e-06, |
|
"loss": 0.2343, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 2.421545667447307, |
|
"grad_norm": 0.05767429992556572, |
|
"learning_rate": 1.03844056870744e-06, |
|
"loss": 0.248, |
|
"step": 346 |
|
}, |
|
{ |
|
"epoch": 2.4285714285714284, |
|
"grad_norm": 0.05945609137415886, |
|
"learning_rate": 1.013551446469337e-06, |
|
"loss": 0.2329, |
|
"step": 347 |
|
}, |
|
{ |
|
"epoch": 2.4355971896955504, |
|
"grad_norm": 0.061543092131614685, |
|
"learning_rate": 9.889305409999656e-07, |
|
"loss": 0.2217, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 2.442622950819672, |
|
"grad_norm": 0.06861676275730133, |
|
"learning_rate": 9.64579508846405e-07, |
|
"loss": 0.2869, |
|
"step": 349 |
|
}, |
|
{ |
|
"epoch": 2.449648711943794, |
|
"grad_norm": 0.06629474461078644, |
|
"learning_rate": 9.40499988398082e-07, |
|
"loss": 0.2398, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 2.4566744730679155, |
|
"grad_norm": 0.06839589029550552, |
|
"learning_rate": 9.166935997765364e-07, |
|
"loss": 0.2631, |
|
"step": 351 |
|
}, |
|
{ |
|
"epoch": 2.4637002341920375, |
|
"grad_norm": 0.07068444043397903, |
|
"learning_rate": 8.93161944726414e-07, |
|
"loss": 0.2439, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 2.470725995316159, |
|
"grad_norm": 0.06256645917892456, |
|
"learning_rate": 8.699066065077005e-07, |
|
"loss": 0.243, |
|
"step": 353 |
|
}, |
|
{ |
|
"epoch": 2.477751756440281, |
|
"grad_norm": 0.053054191172122955, |
|
"learning_rate": 8.469291497891979e-07, |
|
"loss": 0.2405, |
|
"step": 354 |
|
}, |
|
{ |
|
"epoch": 2.4847775175644027, |
|
"grad_norm": 0.0644262358546257, |
|
"learning_rate": 8.242311205432418e-07, |
|
"loss": 0.2141, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 2.4918032786885247, |
|
"grad_norm": 0.05528811737895012, |
|
"learning_rate": 8.018140459416962e-07, |
|
"loss": 0.2511, |
|
"step": 356 |
|
}, |
|
{ |
|
"epoch": 2.4988290398126463, |
|
"grad_norm": 0.06793423742055893, |
|
"learning_rate": 7.796794342531949e-07, |
|
"loss": 0.2414, |
|
"step": 357 |
|
}, |
|
{ |
|
"epoch": 2.5058548009367683, |
|
"grad_norm": 0.06469738483428955, |
|
"learning_rate": 7.57828774741664e-07, |
|
"loss": 0.2389, |
|
"step": 358 |
|
}, |
|
{ |
|
"epoch": 2.51288056206089, |
|
"grad_norm": 0.05862313508987427, |
|
"learning_rate": 7.362635375661225e-07, |
|
"loss": 0.2483, |
|
"step": 359 |
|
}, |
|
{ |
|
"epoch": 2.519906323185012, |
|
"grad_norm": 0.06477522104978561, |
|
"learning_rate": 7.149851736817609e-07, |
|
"loss": 0.2725, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 2.5269320843091334, |
|
"grad_norm": 0.06782645732164383, |
|
"learning_rate": 6.939951147423269e-07, |
|
"loss": 0.2171, |
|
"step": 361 |
|
}, |
|
{ |
|
"epoch": 2.5339578454332554, |
|
"grad_norm": 0.06721869856119156, |
|
"learning_rate": 6.732947730037936e-07, |
|
"loss": 0.2272, |
|
"step": 362 |
|
}, |
|
{ |
|
"epoch": 2.540983606557377, |
|
"grad_norm": 0.06349222362041473, |
|
"learning_rate": 6.52885541229345e-07, |
|
"loss": 0.2361, |
|
"step": 363 |
|
}, |
|
{ |
|
"epoch": 2.548009367681499, |
|
"grad_norm": 0.0673484057188034, |
|
"learning_rate": 6.327687925956616e-07, |
|
"loss": 0.2242, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 2.5550351288056206, |
|
"grad_norm": 0.05453097075223923, |
|
"learning_rate": 6.12945880600535e-07, |
|
"loss": 0.2066, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 2.562060889929742, |
|
"grad_norm": 0.05890359729528427, |
|
"learning_rate": 5.93418138971803e-07, |
|
"loss": 0.27, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 2.569086651053864, |
|
"grad_norm": 0.06143670156598091, |
|
"learning_rate": 5.741868815776081e-07, |
|
"loss": 0.2532, |
|
"step": 367 |
|
}, |
|
{ |
|
"epoch": 2.576112412177986, |
|
"grad_norm": 0.06809256970882416, |
|
"learning_rate": 5.552534023380024e-07, |
|
"loss": 0.2507, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 2.5831381733021077, |
|
"grad_norm": 0.06404256820678711, |
|
"learning_rate": 5.366189751378858e-07, |
|
"loss": 0.2079, |
|
"step": 369 |
|
}, |
|
{ |
|
"epoch": 2.5901639344262293, |
|
"grad_norm": 0.05889306962490082, |
|
"learning_rate": 5.18284853741301e-07, |
|
"loss": 0.2592, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 2.5971896955503513, |
|
"grad_norm": 0.06670808047056198, |
|
"learning_rate": 5.002522717070751e-07, |
|
"loss": 0.2246, |
|
"step": 371 |
|
}, |
|
{ |
|
"epoch": 2.6042154566744733, |
|
"grad_norm": 0.060436200350522995, |
|
"learning_rate": 4.8252244230582e-07, |
|
"loss": 0.2291, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 2.611241217798595, |
|
"grad_norm": 0.06188951060175896, |
|
"learning_rate": 4.6509655843830827e-07, |
|
"loss": 0.2375, |
|
"step": 373 |
|
}, |
|
{ |
|
"epoch": 2.6182669789227164, |
|
"grad_norm": 0.06185289844870567, |
|
"learning_rate": 4.4797579255520585e-07, |
|
"loss": 0.2412, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 2.6252927400468384, |
|
"grad_norm": 0.061204344034194946, |
|
"learning_rate": 4.311612965781903e-07, |
|
"loss": 0.2453, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 2.6323185011709604, |
|
"grad_norm": 0.0526043102145195, |
|
"learning_rate": 4.1465420182244476e-07, |
|
"loss": 0.2399, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 2.639344262295082, |
|
"grad_norm": 0.06435679644346237, |
|
"learning_rate": 3.984556189205441e-07, |
|
"loss": 0.2267, |
|
"step": 377 |
|
}, |
|
{ |
|
"epoch": 2.6463700234192036, |
|
"grad_norm": 0.06311339884996414, |
|
"learning_rate": 3.8256663774772383e-07, |
|
"loss": 0.2584, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 2.6533957845433256, |
|
"grad_norm": 0.06288459897041321, |
|
"learning_rate": 3.669883273485575e-07, |
|
"loss": 0.2621, |
|
"step": 379 |
|
}, |
|
{ |
|
"epoch": 2.660421545667447, |
|
"grad_norm": 0.06034472957253456, |
|
"learning_rate": 3.5172173586502543e-07, |
|
"loss": 0.2132, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 2.667447306791569, |
|
"grad_norm": 0.06609684228897095, |
|
"learning_rate": 3.3676789046599045e-07, |
|
"loss": 0.219, |
|
"step": 381 |
|
}, |
|
{ |
|
"epoch": 2.6744730679156907, |
|
"grad_norm": 0.06798077374696732, |
|
"learning_rate": 3.2212779727809504e-07, |
|
"loss": 0.2571, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 2.6814988290398127, |
|
"grad_norm": 0.06018667295575142, |
|
"learning_rate": 3.0780244131806193e-07, |
|
"loss": 0.2567, |
|
"step": 383 |
|
}, |
|
{ |
|
"epoch": 2.6885245901639343, |
|
"grad_norm": 0.06950001418590546, |
|
"learning_rate": 2.937927864264206e-07, |
|
"loss": 0.2567, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 2.6955503512880563, |
|
"grad_norm": 0.06350544840097427, |
|
"learning_rate": 2.800997752026596e-07, |
|
"loss": 0.2359, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 2.702576112412178, |
|
"grad_norm": 0.06425828486680984, |
|
"learning_rate": 2.667243289418059e-07, |
|
"loss": 0.2229, |
|
"step": 386 |
|
}, |
|
{ |
|
"epoch": 2.7096018735363, |
|
"grad_norm": 0.061528194695711136, |
|
"learning_rate": 2.5366734757243496e-07, |
|
"loss": 0.1988, |
|
"step": 387 |
|
}, |
|
{ |
|
"epoch": 2.7166276346604215, |
|
"grad_norm": 0.06735736131668091, |
|
"learning_rate": 2.4092970959612885e-07, |
|
"loss": 0.2168, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 2.7236533957845435, |
|
"grad_norm": 0.051086440682411194, |
|
"learning_rate": 2.2851227202836002e-07, |
|
"loss": 0.2529, |
|
"step": 389 |
|
}, |
|
{ |
|
"epoch": 2.730679156908665, |
|
"grad_norm": 0.06252393126487732, |
|
"learning_rate": 2.1641587034083756e-07, |
|
"loss": 0.2351, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 2.737704918032787, |
|
"grad_norm": 0.05734705179929733, |
|
"learning_rate": 2.0464131840528978e-07, |
|
"loss": 0.2152, |
|
"step": 391 |
|
}, |
|
{ |
|
"epoch": 2.7447306791569086, |
|
"grad_norm": 0.055918820202350616, |
|
"learning_rate": 1.9318940843870594e-07, |
|
"loss": 0.2105, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 2.7517564402810306, |
|
"grad_norm": 0.05873579904437065, |
|
"learning_rate": 1.8206091095003543e-07, |
|
"loss": 0.2531, |
|
"step": 393 |
|
}, |
|
{ |
|
"epoch": 2.758782201405152, |
|
"grad_norm": 0.059994373470544815, |
|
"learning_rate": 1.7125657468834656e-07, |
|
"loss": 0.222, |
|
"step": 394 |
|
}, |
|
{ |
|
"epoch": 2.7658079625292737, |
|
"grad_norm": 0.05768108740448952, |
|
"learning_rate": 1.6077712659244792e-07, |
|
"loss": 0.2338, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 2.7728337236533958, |
|
"grad_norm": 0.0613434873521328, |
|
"learning_rate": 1.5062327174197645e-07, |
|
"loss": 0.2134, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 2.7798594847775178, |
|
"grad_norm": 0.0738491341471672, |
|
"learning_rate": 1.4079569330996412e-07, |
|
"loss": 0.2774, |
|
"step": 397 |
|
}, |
|
{ |
|
"epoch": 2.7868852459016393, |
|
"grad_norm": 0.0638870969414711, |
|
"learning_rate": 1.3129505251686603e-07, |
|
"loss": 0.2163, |
|
"step": 398 |
|
}, |
|
{ |
|
"epoch": 2.793911007025761, |
|
"grad_norm": 0.06510435044765472, |
|
"learning_rate": 1.2212198858607694e-07, |
|
"loss": 0.2268, |
|
"step": 399 |
|
}, |
|
{ |
|
"epoch": 2.800936768149883, |
|
"grad_norm": 0.060825150460004807, |
|
"learning_rate": 1.1327711870091963e-07, |
|
"loss": 0.2397, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 2.807962529274005, |
|
"grad_norm": 0.06411723047494888, |
|
"learning_rate": 1.0476103796312254e-07, |
|
"loss": 0.2509, |
|
"step": 401 |
|
}, |
|
{ |
|
"epoch": 2.8149882903981265, |
|
"grad_norm": 0.06250890344381332, |
|
"learning_rate": 9.657431935277629e-08, |
|
"loss": 0.2432, |
|
"step": 402 |
|
}, |
|
{ |
|
"epoch": 2.822014051522248, |
|
"grad_norm": 0.05637110397219658, |
|
"learning_rate": 8.871751368978554e-08, |
|
"loss": 0.2307, |
|
"step": 403 |
|
}, |
|
{ |
|
"epoch": 2.82903981264637, |
|
"grad_norm": 0.0648709312081337, |
|
"learning_rate": 8.119114959680929e-08, |
|
"loss": 0.2077, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 2.836065573770492, |
|
"grad_norm": 0.07058855891227722, |
|
"learning_rate": 7.399573346368871e-08, |
|
"loss": 0.2623, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 2.8430913348946136, |
|
"grad_norm": 0.06563594192266464, |
|
"learning_rate": 6.713174941338163e-08, |
|
"loss": 0.2555, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 2.850117096018735, |
|
"grad_norm": 0.06609778106212616, |
|
"learning_rate": 6.05996592693886e-08, |
|
"loss": 0.1752, |
|
"step": 407 |
|
}, |
|
{ |
|
"epoch": 2.857142857142857, |
|
"grad_norm": 0.053535059094429016, |
|
"learning_rate": 5.439990252467886e-08, |
|
"loss": 0.2673, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 2.8641686182669788, |
|
"grad_norm": 0.05849752202630043, |
|
"learning_rate": 4.853289631212066e-08, |
|
"loss": 0.2596, |
|
"step": 409 |
|
}, |
|
{ |
|
"epoch": 2.871194379391101, |
|
"grad_norm": 0.06690798699855804, |
|
"learning_rate": 4.299903537641703e-08, |
|
"loss": 0.2261, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 2.8782201405152223, |
|
"grad_norm": 0.06300397962331772, |
|
"learning_rate": 3.779869204754427e-08, |
|
"loss": 0.2502, |
|
"step": 411 |
|
}, |
|
{ |
|
"epoch": 2.8852459016393444, |
|
"grad_norm": 0.06787782162427902, |
|
"learning_rate": 3.2932216215704195e-08, |
|
"loss": 0.2246, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 2.892271662763466, |
|
"grad_norm": 0.05957074463367462, |
|
"learning_rate": 2.8399935307778516e-08, |
|
"loss": 0.2237, |
|
"step": 413 |
|
}, |
|
{ |
|
"epoch": 2.899297423887588, |
|
"grad_norm": 0.06010279804468155, |
|
"learning_rate": 2.420215426530259e-08, |
|
"loss": 0.2146, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 2.9063231850117095, |
|
"grad_norm": 0.07070771604776382, |
|
"learning_rate": 2.0339155523945164e-08, |
|
"loss": 0.2146, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 2.9133489461358315, |
|
"grad_norm": 0.06385600566864014, |
|
"learning_rate": 1.681119899450856e-08, |
|
"loss": 0.2336, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 2.920374707259953, |
|
"grad_norm": 0.06282834708690643, |
|
"learning_rate": 1.3618522045439897e-08, |
|
"loss": 0.2257, |
|
"step": 417 |
|
}, |
|
{ |
|
"epoch": 2.927400468384075, |
|
"grad_norm": 0.06092951446771622, |
|
"learning_rate": 1.0761339486859424e-08, |
|
"loss": 0.2299, |
|
"step": 418 |
|
}, |
|
{ |
|
"epoch": 2.9344262295081966, |
|
"grad_norm": 0.05854687839746475, |
|
"learning_rate": 8.239843556108739e-09, |
|
"loss": 0.191, |
|
"step": 419 |
|
}, |
|
{ |
|
"epoch": 2.9414519906323187, |
|
"grad_norm": 0.06888988614082336, |
|
"learning_rate": 6.054203904817812e-09, |
|
"loss": 0.2607, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 2.9484777517564402, |
|
"grad_norm": 0.059768833220005035, |
|
"learning_rate": 4.204567587486885e-09, |
|
"loss": 0.261, |
|
"step": 421 |
|
}, |
|
{ |
|
"epoch": 2.9555035128805622, |
|
"grad_norm": 0.06158865615725517, |
|
"learning_rate": 2.6910590515966117e-09, |
|
"loss": 0.2063, |
|
"step": 422 |
|
}, |
|
{ |
|
"epoch": 2.962529274004684, |
|
"grad_norm": 0.06061291694641113, |
|
"learning_rate": 1.5137801292325338e-09, |
|
"loss": 0.2551, |
|
"step": 423 |
|
}, |
|
{ |
|
"epoch": 2.9695550351288054, |
|
"grad_norm": 0.0802014172077179, |
|
"learning_rate": 6.728100302327844e-10, |
|
"loss": 0.2163, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 2.9765807962529274, |
|
"grad_norm": 0.05840716511011124, |
|
"learning_rate": 1.6820533686179308e-10, |
|
"loss": 0.2531, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 2.9836065573770494, |
|
"grad_norm": 0.06497234106063843, |
|
"learning_rate": 0.0, |
|
"loss": 0.2231, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 2.9836065573770494, |
|
"step": 426, |
|
"total_flos": 1.0634701552012493e+17, |
|
"train_loss": 0.3742912175230017, |
|
"train_runtime": 25220.8812, |
|
"train_samples_per_second": 0.406, |
|
"train_steps_per_second": 0.017 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 426, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 200, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.0634701552012493e+17, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|