{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.23823704586063132, "eval_steps": 500, "global_step": 150, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0015882469724042088, "grad_norm": 0.9020900726318359, "learning_rate": 8.000000000000001e-06, "loss": 1.462, "step": 1 }, { "epoch": 0.0031764939448084176, "grad_norm": 0.8714035749435425, "learning_rate": 1.6000000000000003e-05, "loss": 1.7768, "step": 2 }, { "epoch": 0.004764740917212627, "grad_norm": 0.6246618032455444, "learning_rate": 2.4e-05, "loss": 1.4764, "step": 3 }, { "epoch": 0.006352987889616835, "grad_norm": 0.6882991194725037, "learning_rate": 3.2000000000000005e-05, "loss": 1.3473, "step": 4 }, { "epoch": 0.007941234862021045, "grad_norm": 0.7609978914260864, "learning_rate": 4e-05, "loss": 1.4786, "step": 5 }, { "epoch": 0.009529481834425254, "grad_norm": 0.6454429030418396, "learning_rate": 3.993589743589744e-05, "loss": 1.5175, "step": 6 }, { "epoch": 0.011117728806829461, "grad_norm": 0.5323919057846069, "learning_rate": 3.9871794871794875e-05, "loss": 1.5005, "step": 7 }, { "epoch": 0.01270597577923367, "grad_norm": 0.49380961060523987, "learning_rate": 3.9807692307692314e-05, "loss": 1.393, "step": 8 }, { "epoch": 0.01429422275163788, "grad_norm": 0.4742673337459564, "learning_rate": 3.9743589743589747e-05, "loss": 1.4615, "step": 9 }, { "epoch": 0.01588246972404209, "grad_norm": 0.47878921031951904, "learning_rate": 3.9679487179487186e-05, "loss": 1.4317, "step": 10 }, { "epoch": 0.017470716696446297, "grad_norm": 0.5705658197402954, "learning_rate": 3.961538461538462e-05, "loss": 1.4019, "step": 11 }, { "epoch": 0.019058963668850508, "grad_norm": 0.4807969331741333, "learning_rate": 3.955128205128206e-05, "loss": 1.3456, "step": 12 }, { "epoch": 0.020647210641254715, "grad_norm": 0.4449750781059265, "learning_rate": 3.948717948717949e-05, "loss": 1.2978, "step": 13 }, { "epoch": 0.022235457613658922, "grad_norm": 0.42986753582954407, "learning_rate": 3.942307692307693e-05, "loss": 1.3485, "step": 14 }, { "epoch": 0.023823704586063133, "grad_norm": 0.4529072344303131, "learning_rate": 3.935897435897436e-05, "loss": 1.1967, "step": 15 }, { "epoch": 0.02541195155846734, "grad_norm": 0.36832889914512634, "learning_rate": 3.9294871794871794e-05, "loss": 1.4615, "step": 16 }, { "epoch": 0.02700019853087155, "grad_norm": 0.36332887411117554, "learning_rate": 3.923076923076923e-05, "loss": 1.424, "step": 17 }, { "epoch": 0.02858844550327576, "grad_norm": 0.3830856680870056, "learning_rate": 3.9166666666666665e-05, "loss": 1.3219, "step": 18 }, { "epoch": 0.03017669247567997, "grad_norm": 0.34034624695777893, "learning_rate": 3.9102564102564105e-05, "loss": 1.3907, "step": 19 }, { "epoch": 0.03176493944808418, "grad_norm": 0.5381081104278564, "learning_rate": 3.9038461538461544e-05, "loss": 1.166, "step": 20 }, { "epoch": 0.03335318642048839, "grad_norm": 0.3757379651069641, "learning_rate": 3.8974358974358976e-05, "loss": 1.3848, "step": 21 }, { "epoch": 0.034941433392892594, "grad_norm": 0.3074859082698822, "learning_rate": 3.8910256410256416e-05, "loss": 1.2964, "step": 22 }, { "epoch": 0.0365296803652968, "grad_norm": 0.327610045671463, "learning_rate": 3.884615384615385e-05, "loss": 1.1818, "step": 23 }, { "epoch": 0.038117927337701016, "grad_norm": 0.31075817346572876, "learning_rate": 3.878205128205129e-05, "loss": 1.1266, "step": 24 }, { "epoch": 0.03970617431010522, "grad_norm": 0.3608109652996063, "learning_rate": 3.871794871794872e-05, "loss": 1.3268, "step": 25 }, { "epoch": 0.04129442128250943, "grad_norm": 0.34548354148864746, "learning_rate": 3.865384615384616e-05, "loss": 1.4485, "step": 26 }, { "epoch": 0.04288266825491364, "grad_norm": 0.3382280468940735, "learning_rate": 3.858974358974359e-05, "loss": 1.4231, "step": 27 }, { "epoch": 0.044470915227317845, "grad_norm": 0.3258324861526489, "learning_rate": 3.852564102564103e-05, "loss": 1.2321, "step": 28 }, { "epoch": 0.04605916219972206, "grad_norm": 0.390952467918396, "learning_rate": 3.846153846153846e-05, "loss": 1.3014, "step": 29 }, { "epoch": 0.047647409172126266, "grad_norm": 0.34574779868125916, "learning_rate": 3.83974358974359e-05, "loss": 1.3385, "step": 30 }, { "epoch": 0.049235656144530474, "grad_norm": 0.32823994755744934, "learning_rate": 3.833333333333334e-05, "loss": 1.2178, "step": 31 }, { "epoch": 0.05082390311693468, "grad_norm": 0.4966191053390503, "learning_rate": 3.8269230769230774e-05, "loss": 1.3805, "step": 32 }, { "epoch": 0.052412150089338895, "grad_norm": 0.3081677556037903, "learning_rate": 3.820512820512821e-05, "loss": 1.2109, "step": 33 }, { "epoch": 0.0540003970617431, "grad_norm": 0.2919631600379944, "learning_rate": 3.8141025641025645e-05, "loss": 1.2596, "step": 34 }, { "epoch": 0.05558864403414731, "grad_norm": 0.34928378462791443, "learning_rate": 3.807692307692308e-05, "loss": 1.3003, "step": 35 }, { "epoch": 0.05717689100655152, "grad_norm": 0.32200536131858826, "learning_rate": 3.801282051282052e-05, "loss": 1.1992, "step": 36 }, { "epoch": 0.05876513797895573, "grad_norm": 0.36465853452682495, "learning_rate": 3.794871794871795e-05, "loss": 1.3853, "step": 37 }, { "epoch": 0.06035338495135994, "grad_norm": 0.36780843138694763, "learning_rate": 3.788461538461539e-05, "loss": 1.3488, "step": 38 }, { "epoch": 0.061941631923764146, "grad_norm": 0.3747501075267792, "learning_rate": 3.782051282051282e-05, "loss": 1.2759, "step": 39 }, { "epoch": 0.06352987889616836, "grad_norm": 0.4664352834224701, "learning_rate": 3.775641025641026e-05, "loss": 1.2444, "step": 40 }, { "epoch": 0.06511812586857256, "grad_norm": 0.3714566230773926, "learning_rate": 3.769230769230769e-05, "loss": 1.3729, "step": 41 }, { "epoch": 0.06670637284097677, "grad_norm": 0.35391151905059814, "learning_rate": 3.762820512820513e-05, "loss": 1.2871, "step": 42 }, { "epoch": 0.06829461981338097, "grad_norm": 0.37723308801651, "learning_rate": 3.7564102564102564e-05, "loss": 1.3243, "step": 43 }, { "epoch": 0.06988286678578519, "grad_norm": 0.3361778259277344, "learning_rate": 3.7500000000000003e-05, "loss": 1.1932, "step": 44 }, { "epoch": 0.0714711137581894, "grad_norm": 0.31701332330703735, "learning_rate": 3.7435897435897436e-05, "loss": 1.3294, "step": 45 }, { "epoch": 0.0730593607305936, "grad_norm": 0.3698272705078125, "learning_rate": 3.7371794871794875e-05, "loss": 1.2617, "step": 46 }, { "epoch": 0.07464760770299782, "grad_norm": 0.34270384907722473, "learning_rate": 3.7307692307692314e-05, "loss": 1.3205, "step": 47 }, { "epoch": 0.07623585467540203, "grad_norm": 0.31238165497779846, "learning_rate": 3.724358974358975e-05, "loss": 1.297, "step": 48 }, { "epoch": 0.07782410164780623, "grad_norm": 0.3665103614330292, "learning_rate": 3.7179487179487186e-05, "loss": 1.3757, "step": 49 }, { "epoch": 0.07941234862021045, "grad_norm": 0.36817964911460876, "learning_rate": 3.711538461538462e-05, "loss": 1.3225, "step": 50 }, { "epoch": 0.08100059559261465, "grad_norm": 0.32568806409835815, "learning_rate": 3.705128205128206e-05, "loss": 1.1309, "step": 51 }, { "epoch": 0.08258884256501886, "grad_norm": 0.2992149293422699, "learning_rate": 3.698717948717949e-05, "loss": 1.2399, "step": 52 }, { "epoch": 0.08417708953742307, "grad_norm": 0.3493598401546478, "learning_rate": 3.692307692307693e-05, "loss": 1.2718, "step": 53 }, { "epoch": 0.08576533650982728, "grad_norm": 0.38273313641548157, "learning_rate": 3.685897435897436e-05, "loss": 1.2875, "step": 54 }, { "epoch": 0.08735358348223149, "grad_norm": 0.3631385266780853, "learning_rate": 3.6794871794871794e-05, "loss": 1.176, "step": 55 }, { "epoch": 0.08894183045463569, "grad_norm": 0.33916208148002625, "learning_rate": 3.673076923076923e-05, "loss": 1.3624, "step": 56 }, { "epoch": 0.0905300774270399, "grad_norm": 0.3310489058494568, "learning_rate": 3.6666666666666666e-05, "loss": 1.1639, "step": 57 }, { "epoch": 0.09211832439944412, "grad_norm": 0.3602941930294037, "learning_rate": 3.6602564102564105e-05, "loss": 1.288, "step": 58 }, { "epoch": 0.09370657137184832, "grad_norm": 0.39397984743118286, "learning_rate": 3.653846153846154e-05, "loss": 1.285, "step": 59 }, { "epoch": 0.09529481834425253, "grad_norm": 0.3368014991283417, "learning_rate": 3.6474358974358977e-05, "loss": 1.2523, "step": 60 }, { "epoch": 0.09688306531665675, "grad_norm": 0.32057568430900574, "learning_rate": 3.6410256410256416e-05, "loss": 1.0862, "step": 61 }, { "epoch": 0.09847131228906095, "grad_norm": 0.409466028213501, "learning_rate": 3.634615384615385e-05, "loss": 1.4059, "step": 62 }, { "epoch": 0.10005955926146516, "grad_norm": 0.3347267210483551, "learning_rate": 3.628205128205129e-05, "loss": 1.0021, "step": 63 }, { "epoch": 0.10164780623386936, "grad_norm": 0.31896549463272095, "learning_rate": 3.621794871794872e-05, "loss": 1.1485, "step": 64 }, { "epoch": 0.10323605320627358, "grad_norm": 0.3799891471862793, "learning_rate": 3.615384615384616e-05, "loss": 1.2916, "step": 65 }, { "epoch": 0.10482430017867779, "grad_norm": 0.3329267203807831, "learning_rate": 3.608974358974359e-05, "loss": 1.3268, "step": 66 }, { "epoch": 0.10641254715108199, "grad_norm": 0.4332905411720276, "learning_rate": 3.602564102564103e-05, "loss": 1.3233, "step": 67 }, { "epoch": 0.1080007941234862, "grad_norm": 0.4160114824771881, "learning_rate": 3.596153846153846e-05, "loss": 1.154, "step": 68 }, { "epoch": 0.1095890410958904, "grad_norm": 0.32984739542007446, "learning_rate": 3.58974358974359e-05, "loss": 1.1314, "step": 69 }, { "epoch": 0.11117728806829462, "grad_norm": 0.3880075216293335, "learning_rate": 3.5833333333333335e-05, "loss": 1.3362, "step": 70 }, { "epoch": 0.11276553504069883, "grad_norm": 0.33941248059272766, "learning_rate": 3.5769230769230774e-05, "loss": 1.2692, "step": 71 }, { "epoch": 0.11435378201310303, "grad_norm": 0.3812304139137268, "learning_rate": 3.570512820512821e-05, "loss": 1.0389, "step": 72 }, { "epoch": 0.11594202898550725, "grad_norm": 0.3491113483905792, "learning_rate": 3.5641025641025646e-05, "loss": 1.0725, "step": 73 }, { "epoch": 0.11753027595791146, "grad_norm": 0.371980220079422, "learning_rate": 3.557692307692308e-05, "loss": 1.2572, "step": 74 }, { "epoch": 0.11911852293031566, "grad_norm": 0.4158433675765991, "learning_rate": 3.551282051282052e-05, "loss": 1.3052, "step": 75 }, { "epoch": 0.12070676990271988, "grad_norm": 0.340273380279541, "learning_rate": 3.544871794871795e-05, "loss": 1.0866, "step": 76 }, { "epoch": 0.12229501687512408, "grad_norm": 0.42024505138397217, "learning_rate": 3.538461538461539e-05, "loss": 1.3908, "step": 77 }, { "epoch": 0.12388326384752829, "grad_norm": 0.3871309757232666, "learning_rate": 3.532051282051282e-05, "loss": 1.1443, "step": 78 }, { "epoch": 0.1254715108199325, "grad_norm": 0.4081447124481201, "learning_rate": 3.525641025641026e-05, "loss": 1.256, "step": 79 }, { "epoch": 0.12705975779233672, "grad_norm": 0.398825079202652, "learning_rate": 3.519230769230769e-05, "loss": 1.0935, "step": 80 }, { "epoch": 0.12864800476474092, "grad_norm": 0.41756635904312134, "learning_rate": 3.512820512820513e-05, "loss": 1.3101, "step": 81 }, { "epoch": 0.13023625173714512, "grad_norm": 0.3089386522769928, "learning_rate": 3.5064102564102565e-05, "loss": 1.0133, "step": 82 }, { "epoch": 0.13182449870954935, "grad_norm": 0.319243848323822, "learning_rate": 3.5000000000000004e-05, "loss": 0.8617, "step": 83 }, { "epoch": 0.13341274568195355, "grad_norm": 0.4051594138145447, "learning_rate": 3.4935897435897436e-05, "loss": 1.2443, "step": 84 }, { "epoch": 0.13500099265435775, "grad_norm": 0.43612000346183777, "learning_rate": 3.4871794871794875e-05, "loss": 1.269, "step": 85 }, { "epoch": 0.13658923962676195, "grad_norm": 0.43578770756721497, "learning_rate": 3.4807692307692315e-05, "loss": 1.2405, "step": 86 }, { "epoch": 0.13817748659916618, "grad_norm": 0.4056044816970825, "learning_rate": 3.474358974358975e-05, "loss": 1.0946, "step": 87 }, { "epoch": 0.13976573357157038, "grad_norm": 0.3657781779766083, "learning_rate": 3.4679487179487186e-05, "loss": 1.0036, "step": 88 }, { "epoch": 0.14135398054397458, "grad_norm": 0.38766562938690186, "learning_rate": 3.461538461538462e-05, "loss": 1.2921, "step": 89 }, { "epoch": 0.1429422275163788, "grad_norm": 0.3952920436859131, "learning_rate": 3.455128205128206e-05, "loss": 1.0871, "step": 90 }, { "epoch": 0.144530474488783, "grad_norm": 0.37682345509529114, "learning_rate": 3.448717948717949e-05, "loss": 1.3176, "step": 91 }, { "epoch": 0.1461187214611872, "grad_norm": 0.41130203008651733, "learning_rate": 3.442307692307693e-05, "loss": 1.4224, "step": 92 }, { "epoch": 0.14770696843359143, "grad_norm": 0.446526437997818, "learning_rate": 3.435897435897436e-05, "loss": 1.1269, "step": 93 }, { "epoch": 0.14929521540599563, "grad_norm": 0.3832029104232788, "learning_rate": 3.4294871794871794e-05, "loss": 1.2257, "step": 94 }, { "epoch": 0.15088346237839984, "grad_norm": 0.3874358534812927, "learning_rate": 3.4230769230769234e-05, "loss": 1.2198, "step": 95 }, { "epoch": 0.15247170935080406, "grad_norm": 0.44150203466415405, "learning_rate": 3.4166666666666666e-05, "loss": 1.4579, "step": 96 }, { "epoch": 0.15405995632320826, "grad_norm": 0.49795201420783997, "learning_rate": 3.4102564102564105e-05, "loss": 1.1809, "step": 97 }, { "epoch": 0.15564820329561246, "grad_norm": 0.38568922877311707, "learning_rate": 3.403846153846154e-05, "loss": 1.2511, "step": 98 }, { "epoch": 0.15723645026801666, "grad_norm": 0.3719356060028076, "learning_rate": 3.397435897435898e-05, "loss": 1.1881, "step": 99 }, { "epoch": 0.1588246972404209, "grad_norm": 0.4000978469848633, "learning_rate": 3.391025641025641e-05, "loss": 1.0767, "step": 100 }, { "epoch": 0.1604129442128251, "grad_norm": 0.46317172050476074, "learning_rate": 3.384615384615385e-05, "loss": 1.1606, "step": 101 }, { "epoch": 0.1620011911852293, "grad_norm": 0.42941561341285706, "learning_rate": 3.378205128205129e-05, "loss": 1.1359, "step": 102 }, { "epoch": 0.16358943815763352, "grad_norm": 0.3518621325492859, "learning_rate": 3.371794871794872e-05, "loss": 1.1586, "step": 103 }, { "epoch": 0.16517768513003772, "grad_norm": 0.45329567790031433, "learning_rate": 3.365384615384616e-05, "loss": 1.3658, "step": 104 }, { "epoch": 0.16676593210244192, "grad_norm": 0.39535844326019287, "learning_rate": 3.358974358974359e-05, "loss": 1.1398, "step": 105 }, { "epoch": 0.16835417907484615, "grad_norm": 0.3700944781303406, "learning_rate": 3.352564102564103e-05, "loss": 1.0872, "step": 106 }, { "epoch": 0.16994242604725035, "grad_norm": 0.3956209719181061, "learning_rate": 3.346153846153846e-05, "loss": 1.3071, "step": 107 }, { "epoch": 0.17153067301965455, "grad_norm": 0.3819758892059326, "learning_rate": 3.33974358974359e-05, "loss": 1.1822, "step": 108 }, { "epoch": 0.17311891999205878, "grad_norm": 0.44656893610954285, "learning_rate": 3.3333333333333335e-05, "loss": 1.3185, "step": 109 }, { "epoch": 0.17470716696446298, "grad_norm": 0.35640376806259155, "learning_rate": 3.3269230769230774e-05, "loss": 1.0616, "step": 110 }, { "epoch": 0.17629541393686718, "grad_norm": 0.3747159242630005, "learning_rate": 3.3205128205128207e-05, "loss": 1.1727, "step": 111 }, { "epoch": 0.17788366090927138, "grad_norm": 0.4224643409252167, "learning_rate": 3.3141025641025646e-05, "loss": 1.155, "step": 112 }, { "epoch": 0.1794719078816756, "grad_norm": 0.45572227239608765, "learning_rate": 3.307692307692308e-05, "loss": 1.1105, "step": 113 }, { "epoch": 0.1810601548540798, "grad_norm": 0.4539688527584076, "learning_rate": 3.301282051282051e-05, "loss": 1.1952, "step": 114 }, { "epoch": 0.182648401826484, "grad_norm": 0.45282796025276184, "learning_rate": 3.294871794871795e-05, "loss": 1.2254, "step": 115 }, { "epoch": 0.18423664879888824, "grad_norm": 0.37122872471809387, "learning_rate": 3.288461538461539e-05, "loss": 1.0683, "step": 116 }, { "epoch": 0.18582489577129244, "grad_norm": 0.5063671469688416, "learning_rate": 3.282051282051282e-05, "loss": 1.2554, "step": 117 }, { "epoch": 0.18741314274369664, "grad_norm": 0.4486718475818634, "learning_rate": 3.275641025641026e-05, "loss": 1.3148, "step": 118 }, { "epoch": 0.18900138971610086, "grad_norm": 0.43316200375556946, "learning_rate": 3.269230769230769e-05, "loss": 1.3, "step": 119 }, { "epoch": 0.19058963668850507, "grad_norm": 0.4560703635215759, "learning_rate": 3.262820512820513e-05, "loss": 1.0387, "step": 120 }, { "epoch": 0.19217788366090927, "grad_norm": 0.41410017013549805, "learning_rate": 3.2564102564102565e-05, "loss": 1.2125, "step": 121 }, { "epoch": 0.1937661306333135, "grad_norm": 0.43740931153297424, "learning_rate": 3.2500000000000004e-05, "loss": 1.1765, "step": 122 }, { "epoch": 0.1953543776057177, "grad_norm": 0.4682227671146393, "learning_rate": 3.2435897435897436e-05, "loss": 1.3742, "step": 123 }, { "epoch": 0.1969426245781219, "grad_norm": 0.47108888626098633, "learning_rate": 3.2371794871794876e-05, "loss": 1.3794, "step": 124 }, { "epoch": 0.1985308715505261, "grad_norm": 0.4044567048549652, "learning_rate": 3.230769230769231e-05, "loss": 1.2427, "step": 125 }, { "epoch": 0.20011911852293032, "grad_norm": 0.44156500697135925, "learning_rate": 3.224358974358975e-05, "loss": 1.1546, "step": 126 }, { "epoch": 0.20170736549533452, "grad_norm": 0.4503639340400696, "learning_rate": 3.2179487179487186e-05, "loss": 1.1677, "step": 127 }, { "epoch": 0.20329561246773872, "grad_norm": 0.40942591428756714, "learning_rate": 3.211538461538462e-05, "loss": 1.0108, "step": 128 }, { "epoch": 0.20488385944014295, "grad_norm": 0.5105980038642883, "learning_rate": 3.205128205128206e-05, "loss": 1.313, "step": 129 }, { "epoch": 0.20647210641254715, "grad_norm": 0.46841809153556824, "learning_rate": 3.198717948717949e-05, "loss": 1.3129, "step": 130 }, { "epoch": 0.20806035338495135, "grad_norm": 0.4008212387561798, "learning_rate": 3.192307692307693e-05, "loss": 1.2303, "step": 131 }, { "epoch": 0.20964860035735558, "grad_norm": 0.42567890882492065, "learning_rate": 3.185897435897436e-05, "loss": 1.2456, "step": 132 }, { "epoch": 0.21123684732975978, "grad_norm": 0.44130662083625793, "learning_rate": 3.1794871794871795e-05, "loss": 1.38, "step": 133 }, { "epoch": 0.21282509430216398, "grad_norm": 0.5576122999191284, "learning_rate": 3.1730769230769234e-05, "loss": 1.3357, "step": 134 }, { "epoch": 0.2144133412745682, "grad_norm": 0.4331263601779938, "learning_rate": 3.1666666666666666e-05, "loss": 1.0914, "step": 135 }, { "epoch": 0.2160015882469724, "grad_norm": 0.5117509961128235, "learning_rate": 3.1602564102564105e-05, "loss": 1.2846, "step": 136 }, { "epoch": 0.2175898352193766, "grad_norm": 0.5039793848991394, "learning_rate": 3.153846153846154e-05, "loss": 1.4331, "step": 137 }, { "epoch": 0.2191780821917808, "grad_norm": 0.4223591387271881, "learning_rate": 3.147435897435898e-05, "loss": 1.1687, "step": 138 }, { "epoch": 0.22076632916418504, "grad_norm": 0.40778806805610657, "learning_rate": 3.141025641025641e-05, "loss": 1.173, "step": 139 }, { "epoch": 0.22235457613658924, "grad_norm": 0.4686231017112732, "learning_rate": 3.134615384615385e-05, "loss": 1.3669, "step": 140 }, { "epoch": 0.22394282310899344, "grad_norm": 0.43122047185897827, "learning_rate": 3.128205128205129e-05, "loss": 1.2708, "step": 141 }, { "epoch": 0.22553107008139767, "grad_norm": 0.3762079179286957, "learning_rate": 3.121794871794872e-05, "loss": 1.1584, "step": 142 }, { "epoch": 0.22711931705380187, "grad_norm": 0.417106568813324, "learning_rate": 3.115384615384616e-05, "loss": 1.1314, "step": 143 }, { "epoch": 0.22870756402620607, "grad_norm": 0.4525890052318573, "learning_rate": 3.108974358974359e-05, "loss": 1.124, "step": 144 }, { "epoch": 0.2302958109986103, "grad_norm": 0.41886812448501587, "learning_rate": 3.102564102564103e-05, "loss": 1.1735, "step": 145 }, { "epoch": 0.2318840579710145, "grad_norm": 0.39290478825569153, "learning_rate": 3.0961538461538464e-05, "loss": 1.2455, "step": 146 }, { "epoch": 0.2334723049434187, "grad_norm": 0.42833879590034485, "learning_rate": 3.08974358974359e-05, "loss": 1.2274, "step": 147 }, { "epoch": 0.23506055191582292, "grad_norm": 0.37532350420951843, "learning_rate": 3.0833333333333335e-05, "loss": 1.0969, "step": 148 }, { "epoch": 0.23664879888822712, "grad_norm": 0.36185258626937866, "learning_rate": 3.0769230769230774e-05, "loss": 1.1227, "step": 149 }, { "epoch": 0.23823704586063132, "grad_norm": 0.4009798765182495, "learning_rate": 3.070512820512821e-05, "loss": 1.0955, "step": 150 } ], "logging_steps": 1, "max_steps": 629, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 75, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.0710275291755315e+17, "train_batch_size": 2, "trial_name": null, "trial_params": null }