{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.23823704586063132,
  "eval_steps": 500,
  "global_step": 150,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0015882469724042088,
      "grad_norm": 0.9020900726318359,
      "learning_rate": 8.000000000000001e-06,
      "loss": 1.462,
      "step": 1
    },
    {
      "epoch": 0.0031764939448084176,
      "grad_norm": 0.8714035749435425,
      "learning_rate": 1.6000000000000003e-05,
      "loss": 1.7768,
      "step": 2
    },
    {
      "epoch": 0.004764740917212627,
      "grad_norm": 0.6246618032455444,
      "learning_rate": 2.4e-05,
      "loss": 1.4764,
      "step": 3
    },
    {
      "epoch": 0.006352987889616835,
      "grad_norm": 0.6882991194725037,
      "learning_rate": 3.2000000000000005e-05,
      "loss": 1.3473,
      "step": 4
    },
    {
      "epoch": 0.007941234862021045,
      "grad_norm": 0.7609978914260864,
      "learning_rate": 4e-05,
      "loss": 1.4786,
      "step": 5
    },
    {
      "epoch": 0.009529481834425254,
      "grad_norm": 0.6454429030418396,
      "learning_rate": 3.993589743589744e-05,
      "loss": 1.5175,
      "step": 6
    },
    {
      "epoch": 0.011117728806829461,
      "grad_norm": 0.5323919057846069,
      "learning_rate": 3.9871794871794875e-05,
      "loss": 1.5005,
      "step": 7
    },
    {
      "epoch": 0.01270597577923367,
      "grad_norm": 0.49380961060523987,
      "learning_rate": 3.9807692307692314e-05,
      "loss": 1.393,
      "step": 8
    },
    {
      "epoch": 0.01429422275163788,
      "grad_norm": 0.4742673337459564,
      "learning_rate": 3.9743589743589747e-05,
      "loss": 1.4615,
      "step": 9
    },
    {
      "epoch": 0.01588246972404209,
      "grad_norm": 0.47878921031951904,
      "learning_rate": 3.9679487179487186e-05,
      "loss": 1.4317,
      "step": 10
    },
    {
      "epoch": 0.017470716696446297,
      "grad_norm": 0.5705658197402954,
      "learning_rate": 3.961538461538462e-05,
      "loss": 1.4019,
      "step": 11
    },
    {
      "epoch": 0.019058963668850508,
      "grad_norm": 0.4807969331741333,
      "learning_rate": 3.955128205128206e-05,
      "loss": 1.3456,
      "step": 12
    },
    {
      "epoch": 0.020647210641254715,
      "grad_norm": 0.4449750781059265,
      "learning_rate": 3.948717948717949e-05,
      "loss": 1.2978,
      "step": 13
    },
    {
      "epoch": 0.022235457613658922,
      "grad_norm": 0.42986753582954407,
      "learning_rate": 3.942307692307693e-05,
      "loss": 1.3485,
      "step": 14
    },
    {
      "epoch": 0.023823704586063133,
      "grad_norm": 0.4529072344303131,
      "learning_rate": 3.935897435897436e-05,
      "loss": 1.1967,
      "step": 15
    },
    {
      "epoch": 0.02541195155846734,
      "grad_norm": 0.36832889914512634,
      "learning_rate": 3.9294871794871794e-05,
      "loss": 1.4615,
      "step": 16
    },
    {
      "epoch": 0.02700019853087155,
      "grad_norm": 0.36332887411117554,
      "learning_rate": 3.923076923076923e-05,
      "loss": 1.424,
      "step": 17
    },
    {
      "epoch": 0.02858844550327576,
      "grad_norm": 0.3830856680870056,
      "learning_rate": 3.9166666666666665e-05,
      "loss": 1.3219,
      "step": 18
    },
    {
      "epoch": 0.03017669247567997,
      "grad_norm": 0.34034624695777893,
      "learning_rate": 3.9102564102564105e-05,
      "loss": 1.3907,
      "step": 19
    },
    {
      "epoch": 0.03176493944808418,
      "grad_norm": 0.5381081104278564,
      "learning_rate": 3.9038461538461544e-05,
      "loss": 1.166,
      "step": 20
    },
    {
      "epoch": 0.03335318642048839,
      "grad_norm": 0.3757379651069641,
      "learning_rate": 3.8974358974358976e-05,
      "loss": 1.3848,
      "step": 21
    },
    {
      "epoch": 0.034941433392892594,
      "grad_norm": 0.3074859082698822,
      "learning_rate": 3.8910256410256416e-05,
      "loss": 1.2964,
      "step": 22
    },
    {
      "epoch": 0.0365296803652968,
      "grad_norm": 0.327610045671463,
      "learning_rate": 3.884615384615385e-05,
      "loss": 1.1818,
      "step": 23
    },
    {
      "epoch": 0.038117927337701016,
      "grad_norm": 0.31075817346572876,
      "learning_rate": 3.878205128205129e-05,
      "loss": 1.1266,
      "step": 24
    },
    {
      "epoch": 0.03970617431010522,
      "grad_norm": 0.3608109652996063,
      "learning_rate": 3.871794871794872e-05,
      "loss": 1.3268,
      "step": 25
    },
    {
      "epoch": 0.04129442128250943,
      "grad_norm": 0.34548354148864746,
      "learning_rate": 3.865384615384616e-05,
      "loss": 1.4485,
      "step": 26
    },
    {
      "epoch": 0.04288266825491364,
      "grad_norm": 0.3382280468940735,
      "learning_rate": 3.858974358974359e-05,
      "loss": 1.4231,
      "step": 27
    },
    {
      "epoch": 0.044470915227317845,
      "grad_norm": 0.3258324861526489,
      "learning_rate": 3.852564102564103e-05,
      "loss": 1.2321,
      "step": 28
    },
    {
      "epoch": 0.04605916219972206,
      "grad_norm": 0.390952467918396,
      "learning_rate": 3.846153846153846e-05,
      "loss": 1.3014,
      "step": 29
    },
    {
      "epoch": 0.047647409172126266,
      "grad_norm": 0.34574779868125916,
      "learning_rate": 3.83974358974359e-05,
      "loss": 1.3385,
      "step": 30
    },
    {
      "epoch": 0.049235656144530474,
      "grad_norm": 0.32823994755744934,
      "learning_rate": 3.833333333333334e-05,
      "loss": 1.2178,
      "step": 31
    },
    {
      "epoch": 0.05082390311693468,
      "grad_norm": 0.4966191053390503,
      "learning_rate": 3.8269230769230774e-05,
      "loss": 1.3805,
      "step": 32
    },
    {
      "epoch": 0.052412150089338895,
      "grad_norm": 0.3081677556037903,
      "learning_rate": 3.820512820512821e-05,
      "loss": 1.2109,
      "step": 33
    },
    {
      "epoch": 0.0540003970617431,
      "grad_norm": 0.2919631600379944,
      "learning_rate": 3.8141025641025645e-05,
      "loss": 1.2596,
      "step": 34
    },
    {
      "epoch": 0.05558864403414731,
      "grad_norm": 0.34928378462791443,
      "learning_rate": 3.807692307692308e-05,
      "loss": 1.3003,
      "step": 35
    },
    {
      "epoch": 0.05717689100655152,
      "grad_norm": 0.32200536131858826,
      "learning_rate": 3.801282051282052e-05,
      "loss": 1.1992,
      "step": 36
    },
    {
      "epoch": 0.05876513797895573,
      "grad_norm": 0.36465853452682495,
      "learning_rate": 3.794871794871795e-05,
      "loss": 1.3853,
      "step": 37
    },
    {
      "epoch": 0.06035338495135994,
      "grad_norm": 0.36780843138694763,
      "learning_rate": 3.788461538461539e-05,
      "loss": 1.3488,
      "step": 38
    },
    {
      "epoch": 0.061941631923764146,
      "grad_norm": 0.3747501075267792,
      "learning_rate": 3.782051282051282e-05,
      "loss": 1.2759,
      "step": 39
    },
    {
      "epoch": 0.06352987889616836,
      "grad_norm": 0.4664352834224701,
      "learning_rate": 3.775641025641026e-05,
      "loss": 1.2444,
      "step": 40
    },
    {
      "epoch": 0.06511812586857256,
      "grad_norm": 0.3714566230773926,
      "learning_rate": 3.769230769230769e-05,
      "loss": 1.3729,
      "step": 41
    },
    {
      "epoch": 0.06670637284097677,
      "grad_norm": 0.35391151905059814,
      "learning_rate": 3.762820512820513e-05,
      "loss": 1.2871,
      "step": 42
    },
    {
      "epoch": 0.06829461981338097,
      "grad_norm": 0.37723308801651,
      "learning_rate": 3.7564102564102564e-05,
      "loss": 1.3243,
      "step": 43
    },
    {
      "epoch": 0.06988286678578519,
      "grad_norm": 0.3361778259277344,
      "learning_rate": 3.7500000000000003e-05,
      "loss": 1.1932,
      "step": 44
    },
    {
      "epoch": 0.0714711137581894,
      "grad_norm": 0.31701332330703735,
      "learning_rate": 3.7435897435897436e-05,
      "loss": 1.3294,
      "step": 45
    },
    {
      "epoch": 0.0730593607305936,
      "grad_norm": 0.3698272705078125,
      "learning_rate": 3.7371794871794875e-05,
      "loss": 1.2617,
      "step": 46
    },
    {
      "epoch": 0.07464760770299782,
      "grad_norm": 0.34270384907722473,
      "learning_rate": 3.7307692307692314e-05,
      "loss": 1.3205,
      "step": 47
    },
    {
      "epoch": 0.07623585467540203,
      "grad_norm": 0.31238165497779846,
      "learning_rate": 3.724358974358975e-05,
      "loss": 1.297,
      "step": 48
    },
    {
      "epoch": 0.07782410164780623,
      "grad_norm": 0.3665103614330292,
      "learning_rate": 3.7179487179487186e-05,
      "loss": 1.3757,
      "step": 49
    },
    {
      "epoch": 0.07941234862021045,
      "grad_norm": 0.36817964911460876,
      "learning_rate": 3.711538461538462e-05,
      "loss": 1.3225,
      "step": 50
    },
    {
      "epoch": 0.08100059559261465,
      "grad_norm": 0.32568806409835815,
      "learning_rate": 3.705128205128206e-05,
      "loss": 1.1309,
      "step": 51
    },
    {
      "epoch": 0.08258884256501886,
      "grad_norm": 0.2992149293422699,
      "learning_rate": 3.698717948717949e-05,
      "loss": 1.2399,
      "step": 52
    },
    {
      "epoch": 0.08417708953742307,
      "grad_norm": 0.3493598401546478,
      "learning_rate": 3.692307692307693e-05,
      "loss": 1.2718,
      "step": 53
    },
    {
      "epoch": 0.08576533650982728,
      "grad_norm": 0.38273313641548157,
      "learning_rate": 3.685897435897436e-05,
      "loss": 1.2875,
      "step": 54
    },
    {
      "epoch": 0.08735358348223149,
      "grad_norm": 0.3631385266780853,
      "learning_rate": 3.6794871794871794e-05,
      "loss": 1.176,
      "step": 55
    },
    {
      "epoch": 0.08894183045463569,
      "grad_norm": 0.33916208148002625,
      "learning_rate": 3.673076923076923e-05,
      "loss": 1.3624,
      "step": 56
    },
    {
      "epoch": 0.0905300774270399,
      "grad_norm": 0.3310489058494568,
      "learning_rate": 3.6666666666666666e-05,
      "loss": 1.1639,
      "step": 57
    },
    {
      "epoch": 0.09211832439944412,
      "grad_norm": 0.3602941930294037,
      "learning_rate": 3.6602564102564105e-05,
      "loss": 1.288,
      "step": 58
    },
    {
      "epoch": 0.09370657137184832,
      "grad_norm": 0.39397984743118286,
      "learning_rate": 3.653846153846154e-05,
      "loss": 1.285,
      "step": 59
    },
    {
      "epoch": 0.09529481834425253,
      "grad_norm": 0.3368014991283417,
      "learning_rate": 3.6474358974358977e-05,
      "loss": 1.2523,
      "step": 60
    },
    {
      "epoch": 0.09688306531665675,
      "grad_norm": 0.32057568430900574,
      "learning_rate": 3.6410256410256416e-05,
      "loss": 1.0862,
      "step": 61
    },
    {
      "epoch": 0.09847131228906095,
      "grad_norm": 0.409466028213501,
      "learning_rate": 3.634615384615385e-05,
      "loss": 1.4059,
      "step": 62
    },
    {
      "epoch": 0.10005955926146516,
      "grad_norm": 0.3347267210483551,
      "learning_rate": 3.628205128205129e-05,
      "loss": 1.0021,
      "step": 63
    },
    {
      "epoch": 0.10164780623386936,
      "grad_norm": 0.31896549463272095,
      "learning_rate": 3.621794871794872e-05,
      "loss": 1.1485,
      "step": 64
    },
    {
      "epoch": 0.10323605320627358,
      "grad_norm": 0.3799891471862793,
      "learning_rate": 3.615384615384616e-05,
      "loss": 1.2916,
      "step": 65
    },
    {
      "epoch": 0.10482430017867779,
      "grad_norm": 0.3329267203807831,
      "learning_rate": 3.608974358974359e-05,
      "loss": 1.3268,
      "step": 66
    },
    {
      "epoch": 0.10641254715108199,
      "grad_norm": 0.4332905411720276,
      "learning_rate": 3.602564102564103e-05,
      "loss": 1.3233,
      "step": 67
    },
    {
      "epoch": 0.1080007941234862,
      "grad_norm": 0.4160114824771881,
      "learning_rate": 3.596153846153846e-05,
      "loss": 1.154,
      "step": 68
    },
    {
      "epoch": 0.1095890410958904,
      "grad_norm": 0.32984739542007446,
      "learning_rate": 3.58974358974359e-05,
      "loss": 1.1314,
      "step": 69
    },
    {
      "epoch": 0.11117728806829462,
      "grad_norm": 0.3880075216293335,
      "learning_rate": 3.5833333333333335e-05,
      "loss": 1.3362,
      "step": 70
    },
    {
      "epoch": 0.11276553504069883,
      "grad_norm": 0.33941248059272766,
      "learning_rate": 3.5769230769230774e-05,
      "loss": 1.2692,
      "step": 71
    },
    {
      "epoch": 0.11435378201310303,
      "grad_norm": 0.3812304139137268,
      "learning_rate": 3.570512820512821e-05,
      "loss": 1.0389,
      "step": 72
    },
    {
      "epoch": 0.11594202898550725,
      "grad_norm": 0.3491113483905792,
      "learning_rate": 3.5641025641025646e-05,
      "loss": 1.0725,
      "step": 73
    },
    {
      "epoch": 0.11753027595791146,
      "grad_norm": 0.371980220079422,
      "learning_rate": 3.557692307692308e-05,
      "loss": 1.2572,
      "step": 74
    },
    {
      "epoch": 0.11911852293031566,
      "grad_norm": 0.4158433675765991,
      "learning_rate": 3.551282051282052e-05,
      "loss": 1.3052,
      "step": 75
    },
    {
      "epoch": 0.12070676990271988,
      "grad_norm": 0.340273380279541,
      "learning_rate": 3.544871794871795e-05,
      "loss": 1.0866,
      "step": 76
    },
    {
      "epoch": 0.12229501687512408,
      "grad_norm": 0.42024505138397217,
      "learning_rate": 3.538461538461539e-05,
      "loss": 1.3908,
      "step": 77
    },
    {
      "epoch": 0.12388326384752829,
      "grad_norm": 0.3871309757232666,
      "learning_rate": 3.532051282051282e-05,
      "loss": 1.1443,
      "step": 78
    },
    {
      "epoch": 0.1254715108199325,
      "grad_norm": 0.4081447124481201,
      "learning_rate": 3.525641025641026e-05,
      "loss": 1.256,
      "step": 79
    },
    {
      "epoch": 0.12705975779233672,
      "grad_norm": 0.398825079202652,
      "learning_rate": 3.519230769230769e-05,
      "loss": 1.0935,
      "step": 80
    },
    {
      "epoch": 0.12864800476474092,
      "grad_norm": 0.41756635904312134,
      "learning_rate": 3.512820512820513e-05,
      "loss": 1.3101,
      "step": 81
    },
    {
      "epoch": 0.13023625173714512,
      "grad_norm": 0.3089386522769928,
      "learning_rate": 3.5064102564102565e-05,
      "loss": 1.0133,
      "step": 82
    },
    {
      "epoch": 0.13182449870954935,
      "grad_norm": 0.319243848323822,
      "learning_rate": 3.5000000000000004e-05,
      "loss": 0.8617,
      "step": 83
    },
    {
      "epoch": 0.13341274568195355,
      "grad_norm": 0.4051594138145447,
      "learning_rate": 3.4935897435897436e-05,
      "loss": 1.2443,
      "step": 84
    },
    {
      "epoch": 0.13500099265435775,
      "grad_norm": 0.43612000346183777,
      "learning_rate": 3.4871794871794875e-05,
      "loss": 1.269,
      "step": 85
    },
    {
      "epoch": 0.13658923962676195,
      "grad_norm": 0.43578770756721497,
      "learning_rate": 3.4807692307692315e-05,
      "loss": 1.2405,
      "step": 86
    },
    {
      "epoch": 0.13817748659916618,
      "grad_norm": 0.4056044816970825,
      "learning_rate": 3.474358974358975e-05,
      "loss": 1.0946,
      "step": 87
    },
    {
      "epoch": 0.13976573357157038,
      "grad_norm": 0.3657781779766083,
      "learning_rate": 3.4679487179487186e-05,
      "loss": 1.0036,
      "step": 88
    },
    {
      "epoch": 0.14135398054397458,
      "grad_norm": 0.38766562938690186,
      "learning_rate": 3.461538461538462e-05,
      "loss": 1.2921,
      "step": 89
    },
    {
      "epoch": 0.1429422275163788,
      "grad_norm": 0.3952920436859131,
      "learning_rate": 3.455128205128206e-05,
      "loss": 1.0871,
      "step": 90
    },
    {
      "epoch": 0.144530474488783,
      "grad_norm": 0.37682345509529114,
      "learning_rate": 3.448717948717949e-05,
      "loss": 1.3176,
      "step": 91
    },
    {
      "epoch": 0.1461187214611872,
      "grad_norm": 0.41130203008651733,
      "learning_rate": 3.442307692307693e-05,
      "loss": 1.4224,
      "step": 92
    },
    {
      "epoch": 0.14770696843359143,
      "grad_norm": 0.446526437997818,
      "learning_rate": 3.435897435897436e-05,
      "loss": 1.1269,
      "step": 93
    },
    {
      "epoch": 0.14929521540599563,
      "grad_norm": 0.3832029104232788,
      "learning_rate": 3.4294871794871794e-05,
      "loss": 1.2257,
      "step": 94
    },
    {
      "epoch": 0.15088346237839984,
      "grad_norm": 0.3874358534812927,
      "learning_rate": 3.4230769230769234e-05,
      "loss": 1.2198,
      "step": 95
    },
    {
      "epoch": 0.15247170935080406,
      "grad_norm": 0.44150203466415405,
      "learning_rate": 3.4166666666666666e-05,
      "loss": 1.4579,
      "step": 96
    },
    {
      "epoch": 0.15405995632320826,
      "grad_norm": 0.49795201420783997,
      "learning_rate": 3.4102564102564105e-05,
      "loss": 1.1809,
      "step": 97
    },
    {
      "epoch": 0.15564820329561246,
      "grad_norm": 0.38568922877311707,
      "learning_rate": 3.403846153846154e-05,
      "loss": 1.2511,
      "step": 98
    },
    {
      "epoch": 0.15723645026801666,
      "grad_norm": 0.3719356060028076,
      "learning_rate": 3.397435897435898e-05,
      "loss": 1.1881,
      "step": 99
    },
    {
      "epoch": 0.1588246972404209,
      "grad_norm": 0.4000978469848633,
      "learning_rate": 3.391025641025641e-05,
      "loss": 1.0767,
      "step": 100
    },
    {
      "epoch": 0.1604129442128251,
      "grad_norm": 0.46317172050476074,
      "learning_rate": 3.384615384615385e-05,
      "loss": 1.1606,
      "step": 101
    },
    {
      "epoch": 0.1620011911852293,
      "grad_norm": 0.42941561341285706,
      "learning_rate": 3.378205128205129e-05,
      "loss": 1.1359,
      "step": 102
    },
    {
      "epoch": 0.16358943815763352,
      "grad_norm": 0.3518621325492859,
      "learning_rate": 3.371794871794872e-05,
      "loss": 1.1586,
      "step": 103
    },
    {
      "epoch": 0.16517768513003772,
      "grad_norm": 0.45329567790031433,
      "learning_rate": 3.365384615384616e-05,
      "loss": 1.3658,
      "step": 104
    },
    {
      "epoch": 0.16676593210244192,
      "grad_norm": 0.39535844326019287,
      "learning_rate": 3.358974358974359e-05,
      "loss": 1.1398,
      "step": 105
    },
    {
      "epoch": 0.16835417907484615,
      "grad_norm": 0.3700944781303406,
      "learning_rate": 3.352564102564103e-05,
      "loss": 1.0872,
      "step": 106
    },
    {
      "epoch": 0.16994242604725035,
      "grad_norm": 0.3956209719181061,
      "learning_rate": 3.346153846153846e-05,
      "loss": 1.3071,
      "step": 107
    },
    {
      "epoch": 0.17153067301965455,
      "grad_norm": 0.3819758892059326,
      "learning_rate": 3.33974358974359e-05,
      "loss": 1.1822,
      "step": 108
    },
    {
      "epoch": 0.17311891999205878,
      "grad_norm": 0.44656893610954285,
      "learning_rate": 3.3333333333333335e-05,
      "loss": 1.3185,
      "step": 109
    },
    {
      "epoch": 0.17470716696446298,
      "grad_norm": 0.35640376806259155,
      "learning_rate": 3.3269230769230774e-05,
      "loss": 1.0616,
      "step": 110
    },
    {
      "epoch": 0.17629541393686718,
      "grad_norm": 0.3747159242630005,
      "learning_rate": 3.3205128205128207e-05,
      "loss": 1.1727,
      "step": 111
    },
    {
      "epoch": 0.17788366090927138,
      "grad_norm": 0.4224643409252167,
      "learning_rate": 3.3141025641025646e-05,
      "loss": 1.155,
      "step": 112
    },
    {
      "epoch": 0.1794719078816756,
      "grad_norm": 0.45572227239608765,
      "learning_rate": 3.307692307692308e-05,
      "loss": 1.1105,
      "step": 113
    },
    {
      "epoch": 0.1810601548540798,
      "grad_norm": 0.4539688527584076,
      "learning_rate": 3.301282051282051e-05,
      "loss": 1.1952,
      "step": 114
    },
    {
      "epoch": 0.182648401826484,
      "grad_norm": 0.45282796025276184,
      "learning_rate": 3.294871794871795e-05,
      "loss": 1.2254,
      "step": 115
    },
    {
      "epoch": 0.18423664879888824,
      "grad_norm": 0.37122872471809387,
      "learning_rate": 3.288461538461539e-05,
      "loss": 1.0683,
      "step": 116
    },
    {
      "epoch": 0.18582489577129244,
      "grad_norm": 0.5063671469688416,
      "learning_rate": 3.282051282051282e-05,
      "loss": 1.2554,
      "step": 117
    },
    {
      "epoch": 0.18741314274369664,
      "grad_norm": 0.4486718475818634,
      "learning_rate": 3.275641025641026e-05,
      "loss": 1.3148,
      "step": 118
    },
    {
      "epoch": 0.18900138971610086,
      "grad_norm": 0.43316200375556946,
      "learning_rate": 3.269230769230769e-05,
      "loss": 1.3,
      "step": 119
    },
    {
      "epoch": 0.19058963668850507,
      "grad_norm": 0.4560703635215759,
      "learning_rate": 3.262820512820513e-05,
      "loss": 1.0387,
      "step": 120
    },
    {
      "epoch": 0.19217788366090927,
      "grad_norm": 0.41410017013549805,
      "learning_rate": 3.2564102564102565e-05,
      "loss": 1.2125,
      "step": 121
    },
    {
      "epoch": 0.1937661306333135,
      "grad_norm": 0.43740931153297424,
      "learning_rate": 3.2500000000000004e-05,
      "loss": 1.1765,
      "step": 122
    },
    {
      "epoch": 0.1953543776057177,
      "grad_norm": 0.4682227671146393,
      "learning_rate": 3.2435897435897436e-05,
      "loss": 1.3742,
      "step": 123
    },
    {
      "epoch": 0.1969426245781219,
      "grad_norm": 0.47108888626098633,
      "learning_rate": 3.2371794871794876e-05,
      "loss": 1.3794,
      "step": 124
    },
    {
      "epoch": 0.1985308715505261,
      "grad_norm": 0.4044567048549652,
      "learning_rate": 3.230769230769231e-05,
      "loss": 1.2427,
      "step": 125
    },
    {
      "epoch": 0.20011911852293032,
      "grad_norm": 0.44156500697135925,
      "learning_rate": 3.224358974358975e-05,
      "loss": 1.1546,
      "step": 126
    },
    {
      "epoch": 0.20170736549533452,
      "grad_norm": 0.4503639340400696,
      "learning_rate": 3.2179487179487186e-05,
      "loss": 1.1677,
      "step": 127
    },
    {
      "epoch": 0.20329561246773872,
      "grad_norm": 0.40942591428756714,
      "learning_rate": 3.211538461538462e-05,
      "loss": 1.0108,
      "step": 128
    },
    {
      "epoch": 0.20488385944014295,
      "grad_norm": 0.5105980038642883,
      "learning_rate": 3.205128205128206e-05,
      "loss": 1.313,
      "step": 129
    },
    {
      "epoch": 0.20647210641254715,
      "grad_norm": 0.46841809153556824,
      "learning_rate": 3.198717948717949e-05,
      "loss": 1.3129,
      "step": 130
    },
    {
      "epoch": 0.20806035338495135,
      "grad_norm": 0.4008212387561798,
      "learning_rate": 3.192307692307693e-05,
      "loss": 1.2303,
      "step": 131
    },
    {
      "epoch": 0.20964860035735558,
      "grad_norm": 0.42567890882492065,
      "learning_rate": 3.185897435897436e-05,
      "loss": 1.2456,
      "step": 132
    },
    {
      "epoch": 0.21123684732975978,
      "grad_norm": 0.44130662083625793,
      "learning_rate": 3.1794871794871795e-05,
      "loss": 1.38,
      "step": 133
    },
    {
      "epoch": 0.21282509430216398,
      "grad_norm": 0.5576122999191284,
      "learning_rate": 3.1730769230769234e-05,
      "loss": 1.3357,
      "step": 134
    },
    {
      "epoch": 0.2144133412745682,
      "grad_norm": 0.4331263601779938,
      "learning_rate": 3.1666666666666666e-05,
      "loss": 1.0914,
      "step": 135
    },
    {
      "epoch": 0.2160015882469724,
      "grad_norm": 0.5117509961128235,
      "learning_rate": 3.1602564102564105e-05,
      "loss": 1.2846,
      "step": 136
    },
    {
      "epoch": 0.2175898352193766,
      "grad_norm": 0.5039793848991394,
      "learning_rate": 3.153846153846154e-05,
      "loss": 1.4331,
      "step": 137
    },
    {
      "epoch": 0.2191780821917808,
      "grad_norm": 0.4223591387271881,
      "learning_rate": 3.147435897435898e-05,
      "loss": 1.1687,
      "step": 138
    },
    {
      "epoch": 0.22076632916418504,
      "grad_norm": 0.40778806805610657,
      "learning_rate": 3.141025641025641e-05,
      "loss": 1.173,
      "step": 139
    },
    {
      "epoch": 0.22235457613658924,
      "grad_norm": 0.4686231017112732,
      "learning_rate": 3.134615384615385e-05,
      "loss": 1.3669,
      "step": 140
    },
    {
      "epoch": 0.22394282310899344,
      "grad_norm": 0.43122047185897827,
      "learning_rate": 3.128205128205129e-05,
      "loss": 1.2708,
      "step": 141
    },
    {
      "epoch": 0.22553107008139767,
      "grad_norm": 0.3762079179286957,
      "learning_rate": 3.121794871794872e-05,
      "loss": 1.1584,
      "step": 142
    },
    {
      "epoch": 0.22711931705380187,
      "grad_norm": 0.417106568813324,
      "learning_rate": 3.115384615384616e-05,
      "loss": 1.1314,
      "step": 143
    },
    {
      "epoch": 0.22870756402620607,
      "grad_norm": 0.4525890052318573,
      "learning_rate": 3.108974358974359e-05,
      "loss": 1.124,
      "step": 144
    },
    {
      "epoch": 0.2302958109986103,
      "grad_norm": 0.41886812448501587,
      "learning_rate": 3.102564102564103e-05,
      "loss": 1.1735,
      "step": 145
    },
    {
      "epoch": 0.2318840579710145,
      "grad_norm": 0.39290478825569153,
      "learning_rate": 3.0961538461538464e-05,
      "loss": 1.2455,
      "step": 146
    },
    {
      "epoch": 0.2334723049434187,
      "grad_norm": 0.42833879590034485,
      "learning_rate": 3.08974358974359e-05,
      "loss": 1.2274,
      "step": 147
    },
    {
      "epoch": 0.23506055191582292,
      "grad_norm": 0.37532350420951843,
      "learning_rate": 3.0833333333333335e-05,
      "loss": 1.0969,
      "step": 148
    },
    {
      "epoch": 0.23664879888822712,
      "grad_norm": 0.36185258626937866,
      "learning_rate": 3.0769230769230774e-05,
      "loss": 1.1227,
      "step": 149
    },
    {
      "epoch": 0.23823704586063132,
      "grad_norm": 0.4009798765182495,
      "learning_rate": 3.070512820512821e-05,
      "loss": 1.0955,
      "step": 150
    }
  ],
  "logging_steps": 1,
  "max_steps": 629,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 75,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 1.0710275291755315e+17,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}