{
  "best_metric": 1.2467516660690308,
  "best_model_checkpoint": "miner_id_24/checkpoint-200",
  "epoch": 1.103448275862069,
  "eval_steps": 50,
  "global_step": 200,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.005517241379310344,
      "grad_norm": 81.85233306884766,
      "learning_rate": 6e-06,
      "loss": 12.4813,
      "step": 1
    },
    {
      "epoch": 0.005517241379310344,
      "eval_loss": 3.5472052097320557,
      "eval_runtime": 21.8033,
      "eval_samples_per_second": 13.989,
      "eval_steps_per_second": 3.532,
      "step": 1
    },
    {
      "epoch": 0.011034482758620689,
      "grad_norm": 67.96113586425781,
      "learning_rate": 1.2e-05,
      "loss": 10.6517,
      "step": 2
    },
    {
      "epoch": 0.016551724137931035,
      "grad_norm": 87.34710693359375,
      "learning_rate": 1.8e-05,
      "loss": 12.4284,
      "step": 3
    },
    {
      "epoch": 0.022068965517241378,
      "grad_norm": 68.4135513305664,
      "learning_rate": 2.4e-05,
      "loss": 11.0609,
      "step": 4
    },
    {
      "epoch": 0.027586206896551724,
      "grad_norm": 75.67364501953125,
      "learning_rate": 3e-05,
      "loss": 10.8972,
      "step": 5
    },
    {
      "epoch": 0.03310344827586207,
      "grad_norm": 76.12530517578125,
      "learning_rate": 3.6e-05,
      "loss": 10.0061,
      "step": 6
    },
    {
      "epoch": 0.038620689655172416,
      "grad_norm": 79.84320068359375,
      "learning_rate": 4.2e-05,
      "loss": 9.8832,
      "step": 7
    },
    {
      "epoch": 0.044137931034482755,
      "grad_norm": 50.45161056518555,
      "learning_rate": 4.8e-05,
      "loss": 9.4741,
      "step": 8
    },
    {
      "epoch": 0.0496551724137931,
      "grad_norm": 37.255496978759766,
      "learning_rate": 5.4000000000000005e-05,
      "loss": 8.7538,
      "step": 9
    },
    {
      "epoch": 0.05517241379310345,
      "grad_norm": 45.016075134277344,
      "learning_rate": 6e-05,
      "loss": 8.3654,
      "step": 10
    },
    {
      "epoch": 0.060689655172413794,
      "grad_norm": 33.683189392089844,
      "learning_rate": 5.999589914977407e-05,
      "loss": 7.7713,
      "step": 11
    },
    {
      "epoch": 0.06620689655172414,
      "grad_norm": 25.923715591430664,
      "learning_rate": 5.998359772022778e-05,
      "loss": 6.825,
      "step": 12
    },
    {
      "epoch": 0.07172413793103448,
      "grad_norm": 36.873512268066406,
      "learning_rate": 5.996309907444915e-05,
      "loss": 8.1533,
      "step": 13
    },
    {
      "epoch": 0.07724137931034483,
      "grad_norm": 23.235549926757812,
      "learning_rate": 5.9934408816563236e-05,
      "loss": 7.3939,
      "step": 14
    },
    {
      "epoch": 0.08275862068965517,
      "grad_norm": 24.117935180664062,
      "learning_rate": 5.98975347902001e-05,
      "loss": 7.3477,
      "step": 15
    },
    {
      "epoch": 0.08827586206896551,
      "grad_norm": 21.500791549682617,
      "learning_rate": 5.9852487076350345e-05,
      "loss": 7.0144,
      "step": 16
    },
    {
      "epoch": 0.09379310344827586,
      "grad_norm": 23.357107162475586,
      "learning_rate": 5.979927799060915e-05,
      "loss": 6.9306,
      "step": 17
    },
    {
      "epoch": 0.0993103448275862,
      "grad_norm": 26.277986526489258,
      "learning_rate": 5.9737922079809257e-05,
      "loss": 7.2077,
      "step": 18
    },
    {
      "epoch": 0.10482758620689656,
      "grad_norm": 20.46823501586914,
      "learning_rate": 5.9668436118044054e-05,
      "loss": 6.7012,
      "step": 19
    },
    {
      "epoch": 0.1103448275862069,
      "grad_norm": 35.533565521240234,
      "learning_rate": 5.959083910208167e-05,
      "loss": 6.5318,
      "step": 20
    },
    {
      "epoch": 0.11586206896551725,
      "grad_norm": 21.680561065673828,
      "learning_rate": 5.9505152246171474e-05,
      "loss": 6.8213,
      "step": 21
    },
    {
      "epoch": 0.12137931034482759,
      "grad_norm": 38.167606353759766,
      "learning_rate": 5.941139897624428e-05,
      "loss": 6.7067,
      "step": 22
    },
    {
      "epoch": 0.12689655172413794,
      "grad_norm": 22.853790283203125,
      "learning_rate": 5.9309604923507984e-05,
      "loss": 6.8576,
      "step": 23
    },
    {
      "epoch": 0.13241379310344828,
      "grad_norm": 23.914487838745117,
      "learning_rate": 5.9199797917440176e-05,
      "loss": 6.7643,
      "step": 24
    },
    {
      "epoch": 0.13793103448275862,
      "grad_norm": 36.852081298828125,
      "learning_rate": 5.908200797817991e-05,
      "loss": 6.7685,
      "step": 25
    },
    {
      "epoch": 0.14344827586206896,
      "grad_norm": 28.959129333496094,
      "learning_rate": 5.895626730832046e-05,
      "loss": 7.1442,
      "step": 26
    },
    {
      "epoch": 0.1489655172413793,
      "grad_norm": 23.524133682250977,
      "learning_rate": 5.882261028410545e-05,
      "loss": 6.6208,
      "step": 27
    },
    {
      "epoch": 0.15448275862068966,
      "grad_norm": 29.11441993713379,
      "learning_rate": 5.8681073446030734e-05,
      "loss": 8.4364,
      "step": 28
    },
    {
      "epoch": 0.16,
      "grad_norm": 29.907983779907227,
      "learning_rate": 5.853169548885461e-05,
      "loss": 6.3796,
      "step": 29
    },
    {
      "epoch": 0.16551724137931034,
      "grad_norm": 28.542919158935547,
      "learning_rate": 5.8374517251019035e-05,
      "loss": 7.1945,
      "step": 30
    },
    {
      "epoch": 0.17103448275862068,
      "grad_norm": 33.40464401245117,
      "learning_rate": 5.820958170348484e-05,
      "loss": 8.0262,
      "step": 31
    },
    {
      "epoch": 0.17655172413793102,
      "grad_norm": 23.497774124145508,
      "learning_rate": 5.8036933937983825e-05,
      "loss": 7.4088,
      "step": 32
    },
    {
      "epoch": 0.1820689655172414,
      "grad_norm": 25.99003791809082,
      "learning_rate": 5.7856621154691217e-05,
      "loss": 8.0957,
      "step": 33
    },
    {
      "epoch": 0.18758620689655173,
      "grad_norm": 20.609542846679688,
      "learning_rate": 5.766869264932154e-05,
      "loss": 6.9359,
      "step": 34
    },
    {
      "epoch": 0.19310344827586207,
      "grad_norm": 20.997133255004883,
      "learning_rate": 5.747319979965172e-05,
      "loss": 8.0183,
      "step": 35
    },
    {
      "epoch": 0.1986206896551724,
      "grad_norm": 27.197574615478516,
      "learning_rate": 5.727019605147488e-05,
      "loss": 7.0162,
      "step": 36
    },
    {
      "epoch": 0.20413793103448277,
      "grad_norm": 23.199996948242188,
      "learning_rate": 5.7059736903988775e-05,
      "loss": 7.9867,
      "step": 37
    },
    {
      "epoch": 0.2096551724137931,
      "grad_norm": 29.606868743896484,
      "learning_rate": 5.684187989462291e-05,
      "loss": 6.414,
      "step": 38
    },
    {
      "epoch": 0.21517241379310345,
      "grad_norm": 24.353099822998047,
      "learning_rate": 5.661668458330836e-05,
      "loss": 6.8272,
      "step": 39
    },
    {
      "epoch": 0.2206896551724138,
      "grad_norm": 22.26854133605957,
      "learning_rate": 5.638421253619467e-05,
      "loss": 5.6701,
      "step": 40
    },
    {
      "epoch": 0.22620689655172413,
      "grad_norm": 23.756772994995117,
      "learning_rate": 5.614452730881832e-05,
      "loss": 6.4688,
      "step": 41
    },
    {
      "epoch": 0.2317241379310345,
      "grad_norm": 26.136266708374023,
      "learning_rate": 5.589769442872722e-05,
      "loss": 6.3717,
      "step": 42
    },
    {
      "epoch": 0.23724137931034484,
      "grad_norm": 23.154205322265625,
      "learning_rate": 5.5643781377566175e-05,
      "loss": 6.0505,
      "step": 43
    },
    {
      "epoch": 0.24275862068965517,
      "grad_norm": 36.596099853515625,
      "learning_rate": 5.538285757262806e-05,
      "loss": 6.3005,
      "step": 44
    },
    {
      "epoch": 0.2482758620689655,
      "grad_norm": 52.90260314941406,
      "learning_rate": 5.5114994347875856e-05,
      "loss": 7.8155,
      "step": 45
    },
    {
      "epoch": 0.2537931034482759,
      "grad_norm": 121.07559967041016,
      "learning_rate": 5.48402649344406e-05,
      "loss": 11.1405,
      "step": 46
    },
    {
      "epoch": 0.2593103448275862,
      "grad_norm": 63.700775146484375,
      "learning_rate": 5.455874444060078e-05,
      "loss": 10.3049,
      "step": 47
    },
    {
      "epoch": 0.26482758620689656,
      "grad_norm": 27.780960083007812,
      "learning_rate": 5.427050983124843e-05,
      "loss": 7.6377,
      "step": 48
    },
    {
      "epoch": 0.27034482758620687,
      "grad_norm": 36.80820846557617,
      "learning_rate": 5.397563990684774e-05,
      "loss": 7.0712,
      "step": 49
    },
    {
      "epoch": 0.27586206896551724,
      "grad_norm": 45.82158660888672,
      "learning_rate": 5.367421528189181e-05,
      "loss": 7.8169,
      "step": 50
    },
    {
      "epoch": 0.27586206896551724,
      "eval_loss": 1.749925971031189,
      "eval_runtime": 22.2372,
      "eval_samples_per_second": 13.716,
      "eval_steps_per_second": 3.463,
      "step": 50
    },
    {
      "epoch": 0.2813793103448276,
      "grad_norm": 32.10503005981445,
      "learning_rate": 5.336631836286338e-05,
      "loss": 7.0049,
      "step": 51
    },
    {
      "epoch": 0.2868965517241379,
      "grad_norm": 20.947622299194336,
      "learning_rate": 5.3052033325705774e-05,
      "loss": 6.132,
      "step": 52
    },
    {
      "epoch": 0.2924137931034483,
      "grad_norm": 18.891435623168945,
      "learning_rate": 5.2731446092810044e-05,
      "loss": 6.8589,
      "step": 53
    },
    {
      "epoch": 0.2979310344827586,
      "grad_norm": 18.95319175720215,
      "learning_rate": 5.240464430952462e-05,
      "loss": 6.297,
      "step": 54
    },
    {
      "epoch": 0.30344827586206896,
      "grad_norm": 18.678953170776367,
      "learning_rate": 5.207171732019395e-05,
      "loss": 6.4108,
      "step": 55
    },
    {
      "epoch": 0.30896551724137933,
      "grad_norm": 17.936857223510742,
      "learning_rate": 5.1732756143732675e-05,
      "loss": 6.0579,
      "step": 56
    },
    {
      "epoch": 0.31448275862068964,
      "grad_norm": 21.307292938232422,
      "learning_rate": 5.1387853448741916e-05,
      "loss": 5.6681,
      "step": 57
    },
    {
      "epoch": 0.32,
      "grad_norm": 20.082971572875977,
      "learning_rate": 5.103710352817465e-05,
      "loss": 6.8433,
      "step": 58
    },
    {
      "epoch": 0.3255172413793103,
      "grad_norm": 16.341541290283203,
      "learning_rate": 5.068060227355698e-05,
      "loss": 5.6629,
      "step": 59
    },
    {
      "epoch": 0.3310344827586207,
      "grad_norm": 17.958162307739258,
      "learning_rate": 5.0318447148772234e-05,
      "loss": 5.8787,
      "step": 60
    },
    {
      "epoch": 0.33655172413793105,
      "grad_norm": 16.37387466430664,
      "learning_rate": 4.995073716341545e-05,
      "loss": 5.8848,
      "step": 61
    },
    {
      "epoch": 0.34206896551724136,
      "grad_norm": 18.893905639648438,
      "learning_rate": 4.957757284572506e-05,
      "loss": 5.7301,
      "step": 62
    },
    {
      "epoch": 0.34758620689655173,
      "grad_norm": 20.43461036682129,
      "learning_rate": 4.91990562150995e-05,
      "loss": 5.7722,
      "step": 63
    },
    {
      "epoch": 0.35310344827586204,
      "grad_norm": 18.65955924987793,
      "learning_rate": 4.881529075420611e-05,
      "loss": 5.825,
      "step": 64
    },
    {
      "epoch": 0.3586206896551724,
      "grad_norm": 22.72260284423828,
      "learning_rate": 4.8426381380690036e-05,
      "loss": 5.7372,
      "step": 65
    },
    {
      "epoch": 0.3641379310344828,
      "grad_norm": 19.48175811767578,
      "learning_rate": 4.8032434418490753e-05,
      "loss": 6.3996,
      "step": 66
    },
    {
      "epoch": 0.3696551724137931,
      "grad_norm": 19.140289306640625,
      "learning_rate": 4.7633557568774194e-05,
      "loss": 6.5187,
      "step": 67
    },
    {
      "epoch": 0.37517241379310345,
      "grad_norm": 17.06688690185547,
      "learning_rate": 4.722985988048831e-05,
      "loss": 6.3792,
      "step": 68
    },
    {
      "epoch": 0.38068965517241377,
      "grad_norm": 18.04470443725586,
      "learning_rate": 4.6821451720550184e-05,
      "loss": 5.2939,
      "step": 69
    },
    {
      "epoch": 0.38620689655172413,
      "grad_norm": 17.178627014160156,
      "learning_rate": 4.640844474367282e-05,
      "loss": 6.0978,
      "step": 70
    },
    {
      "epoch": 0.3917241379310345,
      "grad_norm": 19.80453109741211,
      "learning_rate": 4.5990951861839815e-05,
      "loss": 6.1981,
      "step": 71
    },
    {
      "epoch": 0.3972413793103448,
      "grad_norm": 21.434978485107422,
      "learning_rate": 4.5569087213436455e-05,
      "loss": 6.0233,
      "step": 72
    },
    {
      "epoch": 0.4027586206896552,
      "grad_norm": 26.16228675842285,
      "learning_rate": 4.514296613204532e-05,
      "loss": 6.1622,
      "step": 73
    },
    {
      "epoch": 0.40827586206896554,
      "grad_norm": 28.807147979736328,
      "learning_rate": 4.471270511491525e-05,
      "loss": 6.3984,
      "step": 74
    },
    {
      "epoch": 0.41379310344827586,
      "grad_norm": 23.963605880737305,
      "learning_rate": 4.427842179111221e-05,
      "loss": 6.9674,
      "step": 75
    },
    {
      "epoch": 0.4193103448275862,
      "grad_norm": 28.31536102294922,
      "learning_rate": 4.3840234889360634e-05,
      "loss": 7.5399,
      "step": 76
    },
    {
      "epoch": 0.42482758620689653,
      "grad_norm": 17.923322677612305,
      "learning_rate": 4.33982642055842e-05,
      "loss": 6.0038,
      "step": 77
    },
    {
      "epoch": 0.4303448275862069,
      "grad_norm": 21.877260208129883,
      "learning_rate": 4.2952630570154785e-05,
      "loss": 6.4209,
      "step": 78
    },
    {
      "epoch": 0.43586206896551727,
      "grad_norm": 18.959318161010742,
      "learning_rate": 4.250345581485871e-05,
      "loss": 6.9811,
      "step": 79
    },
    {
      "epoch": 0.4413793103448276,
      "grad_norm": 24.570106506347656,
      "learning_rate": 4.205086273958909e-05,
      "loss": 7.1458,
      "step": 80
    },
    {
      "epoch": 0.44689655172413795,
      "grad_norm": 25.716232299804688,
      "learning_rate": 4.1594975078773565e-05,
      "loss": 6.8472,
      "step": 81
    },
    {
      "epoch": 0.45241379310344826,
      "grad_norm": 21.59646987915039,
      "learning_rate": 4.113591746754662e-05,
      "loss": 6.0973,
      "step": 82
    },
    {
      "epoch": 0.4579310344827586,
      "grad_norm": 24.108993530273438,
      "learning_rate": 4.06738154076755e-05,
      "loss": 5.961,
      "step": 83
    },
    {
      "epoch": 0.463448275862069,
      "grad_norm": 23.924850463867188,
      "learning_rate": 4.020879523324929e-05,
      "loss": 5.7052,
      "step": 84
    },
    {
      "epoch": 0.4689655172413793,
      "grad_norm": 18.18124008178711,
      "learning_rate": 3.974098407614051e-05,
      "loss": 5.0366,
      "step": 85
    },
    {
      "epoch": 0.47448275862068967,
      "grad_norm": 24.268354415893555,
      "learning_rate": 3.927050983124842e-05,
      "loss": 5.2907,
      "step": 86
    },
    {
      "epoch": 0.48,
      "grad_norm": 24.5296630859375,
      "learning_rate": 3.8797501121533946e-05,
      "loss": 6.244,
      "step": 87
    },
    {
      "epoch": 0.48551724137931035,
      "grad_norm": 22.945091247558594,
      "learning_rate": 3.832208726285534e-05,
      "loss": 4.8176,
      "step": 88
    },
    {
      "epoch": 0.4910344827586207,
      "grad_norm": 27.000350952148438,
      "learning_rate": 3.784439822861459e-05,
      "loss": 4.9201,
      "step": 89
    },
    {
      "epoch": 0.496551724137931,
      "grad_norm": 40.46442413330078,
      "learning_rate": 3.7364564614223976e-05,
      "loss": 5.4079,
      "step": 90
    },
    {
      "epoch": 0.5020689655172413,
      "grad_norm": 44.84676742553711,
      "learning_rate": 3.688271760140255e-05,
      "loss": 8.7851,
      "step": 91
    },
    {
      "epoch": 0.5075862068965518,
      "grad_norm": 38.57492446899414,
      "learning_rate": 3.6398988922312406e-05,
      "loss": 8.1502,
      "step": 92
    },
    {
      "epoch": 0.5131034482758621,
      "grad_norm": 23.454004287719727,
      "learning_rate": 3.591351082354441e-05,
      "loss": 6.3463,
      "step": 93
    },
    {
      "epoch": 0.5186206896551724,
      "grad_norm": 21.795522689819336,
      "learning_rate": 3.54264160299633e-05,
      "loss": 7.3336,
      "step": 94
    },
    {
      "epoch": 0.5241379310344828,
      "grad_norm": 18.910308837890625,
      "learning_rate": 3.493783770842202e-05,
      "loss": 5.9417,
      "step": 95
    },
    {
      "epoch": 0.5296551724137931,
      "grad_norm": 20.068740844726562,
      "learning_rate": 3.444790943135526e-05,
      "loss": 7.0276,
      "step": 96
    },
    {
      "epoch": 0.5351724137931034,
      "grad_norm": 16.92316436767578,
      "learning_rate": 3.3956765140262074e-05,
      "loss": 5.9382,
      "step": 97
    },
    {
      "epoch": 0.5406896551724137,
      "grad_norm": 15.357147216796875,
      "learning_rate": 3.346453910908759e-05,
      "loss": 5.2924,
      "step": 98
    },
    {
      "epoch": 0.5462068965517242,
      "grad_norm": 15.970285415649414,
      "learning_rate": 3.297136590751389e-05,
      "loss": 5.3965,
      "step": 99
    },
    {
      "epoch": 0.5517241379310345,
      "grad_norm": 17.879304885864258,
      "learning_rate": 3.247738036416998e-05,
      "loss": 5.2484,
      "step": 100
    },
    {
      "epoch": 0.5517241379310345,
      "eval_loss": 1.4723836183547974,
      "eval_runtime": 22.2418,
      "eval_samples_per_second": 13.713,
      "eval_steps_per_second": 3.462,
      "step": 100
    },
    {
      "epoch": 0.5572413793103448,
      "grad_norm": 16.214435577392578,
      "learning_rate": 3.1982717529770985e-05,
      "loss": 5.8451,
      "step": 101
    },
    {
      "epoch": 0.5627586206896552,
      "grad_norm": 14.805839538574219,
      "learning_rate": 3.148751264019667e-05,
      "loss": 5.5145,
      "step": 102
    },
    {
      "epoch": 0.5682758620689655,
      "grad_norm": 18.47629165649414,
      "learning_rate": 3.099190107951924e-05,
      "loss": 6.4664,
      "step": 103
    },
    {
      "epoch": 0.5737931034482758,
      "grad_norm": 20.32671356201172,
      "learning_rate": 3.049601834299076e-05,
      "loss": 4.8782,
      "step": 104
    },
    {
      "epoch": 0.5793103448275863,
      "grad_norm": 14.552179336547852,
      "learning_rate": 3e-05,
      "loss": 4.837,
      "step": 105
    },
    {
      "epoch": 0.5848275862068966,
      "grad_norm": 16.728355407714844,
      "learning_rate": 2.9503981657009246e-05,
      "loss": 4.9243,
      "step": 106
    },
    {
      "epoch": 0.5903448275862069,
      "grad_norm": 15.932158470153809,
      "learning_rate": 2.9008098920480752e-05,
      "loss": 5.0072,
      "step": 107
    },
    {
      "epoch": 0.5958620689655172,
      "grad_norm": 14.092022895812988,
      "learning_rate": 2.851248735980333e-05,
      "loss": 4.4422,
      "step": 108
    },
    {
      "epoch": 0.6013793103448276,
      "grad_norm": 14.16683292388916,
      "learning_rate": 2.801728247022902e-05,
      "loss": 5.0976,
      "step": 109
    },
    {
      "epoch": 0.6068965517241379,
      "grad_norm": 15.384170532226562,
      "learning_rate": 2.7522619635830034e-05,
      "loss": 5.0471,
      "step": 110
    },
    {
      "epoch": 0.6124137931034482,
      "grad_norm": 14.895685195922852,
      "learning_rate": 2.702863409248612e-05,
      "loss": 4.8804,
      "step": 111
    },
    {
      "epoch": 0.6179310344827587,
      "grad_norm": 16.85907554626465,
      "learning_rate": 2.6535460890912416e-05,
      "loss": 5.7392,
      "step": 112
    },
    {
      "epoch": 0.623448275862069,
      "grad_norm": 17.640703201293945,
      "learning_rate": 2.604323485973793e-05,
      "loss": 5.387,
      "step": 113
    },
    {
      "epoch": 0.6289655172413793,
      "grad_norm": 16.179893493652344,
      "learning_rate": 2.555209056864474e-05,
      "loss": 5.212,
      "step": 114
    },
    {
      "epoch": 0.6344827586206897,
      "grad_norm": 17.897340774536133,
      "learning_rate": 2.5062162291577978e-05,
      "loss": 4.851,
      "step": 115
    },
    {
      "epoch": 0.64,
      "grad_norm": 17.833797454833984,
      "learning_rate": 2.4573583970036712e-05,
      "loss": 5.0259,
      "step": 116
    },
    {
      "epoch": 0.6455172413793103,
      "grad_norm": 16.975656509399414,
      "learning_rate": 2.4086489176455595e-05,
      "loss": 4.5732,
      "step": 117
    },
    {
      "epoch": 0.6510344827586206,
      "grad_norm": 17.853879928588867,
      "learning_rate": 2.36010110776876e-05,
      "loss": 4.9148,
      "step": 118
    },
    {
      "epoch": 0.6565517241379311,
      "grad_norm": 17.815895080566406,
      "learning_rate": 2.3117282398597456e-05,
      "loss": 5.7234,
      "step": 119
    },
    {
      "epoch": 0.6620689655172414,
      "grad_norm": 18.00694465637207,
      "learning_rate": 2.263543538577603e-05,
      "loss": 5.6019,
      "step": 120
    },
    {
      "epoch": 0.6675862068965517,
      "grad_norm": 21.1002140045166,
      "learning_rate": 2.215560177138541e-05,
      "loss": 5.9787,
      "step": 121
    },
    {
      "epoch": 0.6731034482758621,
      "grad_norm": 18.753433227539062,
      "learning_rate": 2.167791273714467e-05,
      "loss": 5.7944,
      "step": 122
    },
    {
      "epoch": 0.6786206896551724,
      "grad_norm": 20.584749221801758,
      "learning_rate": 2.1202498878466062e-05,
      "loss": 5.4823,
      "step": 123
    },
    {
      "epoch": 0.6841379310344827,
      "grad_norm": 21.03537940979004,
      "learning_rate": 2.072949016875158e-05,
      "loss": 5.1549,
      "step": 124
    },
    {
      "epoch": 0.6896551724137931,
      "grad_norm": 19.885255813598633,
      "learning_rate": 2.0259015923859498e-05,
      "loss": 6.1,
      "step": 125
    },
    {
      "epoch": 0.6951724137931035,
      "grad_norm": 20.665145874023438,
      "learning_rate": 1.979120476675071e-05,
      "loss": 5.7938,
      "step": 126
    },
    {
      "epoch": 0.7006896551724138,
      "grad_norm": 23.18328094482422,
      "learning_rate": 1.9326184592324503e-05,
      "loss": 6.5563,
      "step": 127
    },
    {
      "epoch": 0.7062068965517241,
      "grad_norm": 24.821685791015625,
      "learning_rate": 1.8864082532453373e-05,
      "loss": 5.9187,
      "step": 128
    },
    {
      "epoch": 0.7117241379310345,
      "grad_norm": 20.381574630737305,
      "learning_rate": 1.840502492122644e-05,
      "loss": 5.0224,
      "step": 129
    },
    {
      "epoch": 0.7172413793103448,
      "grad_norm": 25.46938705444336,
      "learning_rate": 1.7949137260410924e-05,
      "loss": 5.9761,
      "step": 130
    },
    {
      "epoch": 0.7227586206896551,
      "grad_norm": 24.690874099731445,
      "learning_rate": 1.7496544185141295e-05,
      "loss": 6.0875,
      "step": 131
    },
    {
      "epoch": 0.7282758620689656,
      "grad_norm": 20.75514793395996,
      "learning_rate": 1.7047369429845216e-05,
      "loss": 4.9455,
      "step": 132
    },
    {
      "epoch": 0.7337931034482759,
      "grad_norm": 18.397716522216797,
      "learning_rate": 1.6601735794415806e-05,
      "loss": 3.7152,
      "step": 133
    },
    {
      "epoch": 0.7393103448275862,
      "grad_norm": 24.97144317626953,
      "learning_rate": 1.615976511063937e-05,
      "loss": 4.8452,
      "step": 134
    },
    {
      "epoch": 0.7448275862068966,
      "grad_norm": 32.98619842529297,
      "learning_rate": 1.5721578208887793e-05,
      "loss": 4.977,
      "step": 135
    },
    {
      "epoch": 0.7503448275862069,
      "grad_norm": 23.18657684326172,
      "learning_rate": 1.5287294885084766e-05,
      "loss": 8.0823,
      "step": 136
    },
    {
      "epoch": 0.7558620689655172,
      "grad_norm": 18.28014373779297,
      "learning_rate": 1.4857033867954697e-05,
      "loss": 7.2086,
      "step": 137
    },
    {
      "epoch": 0.7613793103448275,
      "grad_norm": 16.946495056152344,
      "learning_rate": 1.4430912786563554e-05,
      "loss": 5.8691,
      "step": 138
    },
    {
      "epoch": 0.766896551724138,
      "grad_norm": 15.615433692932129,
      "learning_rate": 1.4009048138160195e-05,
      "loss": 5.6876,
      "step": 139
    },
    {
      "epoch": 0.7724137931034483,
      "grad_norm": 15.042305946350098,
      "learning_rate": 1.3591555256327199e-05,
      "loss": 5.1846,
      "step": 140
    },
    {
      "epoch": 0.7779310344827586,
      "grad_norm": 14.744568824768066,
      "learning_rate": 1.3178548279449822e-05,
      "loss": 5.5824,
      "step": 141
    },
    {
      "epoch": 0.783448275862069,
      "grad_norm": 15.294870376586914,
      "learning_rate": 1.2770140119511693e-05,
      "loss": 5.23,
      "step": 142
    },
    {
      "epoch": 0.7889655172413793,
      "grad_norm": 15.637053489685059,
      "learning_rate": 1.2366442431225809e-05,
      "loss": 4.4227,
      "step": 143
    },
    {
      "epoch": 0.7944827586206896,
      "grad_norm": 17.9448184967041,
      "learning_rate": 1.1967565581509248e-05,
      "loss": 6.4883,
      "step": 144
    },
    {
      "epoch": 0.8,
      "grad_norm": 17.575916290283203,
      "learning_rate": 1.1573618619309965e-05,
      "loss": 4.7533,
      "step": 145
    },
    {
      "epoch": 0.8055172413793104,
      "grad_norm": 12.778429985046387,
      "learning_rate": 1.1184709245793889e-05,
      "loss": 4.6147,
      "step": 146
    },
    {
      "epoch": 0.8110344827586207,
      "grad_norm": 16.762371063232422,
      "learning_rate": 1.0800943784900502e-05,
      "loss": 5.0578,
      "step": 147
    },
    {
      "epoch": 0.8165517241379311,
      "grad_norm": 15.866377830505371,
      "learning_rate": 1.042242715427494e-05,
      "loss": 4.8278,
      "step": 148
    },
    {
      "epoch": 0.8220689655172414,
      "grad_norm": 16.081371307373047,
      "learning_rate": 1.004926283658455e-05,
      "loss": 5.0605,
      "step": 149
    },
    {
      "epoch": 0.8275862068965517,
      "grad_norm": 17.62371826171875,
      "learning_rate": 9.681552851227774e-06,
      "loss": 4.6947,
      "step": 150
    },
    {
      "epoch": 0.8275862068965517,
      "eval_loss": 1.2990185022354126,
      "eval_runtime": 22.2401,
      "eval_samples_per_second": 13.714,
      "eval_steps_per_second": 3.462,
      "step": 150
    },
    {
      "epoch": 0.833103448275862,
      "grad_norm": 15.228355407714844,
      "learning_rate": 9.319397726443026e-06,
      "loss": 4.537,
      "step": 151
    },
    {
      "epoch": 0.8386206896551724,
      "grad_norm": 16.1376953125,
      "learning_rate": 8.962896471825342e-06,
      "loss": 4.2811,
      "step": 152
    },
    {
      "epoch": 0.8441379310344828,
      "grad_norm": 17.54079246520996,
      "learning_rate": 8.61214655125809e-06,
      "loss": 5.1807,
      "step": 153
    },
    {
      "epoch": 0.8496551724137931,
      "grad_norm": 15.536772727966309,
      "learning_rate": 8.267243856267331e-06,
      "loss": 4.5729,
      "step": 154
    },
    {
      "epoch": 0.8551724137931035,
      "grad_norm": 19.280027389526367,
      "learning_rate": 7.928282679806052e-06,
      "loss": 4.8167,
      "step": 155
    },
    {
      "epoch": 0.8606896551724138,
      "grad_norm": 16.457712173461914,
      "learning_rate": 7.595355690475393e-06,
      "loss": 4.7762,
      "step": 156
    },
    {
      "epoch": 0.8662068965517241,
      "grad_norm": 18.711694717407227,
      "learning_rate": 7.268553907189964e-06,
      "loss": 4.7609,
      "step": 157
    },
    {
      "epoch": 0.8717241379310345,
      "grad_norm": 15.429764747619629,
      "learning_rate": 6.947966674294236e-06,
      "loss": 4.4401,
      "step": 158
    },
    {
      "epoch": 0.8772413793103448,
      "grad_norm": 18.525232315063477,
      "learning_rate": 6.6336816371366305e-06,
      "loss": 4.5169,
      "step": 159
    },
    {
      "epoch": 0.8827586206896552,
      "grad_norm": 20.283931732177734,
      "learning_rate": 6.325784718108196e-06,
      "loss": 5.5306,
      "step": 160
    },
    {
      "epoch": 0.8882758620689655,
      "grad_norm": 23.284629821777344,
      "learning_rate": 6.0243600931522595e-06,
      "loss": 4.9871,
      "step": 161
    },
    {
      "epoch": 0.8937931034482759,
      "grad_norm": 16.778202056884766,
      "learning_rate": 5.72949016875158e-06,
      "loss": 5.2333,
      "step": 162
    },
    {
      "epoch": 0.8993103448275862,
      "grad_norm": 21.578655242919922,
      "learning_rate": 5.44125555939923e-06,
      "loss": 5.4952,
      "step": 163
    },
    {
      "epoch": 0.9048275862068965,
      "grad_norm": 18.722185134887695,
      "learning_rate": 5.159735065559399e-06,
      "loss": 4.6659,
      "step": 164
    },
    {
      "epoch": 0.9103448275862069,
      "grad_norm": 19.489261627197266,
      "learning_rate": 4.885005652124144e-06,
      "loss": 4.7271,
      "step": 165
    },
    {
      "epoch": 0.9158620689655173,
      "grad_norm": 19.362234115600586,
      "learning_rate": 4.617142427371934e-06,
      "loss": 4.8474,
      "step": 166
    },
    {
      "epoch": 0.9213793103448276,
      "grad_norm": 21.526710510253906,
      "learning_rate": 4.3562186224338265e-06,
      "loss": 5.7556,
      "step": 167
    },
    {
      "epoch": 0.926896551724138,
      "grad_norm": 18.980236053466797,
      "learning_rate": 4.102305571272783e-06,
      "loss": 5.0587,
      "step": 168
    },
    {
      "epoch": 0.9324137931034483,
      "grad_norm": 18.678552627563477,
      "learning_rate": 3.855472691181678e-06,
      "loss": 4.3548,
      "step": 169
    },
    {
      "epoch": 0.9379310344827586,
      "grad_norm": 25.089895248413086,
      "learning_rate": 3.615787463805331e-06,
      "loss": 5.2909,
      "step": 170
    },
    {
      "epoch": 0.9434482758620689,
      "grad_norm": 24.198055267333984,
      "learning_rate": 3.383315416691646e-06,
      "loss": 4.7572,
      "step": 171
    },
    {
      "epoch": 0.9489655172413793,
      "grad_norm": 19.62432861328125,
      "learning_rate": 3.158120105377096e-06,
      "loss": 5.0689,
      "step": 172
    },
    {
      "epoch": 0.9544827586206897,
      "grad_norm": 20.130558013916016,
      "learning_rate": 2.940263096011233e-06,
      "loss": 4.3257,
      "step": 173
    },
    {
      "epoch": 0.96,
      "grad_norm": 25.325763702392578,
      "learning_rate": 2.729803948525125e-06,
      "loss": 5.8623,
      "step": 174
    },
    {
      "epoch": 0.9655172413793104,
      "grad_norm": 21.457735061645508,
      "learning_rate": 2.526800200348275e-06,
      "loss": 4.4341,
      "step": 175
    },
    {
      "epoch": 0.9710344827586207,
      "grad_norm": 22.726511001586914,
      "learning_rate": 2.3313073506784575e-06,
      "loss": 4.9613,
      "step": 176
    },
    {
      "epoch": 0.976551724137931,
      "grad_norm": 27.022869110107422,
      "learning_rate": 2.143378845308791e-06,
      "loss": 5.0634,
      "step": 177
    },
    {
      "epoch": 0.9820689655172414,
      "grad_norm": 25.477567672729492,
      "learning_rate": 1.9630660620161777e-06,
      "loss": 4.4496,
      "step": 178
    },
    {
      "epoch": 0.9875862068965517,
      "grad_norm": 24.415048599243164,
      "learning_rate": 1.790418296515165e-06,
      "loss": 4.149,
      "step": 179
    },
    {
      "epoch": 0.993103448275862,
      "grad_norm": 34.604026794433594,
      "learning_rate": 1.625482748980961e-06,
      "loss": 5.0238,
      "step": 180
    },
    {
      "epoch": 0.9986206896551724,
      "grad_norm": 16.760923385620117,
      "learning_rate": 1.4683045111453942e-06,
      "loss": 5.2373,
      "step": 181
    },
    {
      "epoch": 1.0041379310344827,
      "grad_norm": 16.47662925720215,
      "learning_rate": 1.3189265539692707e-06,
      "loss": 4.4056,
      "step": 182
    },
    {
      "epoch": 1.0096551724137932,
      "grad_norm": 16.61656951904297,
      "learning_rate": 1.1773897158945557e-06,
      "loss": 5.7166,
      "step": 183
    },
    {
      "epoch": 1.0151724137931035,
      "grad_norm": 16.779102325439453,
      "learning_rate": 1.0437326916795432e-06,
      "loss": 5.355,
      "step": 184
    },
    {
      "epoch": 1.0206896551724138,
      "grad_norm": 15.03419017791748,
      "learning_rate": 9.179920218200888e-07,
      "loss": 4.5611,
      "step": 185
    },
    {
      "epoch": 1.0262068965517241,
      "grad_norm": 16.868610382080078,
      "learning_rate": 8.002020825598277e-07,
      "loss": 4.6968,
      "step": 186
    },
    {
      "epoch": 1.0317241379310345,
      "grad_norm": 16.311511993408203,
      "learning_rate": 6.90395076492022e-07,
      "loss": 4.3882,
      "step": 187
    },
    {
      "epoch": 1.0372413793103448,
      "grad_norm": 14.302980422973633,
      "learning_rate": 5.886010237557194e-07,
      "loss": 4.1967,
      "step": 188
    },
    {
      "epoch": 1.042758620689655,
      "grad_norm": 15.19272232055664,
      "learning_rate": 4.94847753828529e-07,
      "loss": 3.7039,
      "step": 189
    },
    {
      "epoch": 1.0482758620689656,
      "grad_norm": 13.517196655273438,
      "learning_rate": 4.091608979183303e-07,
      "loss": 3.6311,
      "step": 190
    },
    {
      "epoch": 1.053793103448276,
      "grad_norm": 18.231815338134766,
      "learning_rate": 3.315638819559452e-07,
      "loss": 3.5249,
      "step": 191
    },
    {
      "epoch": 1.0593103448275862,
      "grad_norm": 13.879122734069824,
      "learning_rate": 2.6207792019074414e-07,
      "loss": 3.1093,
      "step": 192
    },
    {
      "epoch": 1.0648275862068965,
      "grad_norm": 14.321069717407227,
      "learning_rate": 2.0072200939085573e-07,
      "loss": 4.4564,
      "step": 193
    },
    {
      "epoch": 1.0703448275862069,
      "grad_norm": 15.723977088928223,
      "learning_rate": 1.475129236496575e-07,
      "loss": 3.9035,
      "step": 194
    },
    {
      "epoch": 1.0758620689655172,
      "grad_norm": 14.243907928466797,
      "learning_rate": 1.0246520979990459e-07,
      "loss": 3.327,
      "step": 195
    },
    {
      "epoch": 1.0813793103448275,
      "grad_norm": 11.917428970336914,
      "learning_rate": 6.559118343676396e-08,
      "loss": 2.6171,
      "step": 196
    },
    {
      "epoch": 1.086896551724138,
      "grad_norm": 11.877168655395508,
      "learning_rate": 3.690092555085789e-08,
      "loss": 2.8428,
      "step": 197
    },
    {
      "epoch": 1.0924137931034483,
      "grad_norm": 12.58403491973877,
      "learning_rate": 1.640227977221853e-08,
      "loss": 3.6764,
      "step": 198
    },
    {
      "epoch": 1.0979310344827586,
      "grad_norm": 12.414690017700195,
      "learning_rate": 4.1008502259298755e-09,
      "loss": 2.7963,
      "step": 199
    },
    {
      "epoch": 1.103448275862069,
      "grad_norm": 11.689571380615234,
      "learning_rate": 0.0,
      "loss": 2.3991,
      "step": 200
    },
    {
      "epoch": 1.103448275862069,
      "eval_loss": 1.2467516660690308,
      "eval_runtime": 22.2287,
      "eval_samples_per_second": 13.721,
      "eval_steps_per_second": 3.464,
      "step": 200
    }
  ],
  "logging_steps": 1,
  "max_steps": 200,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 2,
  "save_steps": 50,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 4,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 2.8598115829284864e+17,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}