{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.2422360248447206,
  "eval_steps": 500,
  "global_step": 200,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.012422360248447204,
      "grad_norm": 0.00018831038323696703,
      "learning_rate": 6.060606060606061e-06,
      "loss": 2.4276,
      "step": 2
    },
    {
      "epoch": 0.024844720496894408,
      "grad_norm": 0.00010692311479942873,
      "learning_rate": 1.8181818181818182e-05,
      "loss": 2.2565,
      "step": 4
    },
    {
      "epoch": 0.037267080745341616,
      "grad_norm": 6.173110887175426e-05,
      "learning_rate": 3.0303030303030306e-05,
      "loss": 1.9191,
      "step": 6
    },
    {
      "epoch": 0.049689440993788817,
      "grad_norm": 4.326488488004543e-05,
      "learning_rate": 4.242424242424243e-05,
      "loss": 1.7997,
      "step": 8
    },
    {
      "epoch": 0.062111801242236024,
      "grad_norm": 3.188664049957879e-05,
      "learning_rate": 5.4545454545454546e-05,
      "loss": 1.6296,
      "step": 10
    },
    {
      "epoch": 0.07453416149068323,
      "grad_norm": 2.6054423869936727e-05,
      "learning_rate": 6.666666666666667e-05,
      "loss": 1.5767,
      "step": 12
    },
    {
      "epoch": 0.08695652173913043,
      "grad_norm": 2.7649846742860973e-05,
      "learning_rate": 7.878787878787879e-05,
      "loss": 1.5131,
      "step": 14
    },
    {
      "epoch": 0.09937888198757763,
      "grad_norm": 1.6101763321785256e-05,
      "learning_rate": 9.090909090909092e-05,
      "loss": 1.4497,
      "step": 16
    },
    {
      "epoch": 0.11180124223602485,
      "grad_norm": 2.3354801669484004e-05,
      "learning_rate": 0.00010303030303030303,
      "loss": 1.4353,
      "step": 18
    },
    {
      "epoch": 0.12422360248447205,
      "grad_norm": 1.531304769741837e-05,
      "learning_rate": 0.00011515151515151516,
      "loss": 1.3655,
      "step": 20
    },
    {
      "epoch": 0.13664596273291926,
      "grad_norm": 9.777257218956947e-06,
      "learning_rate": 0.00012727272727272728,
      "loss": 1.3691,
      "step": 22
    },
    {
      "epoch": 0.14906832298136646,
      "grad_norm": 1.5149210412346292e-05,
      "learning_rate": 0.0001393939393939394,
      "loss": 1.3916,
      "step": 24
    },
    {
      "epoch": 0.16149068322981366,
      "grad_norm": 1.1748486940632574e-05,
      "learning_rate": 0.00015151515151515152,
      "loss": 1.341,
      "step": 26
    },
    {
      "epoch": 0.17391304347826086,
      "grad_norm": 3.532869595801458e-05,
      "learning_rate": 0.00016363636363636366,
      "loss": 1.3409,
      "step": 28
    },
    {
      "epoch": 0.18633540372670807,
      "grad_norm": 7.877199095673859e-06,
      "learning_rate": 0.00017575757575757578,
      "loss": 1.3728,
      "step": 30
    },
    {
      "epoch": 0.19875776397515527,
      "grad_norm": 1.1650959095277358e-05,
      "learning_rate": 0.0001878787878787879,
      "loss": 1.3579,
      "step": 32
    },
    {
      "epoch": 0.2111801242236025,
      "grad_norm": 3.298332740087062e-05,
      "learning_rate": 0.0002,
      "loss": 1.3835,
      "step": 34
    },
    {
      "epoch": 0.2236024844720497,
      "grad_norm": 9.103293450607453e-06,
      "learning_rate": 0.00019999471258384815,
      "loss": 1.3517,
      "step": 36
    },
    {
      "epoch": 0.2360248447204969,
      "grad_norm": 1.055269422067795e-05,
      "learning_rate": 0.000199978850894528,
      "loss": 1.3748,
      "step": 38
    },
    {
      "epoch": 0.2484472049689441,
      "grad_norm": 1.8264010577695444e-05,
      "learning_rate": 0.0001999524166093866,
      "loss": 1.383,
      "step": 40
    },
    {
      "epoch": 0.2608695652173913,
      "grad_norm": 9.094917004404124e-06,
      "learning_rate": 0.00019991541252380526,
      "loss": 1.4102,
      "step": 42
    },
    {
      "epoch": 0.2732919254658385,
      "grad_norm": 1.286895803787047e-05,
      "learning_rate": 0.00019986784255090397,
      "loss": 1.3811,
      "step": 44
    },
    {
      "epoch": 0.2857142857142857,
      "grad_norm": 1.386065014230553e-05,
      "learning_rate": 0.0001998097117211276,
      "loss": 1.3794,
      "step": 46
    },
    {
      "epoch": 0.2981366459627329,
      "grad_norm": 8.020670065889135e-06,
      "learning_rate": 0.00019974102618171394,
      "loss": 1.3584,
      "step": 48
    },
    {
      "epoch": 0.3105590062111801,
      "grad_norm": 8.217157301260158e-06,
      "learning_rate": 0.00019966179319604355,
      "loss": 1.3592,
      "step": 50
    },
    {
      "epoch": 0.32298136645962733,
      "grad_norm": 1.1515216101543047e-05,
      "learning_rate": 0.00019957202114287187,
      "loss": 1.3679,
      "step": 52
    },
    {
      "epoch": 0.33540372670807456,
      "grad_norm": 8.87443184183212e-06,
      "learning_rate": 0.0001994717195154429,
      "loss": 1.3779,
      "step": 54
    },
    {
      "epoch": 0.34782608695652173,
      "grad_norm": 7.773131073918194e-06,
      "learning_rate": 0.00019936089892048556,
      "loss": 1.3634,
      "step": 56
    },
    {
      "epoch": 0.36024844720496896,
      "grad_norm": 8.845816410030238e-06,
      "learning_rate": 0.00019923957107709195,
      "loss": 1.3312,
      "step": 58
    },
    {
      "epoch": 0.37267080745341613,
      "grad_norm": 8.347928087459877e-06,
      "learning_rate": 0.000199107748815478,
      "loss": 1.3626,
      "step": 60
    },
    {
      "epoch": 0.38509316770186336,
      "grad_norm": 9.047604180523194e-06,
      "learning_rate": 0.0001989654460756269,
      "loss": 1.3653,
      "step": 62
    },
    {
      "epoch": 0.39751552795031053,
      "grad_norm": 8.44144778966438e-06,
      "learning_rate": 0.00019881267790581466,
      "loss": 1.3712,
      "step": 64
    },
    {
      "epoch": 0.40993788819875776,
      "grad_norm": 8.453951522824354e-06,
      "learning_rate": 0.0001986494604610191,
      "loss": 1.3027,
      "step": 66
    },
    {
      "epoch": 0.422360248447205,
      "grad_norm": 7.831426046323031e-06,
      "learning_rate": 0.00019847581100121127,
      "loss": 1.3737,
      "step": 68
    },
    {
      "epoch": 0.43478260869565216,
      "grad_norm": 8.356658327102195e-06,
      "learning_rate": 0.00019829174788953038,
      "loss": 1.3522,
      "step": 70
    },
    {
      "epoch": 0.4472049689440994,
      "grad_norm": 8.671122486703098e-06,
      "learning_rate": 0.0001980972905903418,
      "loss": 1.3808,
      "step": 72
    },
    {
      "epoch": 0.45962732919254656,
      "grad_norm": 9.771009899850469e-06,
      "learning_rate": 0.00019789245966717883,
      "loss": 1.3695,
      "step": 74
    },
    {
      "epoch": 0.4720496894409938,
      "grad_norm": 6.7302303250471596e-06,
      "learning_rate": 0.00019767727678056805,
      "loss": 1.3754,
      "step": 76
    },
    {
      "epoch": 0.484472049689441,
      "grad_norm": 9.788966963242274e-06,
      "learning_rate": 0.00019745176468573893,
      "loss": 1.379,
      "step": 78
    },
    {
      "epoch": 0.4968944099378882,
      "grad_norm": 9.375480658491142e-06,
      "learning_rate": 0.00019721594723021732,
      "loss": 1.3484,
      "step": 80
    },
    {
      "epoch": 0.5093167701863354,
      "grad_norm": 3.550610927050002e-05,
      "learning_rate": 0.00019696984935130364,
      "loss": 1.377,
      "step": 82
    },
    {
      "epoch": 0.5217391304347826,
      "grad_norm": 9.707620847621001e-06,
      "learning_rate": 0.00019671349707343593,
      "loss": 1.3587,
      "step": 84
    },
    {
      "epoch": 0.5341614906832298,
      "grad_norm": 1.0621732144500129e-05,
      "learning_rate": 0.00019644691750543767,
      "loss": 1.393,
      "step": 86
    },
    {
      "epoch": 0.546583850931677,
      "grad_norm": 9.491611308476422e-06,
      "learning_rate": 0.0001961701388376511,
      "loss": 1.3715,
      "step": 88
    },
    {
      "epoch": 0.5590062111801242,
      "grad_norm": 8.982805411505979e-06,
      "learning_rate": 0.00019588319033895623,
      "loss": 1.3829,
      "step": 90
    },
    {
      "epoch": 0.5714285714285714,
      "grad_norm": 1.0132759598491248e-05,
      "learning_rate": 0.0001955861023536756,
      "loss": 1.3395,
      "step": 92
    },
    {
      "epoch": 0.5838509316770186,
      "grad_norm": 1.3477648280968424e-05,
      "learning_rate": 0.0001952789062983654,
      "loss": 1.3908,
      "step": 94
    },
    {
      "epoch": 0.5962732919254659,
      "grad_norm": 1.1435187843744643e-05,
      "learning_rate": 0.0001949616346584934,
      "loss": 1.3799,
      "step": 96
    },
    {
      "epoch": 0.6086956521739131,
      "grad_norm": 1.273245652555488e-05,
      "learning_rate": 0.00019463432098500337,
      "loss": 1.3973,
      "step": 98
    },
    {
      "epoch": 0.6211180124223602,
      "grad_norm": 1.1162846021761652e-05,
      "learning_rate": 0.00019429699989076746,
      "loss": 1.3564,
      "step": 100
    },
    {
      "epoch": 0.6335403726708074,
      "grad_norm": 1.1645292943285313e-05,
      "learning_rate": 0.00019394970704692566,
      "loss": 1.3535,
      "step": 102
    },
    {
      "epoch": 0.6459627329192547,
      "grad_norm": 3.541897240211256e-05,
      "learning_rate": 0.00019359247917911384,
      "loss": 1.3813,
      "step": 104
    },
    {
      "epoch": 0.6583850931677019,
      "grad_norm": 9.2498794401763e-06,
      "learning_rate": 0.00019322535406358,
      "loss": 1.3617,
      "step": 106
    },
    {
      "epoch": 0.6708074534161491,
      "grad_norm": 8.978020559879951e-06,
      "learning_rate": 0.00019284837052318933,
      "loss": 1.3813,
      "step": 108
    },
    {
      "epoch": 0.6832298136645962,
      "grad_norm": 1.3010416296310723e-05,
      "learning_rate": 0.00019246156842331918,
      "loss": 1.3726,
      "step": 110
    },
    {
      "epoch": 0.6956521739130435,
      "grad_norm": 1.2821891687053721e-05,
      "learning_rate": 0.00019206498866764288,
      "loss": 1.4042,
      "step": 112
    },
    {
      "epoch": 0.7080745341614907,
      "grad_norm": 9.70847668213537e-06,
      "learning_rate": 0.00019165867319380456,
      "loss": 1.4061,
      "step": 114
    },
    {
      "epoch": 0.7204968944099379,
      "grad_norm": 1.6388959920732304e-05,
      "learning_rate": 0.0001912426649689842,
      "loss": 1.3668,
      "step": 116
    },
    {
      "epoch": 0.7329192546583851,
      "grad_norm": 1.66231893672375e-05,
      "learning_rate": 0.00019081700798535397,
      "loss": 1.3712,
      "step": 118
    },
    {
      "epoch": 0.7453416149068323,
      "grad_norm": 1.864772457338404e-05,
      "learning_rate": 0.00019038174725542604,
      "loss": 1.3849,
      "step": 120
    },
    {
      "epoch": 0.7577639751552795,
      "grad_norm": 1.4855336303298827e-05,
      "learning_rate": 0.0001899369288072927,
      "loss": 1.4006,
      "step": 122
    },
    {
      "epoch": 0.7701863354037267,
      "grad_norm": 1.8388282114756294e-05,
      "learning_rate": 0.00018948259967975888,
      "loss": 1.4008,
      "step": 124
    },
    {
      "epoch": 0.782608695652174,
      "grad_norm": 2.689829489099793e-05,
      "learning_rate": 0.00018901880791736793,
      "loss": 1.3808,
      "step": 126
    },
    {
      "epoch": 0.7950310559006211,
      "grad_norm": 2.5062678105314262e-05,
      "learning_rate": 0.000188545602565321,
      "loss": 1.3948,
      "step": 128
    },
    {
      "epoch": 0.8074534161490683,
      "grad_norm": 2.1250931240501814e-05,
      "learning_rate": 0.0001880630336642905,
      "loss": 1.402,
      "step": 130
    },
    {
      "epoch": 0.8198757763975155,
      "grad_norm": 2.403008875262458e-05,
      "learning_rate": 0.0001875711522451284,
      "loss": 1.384,
      "step": 132
    },
    {
      "epoch": 0.8322981366459627,
      "grad_norm": 2.597655839053914e-05,
      "learning_rate": 0.00018707001032347,
      "loss": 1.3541,
      "step": 134
    },
    {
      "epoch": 0.84472049689441,
      "grad_norm": 2.3845692339818925e-05,
      "learning_rate": 0.0001865596608942331,
      "loss": 1.396,
      "step": 136
    },
    {
      "epoch": 0.8571428571428571,
      "grad_norm": 1.8721084416029043e-05,
      "learning_rate": 0.00018604015792601396,
      "loss": 1.3427,
      "step": 138
    },
    {
      "epoch": 0.8695652173913043,
      "grad_norm": 3.2439467759104446e-05,
      "learning_rate": 0.0001855115563553803,
      "loss": 1.376,
      "step": 140
    },
    {
      "epoch": 0.8819875776397516,
      "grad_norm": 2.3974960640771315e-05,
      "learning_rate": 0.0001849739120810618,
      "loss": 1.4019,
      "step": 142
    },
    {
      "epoch": 0.8944099378881988,
      "grad_norm": 3.250224108342081e-05,
      "learning_rate": 0.00018442728195803881,
      "loss": 1.3915,
      "step": 144
    },
    {
      "epoch": 0.906832298136646,
      "grad_norm": 2.820833833538927e-05,
      "learning_rate": 0.0001838717237915302,
      "loss": 1.3943,
      "step": 146
    },
    {
      "epoch": 0.9192546583850931,
      "grad_norm": 4.0885188354877755e-05,
      "learning_rate": 0.00018330729633088045,
      "loss": 1.3818,
      "step": 148
    },
    {
      "epoch": 0.9316770186335404,
      "grad_norm": 5.243903797236271e-05,
      "learning_rate": 0.00018273405926334696,
      "loss": 1.4112,
      "step": 150
    },
    {
      "epoch": 0.9440993788819876,
      "grad_norm": 4.928431008011103e-05,
      "learning_rate": 0.0001821520732077883,
      "loss": 4.4804,
      "step": 152
    },
    {
      "epoch": 0.9565217391304348,
      "grad_norm": 7.073425513226539e-05,
      "learning_rate": 0.00018156139970825391,
      "loss": 1.4202,
      "step": 154
    },
    {
      "epoch": 0.968944099378882,
      "grad_norm": 4.9798843974713236e-05,
      "learning_rate": 0.00018096210122747584,
      "loss": 1.4218,
      "step": 156
    },
    {
      "epoch": 0.9813664596273292,
      "grad_norm": 6.847319309599698e-05,
      "learning_rate": 0.0001803542411402634,
      "loss": 1.401,
      "step": 158
    },
    {
      "epoch": 0.9937888198757764,
      "grad_norm": 5.58948922844138e-05,
      "learning_rate": 0.0001797378837268015,
      "loss": 1.3425,
      "step": 160
    },
    {
      "epoch": 1.0062111801242235,
      "grad_norm": 4.716762123280205e-05,
      "learning_rate": 0.00017911309416585296,
      "loss": 1.3779,
      "step": 162
    },
    {
      "epoch": 1.0186335403726707,
      "grad_norm": 5.352822699933313e-05,
      "learning_rate": 0.0001784799385278661,
      "loss": 1.341,
      "step": 164
    },
    {
      "epoch": 1.031055900621118,
      "grad_norm": 5.088459511171095e-05,
      "learning_rate": 0.0001778384837679879,
      "loss": 1.4021,
      "step": 166
    },
    {
      "epoch": 1.0434782608695652,
      "grad_norm": 5.318582770996727e-05,
      "learning_rate": 0.00017718879771898348,
      "loss": 1.3597,
      "step": 168
    },
    {
      "epoch": 1.0559006211180124,
      "grad_norm": 4.925034227198921e-05,
      "learning_rate": 0.00017653094908406301,
      "loss": 1.3425,
      "step": 170
    },
    {
      "epoch": 1.0683229813664596,
      "grad_norm": 5.734206206398085e-05,
      "learning_rate": 0.00017586500742961653,
      "loss": 1.389,
      "step": 172
    },
    {
      "epoch": 1.0807453416149069,
      "grad_norm": 5.8051424275618047e-05,
      "learning_rate": 0.00017519104317785717,
      "loss": 1.3776,
      "step": 174
    },
    {
      "epoch": 1.093167701863354,
      "grad_norm": 4.752572931465693e-05,
      "learning_rate": 0.00017450912759937434,
      "loss": 1.4045,
      "step": 176
    },
    {
      "epoch": 1.1055900621118013,
      "grad_norm": 4.9184389354195446e-05,
      "learning_rate": 0.00017381933280559693,
      "loss": 2.4013,
      "step": 178
    },
    {
      "epoch": 1.1180124223602483,
      "grad_norm": 6.995958392508328e-05,
      "learning_rate": 0.00017312173174116762,
      "loss": 1.6484,
      "step": 180
    },
    {
      "epoch": 1.1304347826086956,
      "grad_norm": 4.960764272254892e-05,
      "learning_rate": 0.0001724163981762291,
      "loss": 1.3301,
      "step": 182
    },
    {
      "epoch": 1.1428571428571428,
      "grad_norm": 4.448357503861189e-05,
      "learning_rate": 0.0001717034066986231,
      "loss": 1.4058,
      "step": 184
    },
    {
      "epoch": 1.15527950310559,
      "grad_norm": 4.6940349420765415e-05,
      "learning_rate": 0.00017098283270600267,
      "loss": 1.3731,
      "step": 186
    },
    {
      "epoch": 1.1677018633540373,
      "grad_norm": 4.82694485981483e-05,
      "learning_rate": 0.00017025475239785919,
      "loss": 1.3586,
      "step": 188
    },
    {
      "epoch": 1.1801242236024845,
      "grad_norm": 4.615723810275085e-05,
      "learning_rate": 0.00016951924276746425,
      "loss": 1.3777,
      "step": 190
    },
    {
      "epoch": 1.1925465838509317,
      "grad_norm": 4.163803532719612e-05,
      "learning_rate": 0.00016877638159372782,
      "loss": 1.3974,
      "step": 192
    },
    {
      "epoch": 1.204968944099379,
      "grad_norm": 4.0616945625515655e-05,
      "learning_rate": 0.00016802624743297333,
      "loss": 1.41,
      "step": 194
    },
    {
      "epoch": 1.2173913043478262,
      "grad_norm": 4.708253254648298e-05,
      "learning_rate": 0.00016726891961063028,
      "loss": 1.3499,
      "step": 196
    },
    {
      "epoch": 1.2298136645962732,
      "grad_norm": 4.392105984152295e-05,
      "learning_rate": 0.00016650447821284594,
      "loss": 1.3619,
      "step": 198
    },
    {
      "epoch": 1.2422360248447206,
      "grad_norm": 4.838509266846813e-05,
      "learning_rate": 0.00016573300407801616,
      "loss": 1.3634,
      "step": 200
    }
  ],
  "logging_steps": 2,
  "max_steps": 644,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 4,
  "save_steps": 200,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 2.8994607076972954e+17,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}