|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.03305238803503553, |
|
"eval_steps": 50, |
|
"global_step": 200, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.00016526194017517766, |
|
"grad_norm": 0.555425226688385, |
|
"learning_rate": 2e-05, |
|
"loss": 2.2272, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.00016526194017517766, |
|
"eval_loss": 1.8130699396133423, |
|
"eval_runtime": 42.0001, |
|
"eval_samples_per_second": 60.667, |
|
"eval_steps_per_second": 30.333, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0003305238803503553, |
|
"grad_norm": 0.38278359174728394, |
|
"learning_rate": 4e-05, |
|
"loss": 1.7691, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.0004957858205255329, |
|
"grad_norm": 0.2836111783981323, |
|
"learning_rate": 6e-05, |
|
"loss": 1.6578, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.0006610477607007106, |
|
"grad_norm": 0.4891590178012848, |
|
"learning_rate": 8e-05, |
|
"loss": 2.1334, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.0008263097008758883, |
|
"grad_norm": 0.3973068296909332, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8242, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.0009915716410510659, |
|
"grad_norm": 0.3104502856731415, |
|
"learning_rate": 0.00012, |
|
"loss": 1.6421, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.0011568335812262437, |
|
"grad_norm": 0.4168192446231842, |
|
"learning_rate": 0.00014, |
|
"loss": 1.8231, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.0013220955214014213, |
|
"grad_norm": 0.45680752396583557, |
|
"learning_rate": 0.00016, |
|
"loss": 1.7829, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.001487357461576599, |
|
"grad_norm": 0.40455353260040283, |
|
"learning_rate": 0.00018, |
|
"loss": 2.0192, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.0016526194017517765, |
|
"grad_norm": 0.3940141499042511, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6667, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.0018178813419269541, |
|
"grad_norm": 0.3571617603302002, |
|
"learning_rate": 0.0001999863304992469, |
|
"loss": 1.6667, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.0019831432821021317, |
|
"grad_norm": 0.5306565761566162, |
|
"learning_rate": 0.00019994532573409262, |
|
"loss": 1.767, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.0021484052222773093, |
|
"grad_norm": 0.33301275968551636, |
|
"learning_rate": 0.00019987699691483048, |
|
"loss": 1.4221, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.0023136671624524874, |
|
"grad_norm": 0.3087587058544159, |
|
"learning_rate": 0.00019978136272187747, |
|
"loss": 1.5223, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.002478929102627665, |
|
"grad_norm": 0.5549050569534302, |
|
"learning_rate": 0.000199658449300667, |
|
"loss": 1.6935, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.0026441910428028426, |
|
"grad_norm": 0.43276911973953247, |
|
"learning_rate": 0.00019950829025450114, |
|
"loss": 1.7447, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.00280945298297802, |
|
"grad_norm": 0.41811099648475647, |
|
"learning_rate": 0.00019933092663536382, |
|
"loss": 1.6279, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.002974714923153198, |
|
"grad_norm": 0.7340661287307739, |
|
"learning_rate": 0.00019912640693269752, |
|
"loss": 1.7762, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.0031399768633283754, |
|
"grad_norm": 0.5529046654701233, |
|
"learning_rate": 0.00019889478706014687, |
|
"loss": 1.7278, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.003305238803503553, |
|
"grad_norm": 0.5795073509216309, |
|
"learning_rate": 0.00019863613034027224, |
|
"loss": 1.6349, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.0034705007436787306, |
|
"grad_norm": 0.4024738073348999, |
|
"learning_rate": 0.00019835050748723824, |
|
"loss": 1.478, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.0036357626838539082, |
|
"grad_norm": 0.5151705145835876, |
|
"learning_rate": 0.00019803799658748094, |
|
"loss": 1.3476, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.0038010246240290863, |
|
"grad_norm": 0.47924888134002686, |
|
"learning_rate": 0.00019769868307835994, |
|
"loss": 1.6887, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.0039662865642042635, |
|
"grad_norm": 0.414813756942749, |
|
"learning_rate": 0.0001973326597248006, |
|
"loss": 1.4679, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.0041315485043794415, |
|
"grad_norm": 0.5144776701927185, |
|
"learning_rate": 0.00019694002659393305, |
|
"loss": 1.6018, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.004296810444554619, |
|
"grad_norm": 0.6902279853820801, |
|
"learning_rate": 0.00019652089102773488, |
|
"loss": 1.6141, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.004462072384729797, |
|
"grad_norm": 0.5526428818702698, |
|
"learning_rate": 0.00019607536761368484, |
|
"loss": 1.4361, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.004627334324904975, |
|
"grad_norm": 0.9285733699798584, |
|
"learning_rate": 0.00019560357815343577, |
|
"loss": 1.9259, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.004792596265080152, |
|
"grad_norm": 0.37774184346199036, |
|
"learning_rate": 0.00019510565162951537, |
|
"loss": 1.4737, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.00495785820525533, |
|
"grad_norm": 0.36778566241264343, |
|
"learning_rate": 0.00019458172417006347, |
|
"loss": 1.5279, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.005123120145430507, |
|
"grad_norm": 0.5695986151695251, |
|
"learning_rate": 0.00019403193901161613, |
|
"loss": 1.8606, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.005288382085605685, |
|
"grad_norm": 0.3802303969860077, |
|
"learning_rate": 0.0001934564464599461, |
|
"loss": 1.6067, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.005453644025780862, |
|
"grad_norm": 0.7587772011756897, |
|
"learning_rate": 0.00019285540384897073, |
|
"loss": 1.8386, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.00561890596595604, |
|
"grad_norm": 0.6221739053726196, |
|
"learning_rate": 0.00019222897549773848, |
|
"loss": 1.4816, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.005784167906131218, |
|
"grad_norm": 0.583857536315918, |
|
"learning_rate": 0.00019157733266550575, |
|
"loss": 1.6782, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.005949429846306396, |
|
"grad_norm": 0.481245756149292, |
|
"learning_rate": 0.00019090065350491626, |
|
"loss": 1.4703, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.006114691786481574, |
|
"grad_norm": 0.44734448194503784, |
|
"learning_rate": 0.00019019912301329592, |
|
"loss": 1.5667, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.006279953726656751, |
|
"grad_norm": 0.4786554276943207, |
|
"learning_rate": 0.00018947293298207635, |
|
"loss": 1.5302, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.006445215666831929, |
|
"grad_norm": 0.47823983430862427, |
|
"learning_rate": 0.0001887222819443612, |
|
"loss": 1.4859, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.006610477607007106, |
|
"grad_norm": 0.4842425286769867, |
|
"learning_rate": 0.0001879473751206489, |
|
"loss": 1.3932, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.006775739547182284, |
|
"grad_norm": 0.3999955356121063, |
|
"learning_rate": 0.00018714842436272773, |
|
"loss": 1.4406, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.006941001487357461, |
|
"grad_norm": 0.4841754138469696, |
|
"learning_rate": 0.00018632564809575742, |
|
"loss": 1.4164, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.007106263427532639, |
|
"grad_norm": 0.5468775033950806, |
|
"learning_rate": 0.0001854792712585539, |
|
"loss": 1.7234, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.0072715253677078165, |
|
"grad_norm": 0.4681849479675293, |
|
"learning_rate": 0.00018460952524209355, |
|
"loss": 1.3656, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.0074367873078829945, |
|
"grad_norm": 0.5926626920700073, |
|
"learning_rate": 0.00018371664782625287, |
|
"loss": 1.5722, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.007602049248058173, |
|
"grad_norm": 0.5072199702262878, |
|
"learning_rate": 0.00018280088311480201, |
|
"loss": 1.4031, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.00776731118823335, |
|
"grad_norm": 0.48731309175491333, |
|
"learning_rate": 0.00018186248146866927, |
|
"loss": 1.4756, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.007932573128408527, |
|
"grad_norm": 0.4783889949321747, |
|
"learning_rate": 0.00018090169943749476, |
|
"loss": 1.0337, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.008097835068583706, |
|
"grad_norm": 0.458065927028656, |
|
"learning_rate": 0.0001799187996894925, |
|
"loss": 1.5369, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.008263097008758883, |
|
"grad_norm": 0.44745975732803345, |
|
"learning_rate": 0.00017891405093963938, |
|
"loss": 1.4635, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.008263097008758883, |
|
"eval_loss": 1.5854825973510742, |
|
"eval_runtime": 42.2167, |
|
"eval_samples_per_second": 60.355, |
|
"eval_steps_per_second": 30.178, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.00842835894893406, |
|
"grad_norm": 0.47436589002609253, |
|
"learning_rate": 0.00017788772787621126, |
|
"loss": 1.667, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.008593620889109237, |
|
"grad_norm": 0.6580041646957397, |
|
"learning_rate": 0.00017684011108568592, |
|
"loss": 1.4318, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.008758882829284416, |
|
"grad_norm": 0.5122596621513367, |
|
"learning_rate": 0.0001757714869760335, |
|
"loss": 1.5615, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.008924144769459593, |
|
"grad_norm": 0.5801290273666382, |
|
"learning_rate": 0.0001746821476984154, |
|
"loss": 1.3862, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.00908940670963477, |
|
"grad_norm": 0.6132497191429138, |
|
"learning_rate": 0.00017357239106731317, |
|
"loss": 1.3698, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.00925466864980995, |
|
"grad_norm": 0.6059843301773071, |
|
"learning_rate": 0.00017244252047910892, |
|
"loss": 1.6441, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.009419930589985127, |
|
"grad_norm": 0.6696584224700928, |
|
"learning_rate": 0.00017129284482913972, |
|
"loss": 1.5325, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.009585192530160304, |
|
"grad_norm": 0.6695738434791565, |
|
"learning_rate": 0.00017012367842724887, |
|
"loss": 1.3917, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.009750454470335481, |
|
"grad_norm": 0.5053813457489014, |
|
"learning_rate": 0.0001689353409118566, |
|
"loss": 1.597, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.00991571641051066, |
|
"grad_norm": 0.568986713886261, |
|
"learning_rate": 0.00016772815716257412, |
|
"loss": 1.926, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.010080978350685837, |
|
"grad_norm": 0.5120527148246765, |
|
"learning_rate": 0.0001665024572113848, |
|
"loss": 1.5176, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.010246240290861014, |
|
"grad_norm": 0.3892988860607147, |
|
"learning_rate": 0.00016525857615241687, |
|
"loss": 1.6132, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.010411502231036193, |
|
"grad_norm": 0.5233981609344482, |
|
"learning_rate": 0.00016399685405033167, |
|
"loss": 1.501, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.01057676417121137, |
|
"grad_norm": 0.5761738419532776, |
|
"learning_rate": 0.0001627176358473537, |
|
"loss": 1.6147, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.010742026111386548, |
|
"grad_norm": 0.475273460149765, |
|
"learning_rate": 0.0001614212712689668, |
|
"loss": 1.6735, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.010907288051561725, |
|
"grad_norm": 0.4451874792575836, |
|
"learning_rate": 0.00016010811472830252, |
|
"loss": 1.3603, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.011072549991736904, |
|
"grad_norm": 0.3606850504875183, |
|
"learning_rate": 0.00015877852522924732, |
|
"loss": 1.2809, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.01123781193191208, |
|
"grad_norm": 0.4811137020587921, |
|
"learning_rate": 0.00015743286626829437, |
|
"loss": 1.5322, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.011403073872087258, |
|
"grad_norm": 0.40435728430747986, |
|
"learning_rate": 0.0001560715057351673, |
|
"loss": 1.4096, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.011568335812262435, |
|
"grad_norm": 0.4875733554363251, |
|
"learning_rate": 0.00015469481581224272, |
|
"loss": 1.694, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.011733597752437614, |
|
"grad_norm": 0.3643505871295929, |
|
"learning_rate": 0.0001533031728727994, |
|
"loss": 1.4144, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.011898859692612791, |
|
"grad_norm": 0.5411165952682495, |
|
"learning_rate": 0.00015189695737812152, |
|
"loss": 1.5356, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.012064121632787968, |
|
"grad_norm": 0.48910924792289734, |
|
"learning_rate": 0.0001504765537734844, |
|
"loss": 1.4965, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.012229383572963147, |
|
"grad_norm": 0.3826967477798462, |
|
"learning_rate": 0.00014904235038305083, |
|
"loss": 1.5359, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.012394645513138325, |
|
"grad_norm": 0.6282126903533936, |
|
"learning_rate": 0.00014759473930370736, |
|
"loss": 1.8296, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.012559907453313502, |
|
"grad_norm": 0.444775253534317, |
|
"learning_rate": 0.0001461341162978688, |
|
"loss": 1.3698, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.012725169393488679, |
|
"grad_norm": 0.5965347290039062, |
|
"learning_rate": 0.00014466088068528068, |
|
"loss": 1.8349, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.012890431333663858, |
|
"grad_norm": 0.6038627624511719, |
|
"learning_rate": 0.00014317543523384928, |
|
"loss": 1.4731, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.013055693273839035, |
|
"grad_norm": 0.5650402307510376, |
|
"learning_rate": 0.00014167818604952906, |
|
"loss": 1.5489, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.013220955214014212, |
|
"grad_norm": 0.6513015627861023, |
|
"learning_rate": 0.00014016954246529696, |
|
"loss": 1.6598, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.013386217154189391, |
|
"grad_norm": 0.7023486495018005, |
|
"learning_rate": 0.00013864991692924523, |
|
"loss": 1.5978, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.013551479094364568, |
|
"grad_norm": 0.4741460680961609, |
|
"learning_rate": 0.00013711972489182208, |
|
"loss": 1.4441, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.013716741034539745, |
|
"grad_norm": 0.5267840623855591, |
|
"learning_rate": 0.00013557938469225167, |
|
"loss": 1.6758, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.013882002974714923, |
|
"grad_norm": 0.4459191858768463, |
|
"learning_rate": 0.00013402931744416433, |
|
"loss": 1.6122, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.014047264914890101, |
|
"grad_norm": 0.4186781048774719, |
|
"learning_rate": 0.00013246994692046836, |
|
"loss": 1.4066, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.014212526855065279, |
|
"grad_norm": 0.6138430833816528, |
|
"learning_rate": 0.00013090169943749476, |
|
"loss": 1.6558, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.014377788795240456, |
|
"grad_norm": 0.5305865406990051, |
|
"learning_rate": 0.0001293250037384465, |
|
"loss": 1.5737, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.014543050735415633, |
|
"grad_norm": 0.44837602972984314, |
|
"learning_rate": 0.00012774029087618446, |
|
"loss": 1.4177, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.014708312675590812, |
|
"grad_norm": 0.3854394853115082, |
|
"learning_rate": 0.00012614799409538198, |
|
"loss": 1.3886, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.014873574615765989, |
|
"grad_norm": 0.4300711154937744, |
|
"learning_rate": 0.00012454854871407994, |
|
"loss": 1.4607, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.015038836555941166, |
|
"grad_norm": 0.5587463974952698, |
|
"learning_rate": 0.00012294239200467516, |
|
"loss": 1.5351, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.015204098496116345, |
|
"grad_norm": 0.5253020524978638, |
|
"learning_rate": 0.0001213299630743747, |
|
"loss": 1.3066, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.015369360436291522, |
|
"grad_norm": 0.46560612320899963, |
|
"learning_rate": 0.00011971170274514802, |
|
"loss": 1.5455, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.0155346223764667, |
|
"grad_norm": 0.5804600715637207, |
|
"learning_rate": 0.000118088053433211, |
|
"loss": 1.8134, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.015699884316641877, |
|
"grad_norm": 0.4481876790523529, |
|
"learning_rate": 0.00011645945902807341, |
|
"loss": 1.589, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.015865146256817054, |
|
"grad_norm": 0.43116191029548645, |
|
"learning_rate": 0.0001148263647711842, |
|
"loss": 1.601, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.01603040819699223, |
|
"grad_norm": 0.6765596270561218, |
|
"learning_rate": 0.00011318921713420691, |
|
"loss": 1.6353, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.01619567013716741, |
|
"grad_norm": 0.37676534056663513, |
|
"learning_rate": 0.00011154846369695863, |
|
"loss": 1.5367, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.01636093207734259, |
|
"grad_norm": 0.8401118516921997, |
|
"learning_rate": 0.0001099045530250463, |
|
"loss": 1.8154, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.016526194017517766, |
|
"grad_norm": 0.45617929100990295, |
|
"learning_rate": 0.00010825793454723325, |
|
"loss": 1.2815, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.016526194017517766, |
|
"eval_loss": 1.5545666217803955, |
|
"eval_runtime": 41.38, |
|
"eval_samples_per_second": 61.576, |
|
"eval_steps_per_second": 30.788, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.016691455957692943, |
|
"grad_norm": 0.5702753663063049, |
|
"learning_rate": 0.00010660905843256994, |
|
"loss": 1.553, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.01685671789786812, |
|
"grad_norm": 0.5369312167167664, |
|
"learning_rate": 0.00010495837546732224, |
|
"loss": 1.5788, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.017021979838043298, |
|
"grad_norm": 0.4222307801246643, |
|
"learning_rate": 0.00010330633693173082, |
|
"loss": 1.5621, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.017187241778218475, |
|
"grad_norm": 0.5532189607620239, |
|
"learning_rate": 0.00010165339447663587, |
|
"loss": 1.4918, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.017352503718393655, |
|
"grad_norm": 0.7070478796958923, |
|
"learning_rate": 0.0001, |
|
"loss": 1.623, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.017517765658568833, |
|
"grad_norm": 0.6058260202407837, |
|
"learning_rate": 9.834660552336415e-05, |
|
"loss": 1.5853, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.01768302759874401, |
|
"grad_norm": 0.3898620009422302, |
|
"learning_rate": 9.669366306826919e-05, |
|
"loss": 1.5686, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.017848289538919187, |
|
"grad_norm": 0.5478883385658264, |
|
"learning_rate": 9.504162453267777e-05, |
|
"loss": 1.5867, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.018013551479094364, |
|
"grad_norm": 0.47779756784439087, |
|
"learning_rate": 9.339094156743007e-05, |
|
"loss": 1.4921, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.01817881341926954, |
|
"grad_norm": 0.8399332165718079, |
|
"learning_rate": 9.174206545276677e-05, |
|
"loss": 1.6164, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.01834407535944472, |
|
"grad_norm": 0.5388432145118713, |
|
"learning_rate": 9.009544697495374e-05, |
|
"loss": 1.6265, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.0185093372996199, |
|
"grad_norm": 0.5579386949539185, |
|
"learning_rate": 8.845153630304139e-05, |
|
"loss": 1.3753, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.018674599239795076, |
|
"grad_norm": 0.40802866220474243, |
|
"learning_rate": 8.681078286579311e-05, |
|
"loss": 1.3763, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 0.018839861179970253, |
|
"grad_norm": 0.552657425403595, |
|
"learning_rate": 8.517363522881579e-05, |
|
"loss": 1.3575, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.01900512312014543, |
|
"grad_norm": 0.8166072964668274, |
|
"learning_rate": 8.35405409719266e-05, |
|
"loss": 1.6855, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.019170385060320608, |
|
"grad_norm": 0.8131512999534607, |
|
"learning_rate": 8.191194656678904e-05, |
|
"loss": 1.8306, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.019335647000495785, |
|
"grad_norm": 0.42067432403564453, |
|
"learning_rate": 8.028829725485199e-05, |
|
"loss": 1.5663, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.019500908940670962, |
|
"grad_norm": 0.5358683466911316, |
|
"learning_rate": 7.867003692562534e-05, |
|
"loss": 1.7055, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.019666170880846143, |
|
"grad_norm": 0.6525923013687134, |
|
"learning_rate": 7.705760799532485e-05, |
|
"loss": 1.3311, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 0.01983143282102132, |
|
"grad_norm": 0.6140702962875366, |
|
"learning_rate": 7.54514512859201e-05, |
|
"loss": 1.7929, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.019996694761196497, |
|
"grad_norm": 0.6280947923660278, |
|
"learning_rate": 7.385200590461803e-05, |
|
"loss": 1.7175, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 0.020161956701371674, |
|
"grad_norm": 0.686147153377533, |
|
"learning_rate": 7.225970912381556e-05, |
|
"loss": 1.6716, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.02032721864154685, |
|
"grad_norm": 0.5507709383964539, |
|
"learning_rate": 7.067499626155354e-05, |
|
"loss": 1.3555, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 0.02049248058172203, |
|
"grad_norm": 0.5885040163993835, |
|
"learning_rate": 6.909830056250527e-05, |
|
"loss": 1.3161, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.020657742521897206, |
|
"grad_norm": 0.4891628623008728, |
|
"learning_rate": 6.753005307953167e-05, |
|
"loss": 1.4675, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.020823004462072386, |
|
"grad_norm": 0.8562346696853638, |
|
"learning_rate": 6.59706825558357e-05, |
|
"loss": 1.5319, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.020988266402247564, |
|
"grad_norm": 0.676142156124115, |
|
"learning_rate": 6.442061530774834e-05, |
|
"loss": 1.4428, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 0.02115352834242274, |
|
"grad_norm": 0.5416271686553955, |
|
"learning_rate": 6.28802751081779e-05, |
|
"loss": 1.4903, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.021318790282597918, |
|
"grad_norm": 0.4796253442764282, |
|
"learning_rate": 6.135008307075481e-05, |
|
"loss": 1.4259, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.021484052222773095, |
|
"grad_norm": 0.4909597635269165, |
|
"learning_rate": 5.983045753470308e-05, |
|
"loss": 1.3397, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.021649314162948272, |
|
"grad_norm": 0.5054872035980225, |
|
"learning_rate": 5.832181395047098e-05, |
|
"loss": 1.6404, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 0.02181457610312345, |
|
"grad_norm": 0.6180214285850525, |
|
"learning_rate": 5.6824564766150726e-05, |
|
"loss": 1.782, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.021979838043298627, |
|
"grad_norm": 0.657794177532196, |
|
"learning_rate": 5.533911931471936e-05, |
|
"loss": 1.5387, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 0.022145099983473807, |
|
"grad_norm": 0.5247397422790527, |
|
"learning_rate": 5.386588370213124e-05, |
|
"loss": 1.4955, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.022310361923648984, |
|
"grad_norm": 0.5147126913070679, |
|
"learning_rate": 5.240526069629265e-05, |
|
"loss": 1.4354, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.02247562386382416, |
|
"grad_norm": 0.4486481249332428, |
|
"learning_rate": 5.095764961694922e-05, |
|
"loss": 1.3835, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.02264088580399934, |
|
"grad_norm": 0.4644688367843628, |
|
"learning_rate": 4.952344622651566e-05, |
|
"loss": 1.5637, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 0.022806147744174516, |
|
"grad_norm": 0.7985265254974365, |
|
"learning_rate": 4.810304262187852e-05, |
|
"loss": 1.4771, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.022971409684349693, |
|
"grad_norm": 0.43993720412254333, |
|
"learning_rate": 4.669682712720065e-05, |
|
"loss": 1.4761, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 0.02313667162452487, |
|
"grad_norm": 0.597682535648346, |
|
"learning_rate": 4.530518418775733e-05, |
|
"loss": 1.5009, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.02330193356470005, |
|
"grad_norm": 0.46359845995903015, |
|
"learning_rate": 4.392849426483274e-05, |
|
"loss": 1.4938, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 0.023467195504875228, |
|
"grad_norm": 0.5101354718208313, |
|
"learning_rate": 4.256713373170564e-05, |
|
"loss": 1.4861, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.023632457445050405, |
|
"grad_norm": 0.5093466639518738, |
|
"learning_rate": 4.12214747707527e-05, |
|
"loss": 1.4807, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 0.023797719385225583, |
|
"grad_norm": 0.5195640325546265, |
|
"learning_rate": 3.9891885271697496e-05, |
|
"loss": 1.61, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.02396298132540076, |
|
"grad_norm": 0.500438392162323, |
|
"learning_rate": 3.857872873103322e-05, |
|
"loss": 1.4983, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.024128243265575937, |
|
"grad_norm": 0.4799457788467407, |
|
"learning_rate": 3.7282364152646297e-05, |
|
"loss": 1.3141, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.024293505205751114, |
|
"grad_norm": 0.5169847011566162, |
|
"learning_rate": 3.600314594966834e-05, |
|
"loss": 1.7889, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 0.024458767145926295, |
|
"grad_norm": 0.49090850353240967, |
|
"learning_rate": 3.4741423847583134e-05, |
|
"loss": 1.6389, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.024624029086101472, |
|
"grad_norm": 0.6075248122215271, |
|
"learning_rate": 3.349754278861517e-05, |
|
"loss": 1.5403, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 0.02478929102627665, |
|
"grad_norm": 0.5191071033477783, |
|
"learning_rate": 3.227184283742591e-05, |
|
"loss": 1.3608, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.02478929102627665, |
|
"eval_loss": 1.5432822704315186, |
|
"eval_runtime": 41.6111, |
|
"eval_samples_per_second": 61.234, |
|
"eval_steps_per_second": 30.617, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.024954552966451826, |
|
"grad_norm": 1.1631395816802979, |
|
"learning_rate": 3.106465908814342e-05, |
|
"loss": 1.8673, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 0.025119814906627003, |
|
"grad_norm": 0.5590987205505371, |
|
"learning_rate": 2.9876321572751144e-05, |
|
"loss": 1.5469, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.02528507684680218, |
|
"grad_norm": 0.49430692195892334, |
|
"learning_rate": 2.87071551708603e-05, |
|
"loss": 1.5041, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 0.025450338786977358, |
|
"grad_norm": 0.539833128452301, |
|
"learning_rate": 2.7557479520891104e-05, |
|
"loss": 1.5439, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 0.02561560072715254, |
|
"grad_norm": 0.5095410346984863, |
|
"learning_rate": 2.6427608932686843e-05, |
|
"loss": 1.6017, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.025780862667327716, |
|
"grad_norm": 0.4884549379348755, |
|
"learning_rate": 2.5317852301584643e-05, |
|
"loss": 1.3461, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.025946124607502893, |
|
"grad_norm": 0.4909934997558594, |
|
"learning_rate": 2.422851302396655e-05, |
|
"loss": 1.4771, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 0.02611138654767807, |
|
"grad_norm": 0.6615016460418701, |
|
"learning_rate": 2.315988891431412e-05, |
|
"loss": 1.6216, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 0.026276648487853247, |
|
"grad_norm": 0.5424089431762695, |
|
"learning_rate": 2.2112272123788768e-05, |
|
"loss": 1.4016, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 0.026441910428028424, |
|
"grad_norm": 0.4783364236354828, |
|
"learning_rate": 2.1085949060360654e-05, |
|
"loss": 1.2278, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.0266071723682036, |
|
"grad_norm": 0.4361153841018677, |
|
"learning_rate": 2.008120031050753e-05, |
|
"loss": 1.5001, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 0.026772434308378782, |
|
"grad_norm": 0.4618135392665863, |
|
"learning_rate": 1.9098300562505266e-05, |
|
"loss": 1.5697, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.02693769624855396, |
|
"grad_norm": 0.5692691206932068, |
|
"learning_rate": 1.8137518531330767e-05, |
|
"loss": 1.7622, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 0.027102958188729136, |
|
"grad_norm": 0.7295346856117249, |
|
"learning_rate": 1.7199116885197995e-05, |
|
"loss": 1.8002, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.027268220128904314, |
|
"grad_norm": 0.6334387063980103, |
|
"learning_rate": 1.6283352173747145e-05, |
|
"loss": 1.3173, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.02743348206907949, |
|
"grad_norm": 0.5129164457321167, |
|
"learning_rate": 1.5390474757906446e-05, |
|
"loss": 1.1693, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 0.027598744009254668, |
|
"grad_norm": 0.56026291847229, |
|
"learning_rate": 1.4520728741446089e-05, |
|
"loss": 1.4209, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 0.027764005949429845, |
|
"grad_norm": 0.692166805267334, |
|
"learning_rate": 1.3674351904242611e-05, |
|
"loss": 1.315, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.027929267889605022, |
|
"grad_norm": 0.595594048500061, |
|
"learning_rate": 1.2851575637272262e-05, |
|
"loss": 1.4176, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 0.028094529829780203, |
|
"grad_norm": 0.36242327094078064, |
|
"learning_rate": 1.2052624879351104e-05, |
|
"loss": 1.6176, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.02825979176995538, |
|
"grad_norm": 0.4128834307193756, |
|
"learning_rate": 1.1277718055638819e-05, |
|
"loss": 1.4342, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 0.028425053710130557, |
|
"grad_norm": 0.533230185508728, |
|
"learning_rate": 1.0527067017923654e-05, |
|
"loss": 1.6252, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.028590315650305734, |
|
"grad_norm": 0.5379830598831177, |
|
"learning_rate": 9.80087698670411e-06, |
|
"loss": 1.3614, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 0.02875557759048091, |
|
"grad_norm": 0.5856308341026306, |
|
"learning_rate": 9.09934649508375e-06, |
|
"loss": 1.4459, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 0.02892083953065609, |
|
"grad_norm": 0.5911732316017151, |
|
"learning_rate": 8.422667334494249e-06, |
|
"loss": 1.4906, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.029086101470831266, |
|
"grad_norm": 0.8282976150512695, |
|
"learning_rate": 7.771024502261526e-06, |
|
"loss": 1.81, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.029251363411006447, |
|
"grad_norm": 0.5876789689064026, |
|
"learning_rate": 7.144596151029303e-06, |
|
"loss": 1.4096, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 0.029416625351181624, |
|
"grad_norm": 0.5349177122116089, |
|
"learning_rate": 6.543553540053926e-06, |
|
"loss": 1.4491, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 0.0295818872913568, |
|
"grad_norm": 0.6630465388298035, |
|
"learning_rate": 5.968060988383883e-06, |
|
"loss": 1.3134, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 0.029747149231531978, |
|
"grad_norm": 0.6198413968086243, |
|
"learning_rate": 5.418275829936537e-06, |
|
"loss": 1.746, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.029912411171707155, |
|
"grad_norm": 0.4724350571632385, |
|
"learning_rate": 4.8943483704846475e-06, |
|
"loss": 1.4524, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 0.030077673111882332, |
|
"grad_norm": 0.5511718988418579, |
|
"learning_rate": 4.3964218465642355e-06, |
|
"loss": 1.395, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 0.03024293505205751, |
|
"grad_norm": 0.6854819059371948, |
|
"learning_rate": 3.924632386315186e-06, |
|
"loss": 1.6976, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 0.03040819699223269, |
|
"grad_norm": 0.5091037750244141, |
|
"learning_rate": 3.4791089722651436e-06, |
|
"loss": 1.2758, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 0.030573458932407867, |
|
"grad_norm": 0.5180460810661316, |
|
"learning_rate": 3.059973406066963e-06, |
|
"loss": 1.4986, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.030738720872583045, |
|
"grad_norm": 0.8126318454742432, |
|
"learning_rate": 2.667340275199426e-06, |
|
"loss": 1.7087, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 0.030903982812758222, |
|
"grad_norm": 0.567361056804657, |
|
"learning_rate": 2.3013169216400733e-06, |
|
"loss": 1.5864, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 0.0310692447529334, |
|
"grad_norm": 0.6181171536445618, |
|
"learning_rate": 1.9620034125190644e-06, |
|
"loss": 1.6582, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 0.031234506693108576, |
|
"grad_norm": 0.4644870460033417, |
|
"learning_rate": 1.6494925127617634e-06, |
|
"loss": 1.3485, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 0.03139976863328375, |
|
"grad_norm": 0.6458631753921509, |
|
"learning_rate": 1.3638696597277679e-06, |
|
"loss": 1.6619, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.031565030573458934, |
|
"grad_norm": 0.6136733293533325, |
|
"learning_rate": 1.1052129398531507e-06, |
|
"loss": 1.453, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 0.03173029251363411, |
|
"grad_norm": 0.5048571825027466, |
|
"learning_rate": 8.735930673024806e-07, |
|
"loss": 1.7202, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.03189555445380929, |
|
"grad_norm": 0.4640374481678009, |
|
"learning_rate": 6.690733646361857e-07, |
|
"loss": 1.4326, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 0.03206081639398446, |
|
"grad_norm": 0.8511689901351929, |
|
"learning_rate": 4.917097454988584e-07, |
|
"loss": 1.701, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 0.03222607833415964, |
|
"grad_norm": 0.6593170166015625, |
|
"learning_rate": 3.415506993330153e-07, |
|
"loss": 1.2468, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.03239134027433482, |
|
"grad_norm": 0.5053160190582275, |
|
"learning_rate": 2.1863727812254653e-07, |
|
"loss": 1.7233, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 0.03255660221451, |
|
"grad_norm": 0.5864344239234924, |
|
"learning_rate": 1.230030851695263e-07, |
|
"loss": 1.5125, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 0.03272186415468518, |
|
"grad_norm": 0.5440968871116638, |
|
"learning_rate": 5.467426590739511e-08, |
|
"loss": 1.5666, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 0.03288712609486035, |
|
"grad_norm": 0.47717025876045227, |
|
"learning_rate": 1.3669500753099585e-08, |
|
"loss": 1.458, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 0.03305238803503553, |
|
"grad_norm": 0.5752529501914978, |
|
"learning_rate": 0.0, |
|
"loss": 1.4294, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.03305238803503553, |
|
"eval_loss": 1.540518879890442, |
|
"eval_runtime": 42.0573, |
|
"eval_samples_per_second": 60.584, |
|
"eval_steps_per_second": 30.292, |
|
"step": 200 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 200, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 50, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 5243735554129920.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|