{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.2408187838651415,
  "eval_steps": 200,
  "global_step": 1000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0012040939193257074,
      "grad_norm": 4.470886707305908,
      "learning_rate": 1.9975915221579964e-05,
      "loss": 2.6688,
      "step": 5
    },
    {
      "epoch": 0.002408187838651415,
      "grad_norm": 4.067092418670654,
      "learning_rate": 1.9951830443159926e-05,
      "loss": 2.2509,
      "step": 10
    },
    {
      "epoch": 0.003612281757977122,
      "grad_norm": 4.133108615875244,
      "learning_rate": 1.9927745664739885e-05,
      "loss": 2.3732,
      "step": 15
    },
    {
      "epoch": 0.00481637567730283,
      "grad_norm": 3.4890763759613037,
      "learning_rate": 1.9903660886319848e-05,
      "loss": 2.3632,
      "step": 20
    },
    {
      "epoch": 0.006020469596628537,
      "grad_norm": 4.1045308113098145,
      "learning_rate": 1.987957610789981e-05,
      "loss": 2.5203,
      "step": 25
    },
    {
      "epoch": 0.007224563515954244,
      "grad_norm": 4.26784086227417,
      "learning_rate": 1.985549132947977e-05,
      "loss": 2.3349,
      "step": 30
    },
    {
      "epoch": 0.008428657435279952,
      "grad_norm": 4.144766330718994,
      "learning_rate": 1.983140655105973e-05,
      "loss": 2.4106,
      "step": 35
    },
    {
      "epoch": 0.00963275135460566,
      "grad_norm": 3.9538934230804443,
      "learning_rate": 1.9807321772639694e-05,
      "loss": 2.3192,
      "step": 40
    },
    {
      "epoch": 0.010836845273931367,
      "grad_norm": 3.9219865798950195,
      "learning_rate": 1.9783236994219656e-05,
      "loss": 2.2012,
      "step": 45
    },
    {
      "epoch": 0.012040939193257074,
      "grad_norm": 3.391493797302246,
      "learning_rate": 1.9759152215799615e-05,
      "loss": 2.4064,
      "step": 50
    },
    {
      "epoch": 0.013245033112582781,
      "grad_norm": 4.393350124359131,
      "learning_rate": 1.9735067437379577e-05,
      "loss": 2.1513,
      "step": 55
    },
    {
      "epoch": 0.014449127031908489,
      "grad_norm": 3.6243207454681396,
      "learning_rate": 1.971098265895954e-05,
      "loss": 2.2788,
      "step": 60
    },
    {
      "epoch": 0.015653220951234198,
      "grad_norm": 3.642468214035034,
      "learning_rate": 1.9686897880539502e-05,
      "loss": 2.2244,
      "step": 65
    },
    {
      "epoch": 0.016857314870559904,
      "grad_norm": 4.0894670486450195,
      "learning_rate": 1.966281310211946e-05,
      "loss": 2.3622,
      "step": 70
    },
    {
      "epoch": 0.018061408789885613,
      "grad_norm": 4.033819198608398,
      "learning_rate": 1.9638728323699423e-05,
      "loss": 2.3803,
      "step": 75
    },
    {
      "epoch": 0.01926550270921132,
      "grad_norm": 3.535987138748169,
      "learning_rate": 1.9614643545279385e-05,
      "loss": 2.2161,
      "step": 80
    },
    {
      "epoch": 0.020469596628537028,
      "grad_norm": 3.541868209838867,
      "learning_rate": 1.9590558766859348e-05,
      "loss": 2.1721,
      "step": 85
    },
    {
      "epoch": 0.021673690547862733,
      "grad_norm": 4.147072792053223,
      "learning_rate": 1.9566473988439307e-05,
      "loss": 2.3239,
      "step": 90
    },
    {
      "epoch": 0.022877784467188442,
      "grad_norm": 3.4949986934661865,
      "learning_rate": 1.954238921001927e-05,
      "loss": 2.265,
      "step": 95
    },
    {
      "epoch": 0.024081878386514148,
      "grad_norm": 3.793950319290161,
      "learning_rate": 1.951830443159923e-05,
      "loss": 2.152,
      "step": 100
    },
    {
      "epoch": 0.025285972305839857,
      "grad_norm": 3.9355053901672363,
      "learning_rate": 1.949421965317919e-05,
      "loss": 2.2534,
      "step": 105
    },
    {
      "epoch": 0.026490066225165563,
      "grad_norm": 3.255175828933716,
      "learning_rate": 1.9470134874759156e-05,
      "loss": 2.2971,
      "step": 110
    },
    {
      "epoch": 0.027694160144491272,
      "grad_norm": 3.650298595428467,
      "learning_rate": 1.9446050096339115e-05,
      "loss": 2.1228,
      "step": 115
    },
    {
      "epoch": 0.028898254063816978,
      "grad_norm": 3.1906814575195312,
      "learning_rate": 1.9421965317919077e-05,
      "loss": 2.0995,
      "step": 120
    },
    {
      "epoch": 0.030102347983142687,
      "grad_norm": 3.8122494220733643,
      "learning_rate": 1.939788053949904e-05,
      "loss": 2.1651,
      "step": 125
    },
    {
      "epoch": 0.031306441902468396,
      "grad_norm": 3.8269336223602295,
      "learning_rate": 1.9373795761079e-05,
      "loss": 2.1731,
      "step": 130
    },
    {
      "epoch": 0.0325105358217941,
      "grad_norm": 3.75238037109375,
      "learning_rate": 1.934971098265896e-05,
      "loss": 2.3071,
      "step": 135
    },
    {
      "epoch": 0.03371462974111981,
      "grad_norm": 3.538330078125,
      "learning_rate": 1.9325626204238923e-05,
      "loss": 2.3015,
      "step": 140
    },
    {
      "epoch": 0.034918723660445516,
      "grad_norm": 3.497131586074829,
      "learning_rate": 1.9301541425818882e-05,
      "loss": 2.128,
      "step": 145
    },
    {
      "epoch": 0.036122817579771226,
      "grad_norm": 3.6173276901245117,
      "learning_rate": 1.9277456647398845e-05,
      "loss": 2.1792,
      "step": 150
    },
    {
      "epoch": 0.03732691149909693,
      "grad_norm": 3.2987892627716064,
      "learning_rate": 1.9253371868978807e-05,
      "loss": 2.0246,
      "step": 155
    },
    {
      "epoch": 0.03853100541842264,
      "grad_norm": 3.1787831783294678,
      "learning_rate": 1.922928709055877e-05,
      "loss": 2.2108,
      "step": 160
    },
    {
      "epoch": 0.039735099337748346,
      "grad_norm": 3.5422236919403076,
      "learning_rate": 1.920520231213873e-05,
      "loss": 2.1738,
      "step": 165
    },
    {
      "epoch": 0.040939193257074055,
      "grad_norm": 3.7987539768218994,
      "learning_rate": 1.918111753371869e-05,
      "loss": 2.1161,
      "step": 170
    },
    {
      "epoch": 0.04214328717639976,
      "grad_norm": 3.2058522701263428,
      "learning_rate": 1.9157032755298653e-05,
      "loss": 2.0808,
      "step": 175
    },
    {
      "epoch": 0.04334738109572547,
      "grad_norm": 3.00519061088562,
      "learning_rate": 1.9132947976878615e-05,
      "loss": 2.1412,
      "step": 180
    },
    {
      "epoch": 0.044551475015051176,
      "grad_norm": 3.4471330642700195,
      "learning_rate": 1.9108863198458578e-05,
      "loss": 2.1695,
      "step": 185
    },
    {
      "epoch": 0.045755568934376885,
      "grad_norm": 3.394496440887451,
      "learning_rate": 1.9084778420038536e-05,
      "loss": 1.9532,
      "step": 190
    },
    {
      "epoch": 0.04695966285370259,
      "grad_norm": 3.03004789352417,
      "learning_rate": 1.90606936416185e-05,
      "loss": 2.0659,
      "step": 195
    },
    {
      "epoch": 0.048163756773028296,
      "grad_norm": 3.4260365962982178,
      "learning_rate": 1.903660886319846e-05,
      "loss": 2.0792,
      "step": 200
    },
    {
      "epoch": 0.048163756773028296,
      "eval_loss": 2.1430513858795166,
      "eval_runtime": 16.4051,
      "eval_samples_per_second": 6.096,
      "eval_steps_per_second": 0.792,
      "step": 200
    },
    {
      "epoch": 0.049367850692354005,
      "grad_norm": 4.670680999755859,
      "learning_rate": 1.901252408477842e-05,
      "loss": 2.0952,
      "step": 205
    },
    {
      "epoch": 0.050571944611679714,
      "grad_norm": 3.510042667388916,
      "learning_rate": 1.8988439306358382e-05,
      "loss": 2.195,
      "step": 210
    },
    {
      "epoch": 0.05177603853100542,
      "grad_norm": 3.0459847450256348,
      "learning_rate": 1.8964354527938345e-05,
      "loss": 2.2117,
      "step": 215
    },
    {
      "epoch": 0.052980132450331126,
      "grad_norm": 4.36016321182251,
      "learning_rate": 1.8940269749518304e-05,
      "loss": 2.1191,
      "step": 220
    },
    {
      "epoch": 0.054184226369656835,
      "grad_norm": 3.0498242378234863,
      "learning_rate": 1.891618497109827e-05,
      "loss": 2.0838,
      "step": 225
    },
    {
      "epoch": 0.055388320288982544,
      "grad_norm": 3.218038558959961,
      "learning_rate": 1.889210019267823e-05,
      "loss": 2.1118,
      "step": 230
    },
    {
      "epoch": 0.056592414208308246,
      "grad_norm": 3.3144683837890625,
      "learning_rate": 1.886801541425819e-05,
      "loss": 2.2176,
      "step": 235
    },
    {
      "epoch": 0.057796508127633955,
      "grad_norm": 3.2364652156829834,
      "learning_rate": 1.8843930635838153e-05,
      "loss": 2.112,
      "step": 240
    },
    {
      "epoch": 0.059000602046959665,
      "grad_norm": 3.291278839111328,
      "learning_rate": 1.8819845857418112e-05,
      "loss": 2.144,
      "step": 245
    },
    {
      "epoch": 0.060204695966285374,
      "grad_norm": 3.65297794342041,
      "learning_rate": 1.8795761078998074e-05,
      "loss": 2.2597,
      "step": 250
    },
    {
      "epoch": 0.061408789885611076,
      "grad_norm": 3.2321982383728027,
      "learning_rate": 1.8771676300578037e-05,
      "loss": 2.1618,
      "step": 255
    },
    {
      "epoch": 0.06261288380493679,
      "grad_norm": 3.352842330932617,
      "learning_rate": 1.8747591522158e-05,
      "loss": 2.006,
      "step": 260
    },
    {
      "epoch": 0.0638169777242625,
      "grad_norm": 3.5657215118408203,
      "learning_rate": 1.8723506743737958e-05,
      "loss": 2.2253,
      "step": 265
    },
    {
      "epoch": 0.0650210716435882,
      "grad_norm": 3.060060739517212,
      "learning_rate": 1.869942196531792e-05,
      "loss": 2.1187,
      "step": 270
    },
    {
      "epoch": 0.06622516556291391,
      "grad_norm": 3.473719835281372,
      "learning_rate": 1.8675337186897883e-05,
      "loss": 2.0299,
      "step": 275
    },
    {
      "epoch": 0.06742925948223961,
      "grad_norm": 3.1167919635772705,
      "learning_rate": 1.8651252408477845e-05,
      "loss": 2.0381,
      "step": 280
    },
    {
      "epoch": 0.06863335340156532,
      "grad_norm": 3.815816640853882,
      "learning_rate": 1.8627167630057804e-05,
      "loss": 2.1624,
      "step": 285
    },
    {
      "epoch": 0.06983744732089103,
      "grad_norm": 3.2820959091186523,
      "learning_rate": 1.8603082851637766e-05,
      "loss": 2.0819,
      "step": 290
    },
    {
      "epoch": 0.07104154124021674,
      "grad_norm": 3.568885087966919,
      "learning_rate": 1.857899807321773e-05,
      "loss": 2.0749,
      "step": 295
    },
    {
      "epoch": 0.07224563515954245,
      "grad_norm": 3.424076795578003,
      "learning_rate": 1.855491329479769e-05,
      "loss": 2.129,
      "step": 300
    },
    {
      "epoch": 0.07344972907886815,
      "grad_norm": 3.2800493240356445,
      "learning_rate": 1.853082851637765e-05,
      "loss": 2.2067,
      "step": 305
    },
    {
      "epoch": 0.07465382299819386,
      "grad_norm": 3.487868547439575,
      "learning_rate": 1.8506743737957612e-05,
      "loss": 2.124,
      "step": 310
    },
    {
      "epoch": 0.07585791691751957,
      "grad_norm": 3.3999245166778564,
      "learning_rate": 1.8482658959537575e-05,
      "loss": 1.9888,
      "step": 315
    },
    {
      "epoch": 0.07706201083684527,
      "grad_norm": 3.973482370376587,
      "learning_rate": 1.8458574181117533e-05,
      "loss": 2.0592,
      "step": 320
    },
    {
      "epoch": 0.07826610475617098,
      "grad_norm": 2.9601657390594482,
      "learning_rate": 1.8434489402697496e-05,
      "loss": 2.1022,
      "step": 325
    },
    {
      "epoch": 0.07947019867549669,
      "grad_norm": 3.260118246078491,
      "learning_rate": 1.8410404624277458e-05,
      "loss": 1.9763,
      "step": 330
    },
    {
      "epoch": 0.0806742925948224,
      "grad_norm": 3.509838819503784,
      "learning_rate": 1.838631984585742e-05,
      "loss": 2.0284,
      "step": 335
    },
    {
      "epoch": 0.08187838651414811,
      "grad_norm": 4.363494396209717,
      "learning_rate": 1.8362235067437383e-05,
      "loss": 2.0479,
      "step": 340
    },
    {
      "epoch": 0.08308248043347381,
      "grad_norm": 3.2578630447387695,
      "learning_rate": 1.8338150289017342e-05,
      "loss": 2.0488,
      "step": 345
    },
    {
      "epoch": 0.08428657435279951,
      "grad_norm": 3.2846531867980957,
      "learning_rate": 1.8314065510597304e-05,
      "loss": 2.0876,
      "step": 350
    },
    {
      "epoch": 0.08549066827212523,
      "grad_norm": 3.3275203704833984,
      "learning_rate": 1.8289980732177266e-05,
      "loss": 2.0564,
      "step": 355
    },
    {
      "epoch": 0.08669476219145093,
      "grad_norm": 3.1368625164031982,
      "learning_rate": 1.8265895953757225e-05,
      "loss": 2.1533,
      "step": 360
    },
    {
      "epoch": 0.08789885611077664,
      "grad_norm": 3.3824191093444824,
      "learning_rate": 1.8241811175337188e-05,
      "loss": 2.1821,
      "step": 365
    },
    {
      "epoch": 0.08910295003010235,
      "grad_norm": 3.5150134563446045,
      "learning_rate": 1.821772639691715e-05,
      "loss": 2.0292,
      "step": 370
    },
    {
      "epoch": 0.09030704394942805,
      "grad_norm": 3.421921730041504,
      "learning_rate": 1.8193641618497112e-05,
      "loss": 1.9862,
      "step": 375
    },
    {
      "epoch": 0.09151113786875377,
      "grad_norm": 3.616887092590332,
      "learning_rate": 1.8169556840077075e-05,
      "loss": 2.0158,
      "step": 380
    },
    {
      "epoch": 0.09271523178807947,
      "grad_norm": 5.063056945800781,
      "learning_rate": 1.8145472061657034e-05,
      "loss": 2.0579,
      "step": 385
    },
    {
      "epoch": 0.09391932570740517,
      "grad_norm": 3.5242559909820557,
      "learning_rate": 1.8121387283236996e-05,
      "loss": 2.0272,
      "step": 390
    },
    {
      "epoch": 0.09512341962673089,
      "grad_norm": 3.2852962017059326,
      "learning_rate": 1.809730250481696e-05,
      "loss": 2.077,
      "step": 395
    },
    {
      "epoch": 0.09632751354605659,
      "grad_norm": 3.710927963256836,
      "learning_rate": 1.8073217726396917e-05,
      "loss": 2.1271,
      "step": 400
    },
    {
      "epoch": 0.09632751354605659,
      "eval_loss": 2.0655810832977295,
      "eval_runtime": 16.3755,
      "eval_samples_per_second": 6.107,
      "eval_steps_per_second": 0.794,
      "step": 400
    },
    {
      "epoch": 0.0975316074653823,
      "grad_norm": 3.5019216537475586,
      "learning_rate": 1.804913294797688e-05,
      "loss": 2.1081,
      "step": 405
    },
    {
      "epoch": 0.09873570138470801,
      "grad_norm": 3.5533690452575684,
      "learning_rate": 1.8025048169556842e-05,
      "loss": 2.0751,
      "step": 410
    },
    {
      "epoch": 0.09993979530403371,
      "grad_norm": 3.4970240592956543,
      "learning_rate": 1.8000963391136804e-05,
      "loss": 2.066,
      "step": 415
    },
    {
      "epoch": 0.10114388922335943,
      "grad_norm": 3.0926427841186523,
      "learning_rate": 1.7976878612716763e-05,
      "loss": 2.0516,
      "step": 420
    },
    {
      "epoch": 0.10234798314268513,
      "grad_norm": 3.747452974319458,
      "learning_rate": 1.7952793834296726e-05,
      "loss": 2.0721,
      "step": 425
    },
    {
      "epoch": 0.10355207706201083,
      "grad_norm": 3.3113677501678467,
      "learning_rate": 1.7928709055876688e-05,
      "loss": 2.1527,
      "step": 430
    },
    {
      "epoch": 0.10475617098133655,
      "grad_norm": 3.357912063598633,
      "learning_rate": 1.7904624277456647e-05,
      "loss": 2.0113,
      "step": 435
    },
    {
      "epoch": 0.10596026490066225,
      "grad_norm": 3.023893356323242,
      "learning_rate": 1.7880539499036613e-05,
      "loss": 2.1332,
      "step": 440
    },
    {
      "epoch": 0.10716435881998795,
      "grad_norm": 3.3027355670928955,
      "learning_rate": 1.785645472061657e-05,
      "loss": 1.9699,
      "step": 445
    },
    {
      "epoch": 0.10836845273931367,
      "grad_norm": 5.3524932861328125,
      "learning_rate": 1.7832369942196534e-05,
      "loss": 2.0182,
      "step": 450
    },
    {
      "epoch": 0.10957254665863937,
      "grad_norm": 3.200258731842041,
      "learning_rate": 1.7808285163776496e-05,
      "loss": 2.007,
      "step": 455
    },
    {
      "epoch": 0.11077664057796509,
      "grad_norm": 3.286268949508667,
      "learning_rate": 1.7784200385356455e-05,
      "loss": 2.0907,
      "step": 460
    },
    {
      "epoch": 0.11198073449729079,
      "grad_norm": 3.15291428565979,
      "learning_rate": 1.7760115606936417e-05,
      "loss": 2.0468,
      "step": 465
    },
    {
      "epoch": 0.11318482841661649,
      "grad_norm": 3.3798069953918457,
      "learning_rate": 1.773603082851638e-05,
      "loss": 1.9927,
      "step": 470
    },
    {
      "epoch": 0.11438892233594221,
      "grad_norm": 3.4220967292785645,
      "learning_rate": 1.771194605009634e-05,
      "loss": 2.1326,
      "step": 475
    },
    {
      "epoch": 0.11559301625526791,
      "grad_norm": 3.379628896713257,
      "learning_rate": 1.76878612716763e-05,
      "loss": 1.9202,
      "step": 480
    },
    {
      "epoch": 0.11679711017459361,
      "grad_norm": 3.3020846843719482,
      "learning_rate": 1.7663776493256263e-05,
      "loss": 2.1176,
      "step": 485
    },
    {
      "epoch": 0.11800120409391933,
      "grad_norm": 3.2711665630340576,
      "learning_rate": 1.7639691714836226e-05,
      "loss": 2.0865,
      "step": 490
    },
    {
      "epoch": 0.11920529801324503,
      "grad_norm": 3.239253520965576,
      "learning_rate": 1.7615606936416188e-05,
      "loss": 1.9284,
      "step": 495
    },
    {
      "epoch": 0.12040939193257075,
      "grad_norm": 3.4960460662841797,
      "learning_rate": 1.7591522157996147e-05,
      "loss": 2.0088,
      "step": 500
    },
    {
      "epoch": 0.12161348585189645,
      "grad_norm": 3.337407350540161,
      "learning_rate": 1.756743737957611e-05,
      "loss": 1.9687,
      "step": 505
    },
    {
      "epoch": 0.12281757977122215,
      "grad_norm": 3.534827709197998,
      "learning_rate": 1.754335260115607e-05,
      "loss": 2.0273,
      "step": 510
    },
    {
      "epoch": 0.12402167369054787,
      "grad_norm": 3.6207938194274902,
      "learning_rate": 1.7519267822736034e-05,
      "loss": 2.0458,
      "step": 515
    },
    {
      "epoch": 0.12522576760987358,
      "grad_norm": 3.396012544631958,
      "learning_rate": 1.7495183044315993e-05,
      "loss": 1.9185,
      "step": 520
    },
    {
      "epoch": 0.12642986152919927,
      "grad_norm": 3.001236915588379,
      "learning_rate": 1.7471098265895955e-05,
      "loss": 1.9407,
      "step": 525
    },
    {
      "epoch": 0.127633955448525,
      "grad_norm": 3.1318376064300537,
      "learning_rate": 1.7447013487475918e-05,
      "loss": 1.8984,
      "step": 530
    },
    {
      "epoch": 0.1288380493678507,
      "grad_norm": 3.4541585445404053,
      "learning_rate": 1.7422928709055877e-05,
      "loss": 1.8846,
      "step": 535
    },
    {
      "epoch": 0.1300421432871764,
      "grad_norm": 3.311082363128662,
      "learning_rate": 1.739884393063584e-05,
      "loss": 2.0015,
      "step": 540
    },
    {
      "epoch": 0.1312462372065021,
      "grad_norm": 3.2366561889648438,
      "learning_rate": 1.73747591522158e-05,
      "loss": 2.0176,
      "step": 545
    },
    {
      "epoch": 0.13245033112582782,
      "grad_norm": 3.123307943344116,
      "learning_rate": 1.735067437379576e-05,
      "loss": 1.9731,
      "step": 550
    },
    {
      "epoch": 0.1336544250451535,
      "grad_norm": 3.776921033859253,
      "learning_rate": 1.7326589595375726e-05,
      "loss": 2.0484,
      "step": 555
    },
    {
      "epoch": 0.13485851896447923,
      "grad_norm": 2.959716796875,
      "learning_rate": 1.7302504816955685e-05,
      "loss": 1.9689,
      "step": 560
    },
    {
      "epoch": 0.13606261288380495,
      "grad_norm": 3.527384042739868,
      "learning_rate": 1.7278420038535647e-05,
      "loss": 1.9488,
      "step": 565
    },
    {
      "epoch": 0.13726670680313063,
      "grad_norm": 3.0703189373016357,
      "learning_rate": 1.725433526011561e-05,
      "loss": 2.1226,
      "step": 570
    },
    {
      "epoch": 0.13847080072245635,
      "grad_norm": 3.1028363704681396,
      "learning_rate": 1.723025048169557e-05,
      "loss": 1.8966,
      "step": 575
    },
    {
      "epoch": 0.13967489464178207,
      "grad_norm": 3.340517044067383,
      "learning_rate": 1.720616570327553e-05,
      "loss": 2.2156,
      "step": 580
    },
    {
      "epoch": 0.14087898856110775,
      "grad_norm": 3.2740213871002197,
      "learning_rate": 1.7182080924855493e-05,
      "loss": 1.9445,
      "step": 585
    },
    {
      "epoch": 0.14208308248043347,
      "grad_norm": 3.240690231323242,
      "learning_rate": 1.7157996146435455e-05,
      "loss": 2.0295,
      "step": 590
    },
    {
      "epoch": 0.1432871763997592,
      "grad_norm": 3.821340799331665,
      "learning_rate": 1.7133911368015418e-05,
      "loss": 2.1401,
      "step": 595
    },
    {
      "epoch": 0.1444912703190849,
      "grad_norm": 3.103550910949707,
      "learning_rate": 1.7109826589595377e-05,
      "loss": 2.1164,
      "step": 600
    },
    {
      "epoch": 0.1444912703190849,
      "eval_loss": 2.017059564590454,
      "eval_runtime": 16.4324,
      "eval_samples_per_second": 6.086,
      "eval_steps_per_second": 0.791,
      "step": 600
    },
    {
      "epoch": 0.1456953642384106,
      "grad_norm": 3.4971117973327637,
      "learning_rate": 1.708574181117534e-05,
      "loss": 1.9864,
      "step": 605
    },
    {
      "epoch": 0.1468994581577363,
      "grad_norm": 3.324803590774536,
      "learning_rate": 1.70616570327553e-05,
      "loss": 2.0402,
      "step": 610
    },
    {
      "epoch": 0.14810355207706202,
      "grad_norm": 3.302614450454712,
      "learning_rate": 1.703757225433526e-05,
      "loss": 1.9494,
      "step": 615
    },
    {
      "epoch": 0.1493076459963877,
      "grad_norm": 3.3090734481811523,
      "learning_rate": 1.7013487475915223e-05,
      "loss": 2.0748,
      "step": 620
    },
    {
      "epoch": 0.15051173991571343,
      "grad_norm": 3.559049129486084,
      "learning_rate": 1.6989402697495185e-05,
      "loss": 1.9038,
      "step": 625
    },
    {
      "epoch": 0.15171583383503914,
      "grad_norm": 3.5149178504943848,
      "learning_rate": 1.6965317919075147e-05,
      "loss": 2.0261,
      "step": 630
    },
    {
      "epoch": 0.15291992775436483,
      "grad_norm": 3.835693120956421,
      "learning_rate": 1.6941233140655106e-05,
      "loss": 1.9453,
      "step": 635
    },
    {
      "epoch": 0.15412402167369055,
      "grad_norm": 3.521132469177246,
      "learning_rate": 1.691714836223507e-05,
      "loss": 2.0138,
      "step": 640
    },
    {
      "epoch": 0.15532811559301626,
      "grad_norm": 3.2369840145111084,
      "learning_rate": 1.689306358381503e-05,
      "loss": 2.0285,
      "step": 645
    },
    {
      "epoch": 0.15653220951234195,
      "grad_norm": 3.1592392921447754,
      "learning_rate": 1.686897880539499e-05,
      "loss": 1.9912,
      "step": 650
    },
    {
      "epoch": 0.15773630343166767,
      "grad_norm": 3.2069106101989746,
      "learning_rate": 1.6844894026974952e-05,
      "loss": 2.0159,
      "step": 655
    },
    {
      "epoch": 0.15894039735099338,
      "grad_norm": 3.318230390548706,
      "learning_rate": 1.6820809248554915e-05,
      "loss": 2.0412,
      "step": 660
    },
    {
      "epoch": 0.16014449127031907,
      "grad_norm": 3.549443244934082,
      "learning_rate": 1.6796724470134877e-05,
      "loss": 2.0014,
      "step": 665
    },
    {
      "epoch": 0.1613485851896448,
      "grad_norm": 3.32999324798584,
      "learning_rate": 1.677263969171484e-05,
      "loss": 2.0303,
      "step": 670
    },
    {
      "epoch": 0.1625526791089705,
      "grad_norm": 3.262946367263794,
      "learning_rate": 1.6748554913294798e-05,
      "loss": 1.9883,
      "step": 675
    },
    {
      "epoch": 0.16375677302829622,
      "grad_norm": 3.484685182571411,
      "learning_rate": 1.672447013487476e-05,
      "loss": 1.9695,
      "step": 680
    },
    {
      "epoch": 0.1649608669476219,
      "grad_norm": 3.4177358150482178,
      "learning_rate": 1.6700385356454723e-05,
      "loss": 2.0088,
      "step": 685
    },
    {
      "epoch": 0.16616496086694763,
      "grad_norm": 3.447498321533203,
      "learning_rate": 1.6676300578034682e-05,
      "loss": 2.0813,
      "step": 690
    },
    {
      "epoch": 0.16736905478627334,
      "grad_norm": 3.152740240097046,
      "learning_rate": 1.6652215799614644e-05,
      "loss": 1.9988,
      "step": 695
    },
    {
      "epoch": 0.16857314870559903,
      "grad_norm": 3.8948824405670166,
      "learning_rate": 1.6628131021194607e-05,
      "loss": 2.0801,
      "step": 700
    },
    {
      "epoch": 0.16977724262492475,
      "grad_norm": 3.81358003616333,
      "learning_rate": 1.660404624277457e-05,
      "loss": 1.944,
      "step": 705
    },
    {
      "epoch": 0.17098133654425046,
      "grad_norm": 2.980236053466797,
      "learning_rate": 1.657996146435453e-05,
      "loss": 1.9151,
      "step": 710
    },
    {
      "epoch": 0.17218543046357615,
      "grad_norm": 3.041680335998535,
      "learning_rate": 1.655587668593449e-05,
      "loss": 1.9486,
      "step": 715
    },
    {
      "epoch": 0.17338952438290187,
      "grad_norm": 2.898974657058716,
      "learning_rate": 1.6531791907514452e-05,
      "loss": 2.1119,
      "step": 720
    },
    {
      "epoch": 0.17459361830222758,
      "grad_norm": 3.161224603652954,
      "learning_rate": 1.6507707129094415e-05,
      "loss": 2.036,
      "step": 725
    },
    {
      "epoch": 0.17579771222155327,
      "grad_norm": 3.2449426651000977,
      "learning_rate": 1.6483622350674374e-05,
      "loss": 2.0635,
      "step": 730
    },
    {
      "epoch": 0.177001806140879,
      "grad_norm": 3.2805328369140625,
      "learning_rate": 1.6459537572254336e-05,
      "loss": 1.8022,
      "step": 735
    },
    {
      "epoch": 0.1782059000602047,
      "grad_norm": 3.491149663925171,
      "learning_rate": 1.64354527938343e-05,
      "loss": 1.9832,
      "step": 740
    },
    {
      "epoch": 0.1794099939795304,
      "grad_norm": 3.423267126083374,
      "learning_rate": 1.641136801541426e-05,
      "loss": 1.9574,
      "step": 745
    },
    {
      "epoch": 0.1806140878988561,
      "grad_norm": 3.1914217472076416,
      "learning_rate": 1.638728323699422e-05,
      "loss": 1.9283,
      "step": 750
    },
    {
      "epoch": 0.18181818181818182,
      "grad_norm": 3.2903149127960205,
      "learning_rate": 1.6363198458574182e-05,
      "loss": 2.1174,
      "step": 755
    },
    {
      "epoch": 0.18302227573750754,
      "grad_norm": 3.113159656524658,
      "learning_rate": 1.6339113680154144e-05,
      "loss": 1.8794,
      "step": 760
    },
    {
      "epoch": 0.18422636965683323,
      "grad_norm": 3.261596918106079,
      "learning_rate": 1.6315028901734103e-05,
      "loss": 2.0853,
      "step": 765
    },
    {
      "epoch": 0.18543046357615894,
      "grad_norm": 2.9525296688079834,
      "learning_rate": 1.629094412331407e-05,
      "loss": 1.8622,
      "step": 770
    },
    {
      "epoch": 0.18663455749548466,
      "grad_norm": 3.2103638648986816,
      "learning_rate": 1.6266859344894028e-05,
      "loss": 2.0536,
      "step": 775
    },
    {
      "epoch": 0.18783865141481035,
      "grad_norm": 3.5312676429748535,
      "learning_rate": 1.624277456647399e-05,
      "loss": 1.9387,
      "step": 780
    },
    {
      "epoch": 0.18904274533413606,
      "grad_norm": 3.277223825454712,
      "learning_rate": 1.6218689788053953e-05,
      "loss": 1.9804,
      "step": 785
    },
    {
      "epoch": 0.19024683925346178,
      "grad_norm": 3.207287549972534,
      "learning_rate": 1.619460500963391e-05,
      "loss": 1.8782,
      "step": 790
    },
    {
      "epoch": 0.19145093317278747,
      "grad_norm": 3.401834487915039,
      "learning_rate": 1.6170520231213874e-05,
      "loss": 1.9383,
      "step": 795
    },
    {
      "epoch": 0.19265502709211318,
      "grad_norm": 3.5186078548431396,
      "learning_rate": 1.6146435452793836e-05,
      "loss": 2.0963,
      "step": 800
    },
    {
      "epoch": 0.19265502709211318,
      "eval_loss": 1.979533076286316,
      "eval_runtime": 16.4348,
      "eval_samples_per_second": 6.085,
      "eval_steps_per_second": 0.791,
      "step": 800
    },
    {
      "epoch": 0.1938591210114389,
      "grad_norm": 3.0080161094665527,
      "learning_rate": 1.6122350674373795e-05,
      "loss": 1.951,
      "step": 805
    },
    {
      "epoch": 0.1950632149307646,
      "grad_norm": 3.124155044555664,
      "learning_rate": 1.6098265895953758e-05,
      "loss": 1.8663,
      "step": 810
    },
    {
      "epoch": 0.1962673088500903,
      "grad_norm": 3.6262383460998535,
      "learning_rate": 1.607418111753372e-05,
      "loss": 1.9478,
      "step": 815
    },
    {
      "epoch": 0.19747140276941602,
      "grad_norm": 3.3047947883605957,
      "learning_rate": 1.6050096339113682e-05,
      "loss": 1.9203,
      "step": 820
    },
    {
      "epoch": 0.1986754966887417,
      "grad_norm": 3.0261447429656982,
      "learning_rate": 1.6026011560693645e-05,
      "loss": 1.8988,
      "step": 825
    },
    {
      "epoch": 0.19987959060806743,
      "grad_norm": 4.233884334564209,
      "learning_rate": 1.6001926782273604e-05,
      "loss": 2.0327,
      "step": 830
    },
    {
      "epoch": 0.20108368452739314,
      "grad_norm": 2.9169118404388428,
      "learning_rate": 1.5977842003853566e-05,
      "loss": 1.9264,
      "step": 835
    },
    {
      "epoch": 0.20228777844671886,
      "grad_norm": 3.0078282356262207,
      "learning_rate": 1.5953757225433528e-05,
      "loss": 1.8821,
      "step": 840
    },
    {
      "epoch": 0.20349187236604455,
      "grad_norm": 3.4188835620880127,
      "learning_rate": 1.592967244701349e-05,
      "loss": 1.933,
      "step": 845
    },
    {
      "epoch": 0.20469596628537026,
      "grad_norm": 4.739987850189209,
      "learning_rate": 1.590558766859345e-05,
      "loss": 1.9182,
      "step": 850
    },
    {
      "epoch": 0.20590006020469598,
      "grad_norm": 3.1810977458953857,
      "learning_rate": 1.5881502890173412e-05,
      "loss": 1.984,
      "step": 855
    },
    {
      "epoch": 0.20710415412402167,
      "grad_norm": 3.174739360809326,
      "learning_rate": 1.5857418111753374e-05,
      "loss": 1.7719,
      "step": 860
    },
    {
      "epoch": 0.20830824804334738,
      "grad_norm": 3.379767656326294,
      "learning_rate": 1.5833333333333333e-05,
      "loss": 1.9481,
      "step": 865
    },
    {
      "epoch": 0.2095123419626731,
      "grad_norm": 3.3487260341644287,
      "learning_rate": 1.5809248554913295e-05,
      "loss": 1.9416,
      "step": 870
    },
    {
      "epoch": 0.2107164358819988,
      "grad_norm": 3.4879958629608154,
      "learning_rate": 1.5785163776493258e-05,
      "loss": 2.0463,
      "step": 875
    },
    {
      "epoch": 0.2119205298013245,
      "grad_norm": 3.2338194847106934,
      "learning_rate": 1.5761078998073217e-05,
      "loss": 1.9441,
      "step": 880
    },
    {
      "epoch": 0.21312462372065022,
      "grad_norm": 3.122405529022217,
      "learning_rate": 1.5736994219653182e-05,
      "loss": 2.0644,
      "step": 885
    },
    {
      "epoch": 0.2143287176399759,
      "grad_norm": 3.0773510932922363,
      "learning_rate": 1.571290944123314e-05,
      "loss": 1.9415,
      "step": 890
    },
    {
      "epoch": 0.21553281155930162,
      "grad_norm": 3.004040241241455,
      "learning_rate": 1.5688824662813104e-05,
      "loss": 1.9305,
      "step": 895
    },
    {
      "epoch": 0.21673690547862734,
      "grad_norm": 3.547109603881836,
      "learning_rate": 1.5664739884393066e-05,
      "loss": 2.1088,
      "step": 900
    },
    {
      "epoch": 0.21794099939795303,
      "grad_norm": 3.1982204914093018,
      "learning_rate": 1.5640655105973025e-05,
      "loss": 1.8667,
      "step": 905
    },
    {
      "epoch": 0.21914509331727874,
      "grad_norm": 3.381781578063965,
      "learning_rate": 1.5616570327552987e-05,
      "loss": 1.9783,
      "step": 910
    },
    {
      "epoch": 0.22034918723660446,
      "grad_norm": 2.9775896072387695,
      "learning_rate": 1.559248554913295e-05,
      "loss": 2.0211,
      "step": 915
    },
    {
      "epoch": 0.22155328115593018,
      "grad_norm": 2.864551067352295,
      "learning_rate": 1.5568400770712912e-05,
      "loss": 1.8579,
      "step": 920
    },
    {
      "epoch": 0.22275737507525586,
      "grad_norm": 3.0532050132751465,
      "learning_rate": 1.5544315992292874e-05,
      "loss": 1.9398,
      "step": 925
    },
    {
      "epoch": 0.22396146899458158,
      "grad_norm": 2.859631061553955,
      "learning_rate": 1.5520231213872833e-05,
      "loss": 1.8625,
      "step": 930
    },
    {
      "epoch": 0.2251655629139073,
      "grad_norm": 3.1373536586761475,
      "learning_rate": 1.5496146435452796e-05,
      "loss": 1.9003,
      "step": 935
    },
    {
      "epoch": 0.22636965683323299,
      "grad_norm": 3.3248465061187744,
      "learning_rate": 1.5472061657032758e-05,
      "loss": 2.0517,
      "step": 940
    },
    {
      "epoch": 0.2275737507525587,
      "grad_norm": 3.5990936756134033,
      "learning_rate": 1.5447976878612717e-05,
      "loss": 1.8975,
      "step": 945
    },
    {
      "epoch": 0.22877784467188442,
      "grad_norm": 3.4047725200653076,
      "learning_rate": 1.542389210019268e-05,
      "loss": 1.9786,
      "step": 950
    },
    {
      "epoch": 0.2299819385912101,
      "grad_norm": 3.3326022624969482,
      "learning_rate": 1.539980732177264e-05,
      "loss": 1.7848,
      "step": 955
    },
    {
      "epoch": 0.23118603251053582,
      "grad_norm": 3.2942848205566406,
      "learning_rate": 1.5375722543352604e-05,
      "loss": 1.8549,
      "step": 960
    },
    {
      "epoch": 0.23239012642986154,
      "grad_norm": 3.4602601528167725,
      "learning_rate": 1.5351637764932563e-05,
      "loss": 1.917,
      "step": 965
    },
    {
      "epoch": 0.23359422034918723,
      "grad_norm": 3.591327428817749,
      "learning_rate": 1.5327552986512525e-05,
      "loss": 1.9457,
      "step": 970
    },
    {
      "epoch": 0.23479831426851294,
      "grad_norm": 3.215808868408203,
      "learning_rate": 1.5303468208092487e-05,
      "loss": 1.9261,
      "step": 975
    },
    {
      "epoch": 0.23600240818783866,
      "grad_norm": 3.5032927989959717,
      "learning_rate": 1.5279383429672446e-05,
      "loss": 1.967,
      "step": 980
    },
    {
      "epoch": 0.23720650210716435,
      "grad_norm": 3.1476144790649414,
      "learning_rate": 1.525529865125241e-05,
      "loss": 2.0448,
      "step": 985
    },
    {
      "epoch": 0.23841059602649006,
      "grad_norm": 3.046126365661621,
      "learning_rate": 1.5231213872832371e-05,
      "loss": 1.9086,
      "step": 990
    },
    {
      "epoch": 0.23961468994581578,
      "grad_norm": 3.0403099060058594,
      "learning_rate": 1.5207129094412332e-05,
      "loss": 1.9671,
      "step": 995
    },
    {
      "epoch": 0.2408187838651415,
      "grad_norm": 3.524573802947998,
      "learning_rate": 1.5183044315992294e-05,
      "loss": 1.9532,
      "step": 1000
    },
    {
      "epoch": 0.2408187838651415,
      "eval_loss": 1.9571956396102905,
      "eval_runtime": 16.4441,
      "eval_samples_per_second": 6.081,
      "eval_steps_per_second": 0.791,
      "step": 1000
    }
  ],
  "logging_steps": 5,
  "max_steps": 4152,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 1054167504365664.0,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}