|
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 5.001626016260163,
  "eval_steps": 500,
  "global_step": 3076,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    { "epoch": 0.016260162601626018, "grad_norm": 11.395153045654297, "learning_rate": 1.2987012987012986e-05, "loss": 1.6854, "step": 10 },
    { "epoch": 0.032520325203252036, "grad_norm": 3.7914836406707764, "learning_rate": 2.5974025974025972e-05, "loss": 0.7918, "step": 20 },
    { "epoch": 0.04878048780487805, "grad_norm": 2.904379367828369, "learning_rate": 3.8961038961038966e-05, "loss": 0.3595, "step": 30 },
    { "epoch": 0.06504065040650407, "grad_norm": 1.9180514812469482, "learning_rate": 5.1948051948051944e-05, "loss": 0.2492, "step": 40 },
    { "epoch": 0.08130081300813008, "grad_norm": 1.5458292961120605, "learning_rate": 6.493506493506494e-05, "loss": 0.192, "step": 50 },
    { "epoch": 0.0975609756097561, "grad_norm": 2.6975796222686768, "learning_rate": 7.792207792207793e-05, "loss": 0.1904, "step": 60 },
    { "epoch": 0.11382113821138211, "grad_norm": 1.1650338172912598, "learning_rate": 9.090909090909092e-05, "loss": 0.1408, "step": 70 },
    { "epoch": 0.13008130081300814, "grad_norm": 1.7408723831176758, "learning_rate": 0.00010389610389610389, "loss": 0.142, "step": 80 },
    { "epoch": 0.14634146341463414, "grad_norm": 1.0142450332641602, "learning_rate": 0.00011688311688311689, "loss": 0.1426, "step": 90 },
    { "epoch": 0.16260162601626016, "grad_norm": 1.9548156261444092, "learning_rate": 0.00012987012987012987, "loss": 0.1518, "step": 100 },
    { "epoch": 0.17886178861788618, "grad_norm": 1.2884893417358398, "learning_rate": 0.00014285714285714287, "loss": 0.1255, "step": 110 },
    { "epoch": 0.1951219512195122, "grad_norm": 0.9044198393821716, "learning_rate": 0.00015584415584415587, "loss": 0.1339, "step": 120 },
    { "epoch": 0.21138211382113822, "grad_norm": 0.9501888155937195, "learning_rate": 0.00016883116883116884, "loss": 0.1031, "step": 130 },
    { "epoch": 0.22764227642276422, "grad_norm": 1.059137225151062, "learning_rate": 0.00018181818181818183, "loss": 0.1248, "step": 140 },
    { "epoch": 0.24390243902439024, "grad_norm": 1.6067619323730469, "learning_rate": 0.0001948051948051948, "loss": 0.1125, "step": 150 },
    { "epoch": 0.2601626016260163, "grad_norm": 0.800540030002594, "learning_rate": 0.00019999791929590824, "loss": 0.1226, "step": 160 },
    { "epoch": 0.2764227642276423, "grad_norm": 0.8457431793212891, "learning_rate": 0.00019998520419557735, "loss": 0.117, "step": 170 },
    { "epoch": 0.2926829268292683, "grad_norm": 0.8381863236427307, "learning_rate": 0.00019996093140964042, "loss": 0.097, "step": 180 },
    { "epoch": 0.3089430894308943, "grad_norm": 0.6342546939849854, "learning_rate": 0.00019992510374388481, "loss": 0.1145, "step": 190 },
    { "epoch": 0.3252032520325203, "grad_norm": 1.3920717239379883, "learning_rate": 0.0001998777253397723, "loss": 0.1136, "step": 200 },
    { "epoch": 0.34146341463414637, "grad_norm": 0.6389938592910767, "learning_rate": 0.0001998188016739602, "loss": 0.1014, "step": 210 },
    { "epoch": 0.35772357723577236, "grad_norm": 1.111118197441101, "learning_rate": 0.0001997483395576683, "loss": 0.1186, "step": 220 },
    { "epoch": 0.37398373983739835, "grad_norm": 0.8862912058830261, "learning_rate": 0.00019966634713589157, "loss": 0.1037, "step": 230 },
    { "epoch": 0.3902439024390244, "grad_norm": 0.47783714532852173, "learning_rate": 0.00019957283388645874, "loss": 0.0986, "step": 240 },
    { "epoch": 0.4065040650406504, "grad_norm": 0.7350404858589172, "learning_rate": 0.00019946781061893647, "loss": 0.0938, "step": 250 },
    { "epoch": 0.42276422764227645, "grad_norm": 1.0116444826126099, "learning_rate": 0.0001993512894733801, "loss": 0.1017, "step": 260 },
    { "epoch": 0.43902439024390244, "grad_norm": 0.6825156211853027, "learning_rate": 0.0001992232839189301, "loss": 0.0956, "step": 270 },
    { "epoch": 0.45528455284552843, "grad_norm": 1.1011830568313599, "learning_rate": 0.00019908380875225534, "loss": 0.0993, "step": 280 },
    { "epoch": 0.4715447154471545, "grad_norm": 0.560863196849823, "learning_rate": 0.00019893288009584256, "loss": 0.0784, "step": 290 },
    { "epoch": 0.4878048780487805, "grad_norm": 0.8265031576156616, "learning_rate": 0.00019877051539613264, "loss": 0.0931, "step": 300 },
    { "epoch": 0.5040650406504065, "grad_norm": 0.5396478772163391, "learning_rate": 0.0001985967334215041, "loss": 0.0804, "step": 310 },
    { "epoch": 0.5203252032520326, "grad_norm": 0.5586859583854675, "learning_rate": 0.0001984115542601034, "loss": 0.0889, "step": 320 },
    { "epoch": 0.5365853658536586, "grad_norm": 0.9515529870986938, "learning_rate": 0.0001982149993175229, "loss": 0.0915, "step": 330 },
    { "epoch": 0.5528455284552846, "grad_norm": 0.9865338206291199, "learning_rate": 0.00019800709131432667, "loss": 0.0765, "step": 340 },
    { "epoch": 0.5691056910569106, "grad_norm": 0.46598589420318604, "learning_rate": 0.00019778785428342386, "loss": 0.0844, "step": 350 },
    { "epoch": 0.5853658536585366, "grad_norm": 1.2223691940307617, "learning_rate": 0.00019755731356729093, "loss": 0.0797, "step": 360 },
    { "epoch": 0.6016260162601627, "grad_norm": 0.568372905254364, "learning_rate": 0.00019731549581504193, "loss": 0.0713, "step": 370 },
    { "epoch": 0.6178861788617886, "grad_norm": 0.9440287947654724, "learning_rate": 0.00019706242897934824, "loss": 0.0859, "step": 380 },
    { "epoch": 0.6341463414634146, "grad_norm": 0.9875515699386597, "learning_rate": 0.00019679814231320734, "loss": 0.0959, "step": 390 },
    { "epoch": 0.6504065040650406, "grad_norm": 1.18289315700531, "learning_rate": 0.00019652266636656132, "loss": 0.0794, "step": 400 },
    { "epoch": 0.6666666666666666, "grad_norm": 0.8284242749214172, "learning_rate": 0.00019623603298276544, "loss": 0.0807, "step": 410 },
    { "epoch": 0.6829268292682927, "grad_norm": 0.5042060017585754, "learning_rate": 0.00019593827529490741, "loss": 0.0798, "step": 420 },
    { "epoch": 0.6991869918699187, "grad_norm": 0.4532330632209778, "learning_rate": 0.00019562942772197723, "loss": 0.0704, "step": 430 },
    { "epoch": 0.7154471544715447, "grad_norm": 0.6993445754051208, "learning_rate": 0.00019530952596488857, "loss": 0.0829, "step": 440 },
    { "epoch": 0.7317073170731707, "grad_norm": 0.7966378927230835, "learning_rate": 0.00019497860700235206, "loss": 0.0716, "step": 450 },
    { "epoch": 0.7479674796747967, "grad_norm": 0.6459921002388, "learning_rate": 0.00019463670908660075, "loss": 0.0695, "step": 460 },
    { "epoch": 0.7642276422764228, "grad_norm": 0.53463214635849, "learning_rate": 0.00019428387173896837, "loss": 0.071, "step": 470 },
    { "epoch": 0.7804878048780488, "grad_norm": 0.5773355960845947, "learning_rate": 0.0001939201357453208, "loss": 0.0619, "step": 480 },
    { "epoch": 0.7967479674796748, "grad_norm": 0.5577520728111267, "learning_rate": 0.00019354554315134166, "loss": 0.0643, "step": 490 },
    { "epoch": 0.8130081300813008, "grad_norm": 0.6475623846054077, "learning_rate": 0.0001931601372576719, "loss": 0.06, "step": 500 },
    { "epoch": 0.8292682926829268, "grad_norm": 0.6802220344543457, "learning_rate": 0.00019276396261490465, "loss": 0.0617, "step": 510 },
    { "epoch": 0.8455284552845529, "grad_norm": 0.41168370842933655, "learning_rate": 0.0001923570650184354, "loss": 0.0529, "step": 520 },
    { "epoch": 0.8617886178861789, "grad_norm": 0.5927830338478088, "learning_rate": 0.00019193949150316826, "loss": 0.0747, "step": 530 },
    { "epoch": 0.8780487804878049, "grad_norm": 0.45662933588027954, "learning_rate": 0.00019151129033807908, "loss": 0.0644, "step": 540 },
    { "epoch": 0.8943089430894309, "grad_norm": 0.5423726439476013, "learning_rate": 0.0001910725110206358, "loss": 0.0541, "step": 550 },
    { "epoch": 0.9105691056910569, "grad_norm": 0.4179861843585968, "learning_rate": 0.00019062320427107698, "loss": 0.0558, "step": 560 },
    { "epoch": 0.926829268292683, "grad_norm": 0.9079977869987488, "learning_rate": 0.0001901634220265486, "loss": 0.0749, "step": 570 },
    { "epoch": 0.943089430894309, "grad_norm": 0.7620092034339905, "learning_rate": 0.00018969321743510066, "loss": 0.0692, "step": 580 },
    { "epoch": 0.959349593495935, "grad_norm": 0.6436122059822083, "learning_rate": 0.00018921264484954344, "loss": 0.0571, "step": 590 },
    { "epoch": 0.975609756097561, "grad_norm": 1.0210789442062378, "learning_rate": 0.00018872175982116482, "loss": 0.0646, "step": 600 },
    { "epoch": 0.991869918699187, "grad_norm": 0.7681993246078491, "learning_rate": 0.00018822061909330864, "loss": 0.0716, "step": 610 },
    { "epoch": 1.008130081300813, "grad_norm": 0.6054890751838684, "learning_rate": 0.00018770928059481574, "loss": 0.0656, "step": 620 },
    { "epoch": 1.024390243902439, "grad_norm": 0.5673167705535889, "learning_rate": 0.00018718780343332755, "loss": 0.059, "step": 630 },
    { "epoch": 1.040650406504065, "grad_norm": 0.5374870896339417, "learning_rate": 0.00018665624788845387, "loss": 0.056, "step": 640 },
    { "epoch": 1.056910569105691, "grad_norm": 0.544373095035553, "learning_rate": 0.0001861146754048045, "loss": 0.0646, "step": 650 },
    { "epoch": 1.0731707317073171, "grad_norm": 0.4903548061847687, "learning_rate": 0.00018556314858488707, "loss": 0.0618, "step": 660 },
    { "epoch": 1.089430894308943, "grad_norm": 0.4946286082267761, "learning_rate": 0.0001850017311818702, "loss": 0.0484, "step": 670 },
    { "epoch": 1.1056910569105691, "grad_norm": 0.46937233209609985, "learning_rate": 0.00018443048809221424, "loss": 0.0651, "step": 680 },
    { "epoch": 1.1219512195121952, "grad_norm": 0.6026718020439148, "learning_rate": 0.0001838494853481695, "loss": 0.0469, "step": 690 },
    { "epoch": 1.1382113821138211, "grad_norm": 0.5334330201148987, "learning_rate": 0.00018325879011014352, "loss": 0.0527, "step": 700 },
    { "epoch": 1.1544715447154472, "grad_norm": 0.6381818652153015, "learning_rate": 0.00018265847065893737, "loss": 0.0621, "step": 710 },
    { "epoch": 1.170731707317073, "grad_norm": 0.9406611919403076, "learning_rate": 0.00018204859638785328, "loss": 0.055, "step": 720 },
    { "epoch": 1.1869918699186992, "grad_norm": 0.4635262191295624, "learning_rate": 0.0001814292377946727, "loss": 0.0594, "step": 730 },
    { "epoch": 1.203252032520325, "grad_norm": 0.5755775570869446, "learning_rate": 0.00018080046647350756, "loss": 0.0497, "step": 740 },
    { "epoch": 1.2195121951219512, "grad_norm": 0.4675120413303375, "learning_rate": 0.00018016235510652425, "loss": 0.0498, "step": 750 },
    { "epoch": 1.2357723577235773, "grad_norm": 0.6721792817115784, "learning_rate": 0.0001795149774555421, "loss": 0.0562, "step": 760 },
    { "epoch": 1.2520325203252032, "grad_norm": 0.416792631149292, "learning_rate": 0.00017885840835350674, "loss": 0.051, "step": 770 },
    { "epoch": 1.2682926829268293, "grad_norm": 0.2711058259010315, "learning_rate": 0.00017819272369584016, "loss": 0.0549, "step": 780 },
    { "epoch": 1.2845528455284554, "grad_norm": 0.6742275357246399, "learning_rate": 0.00017751800043166744, "loss": 0.0456, "step": 790 },
    { "epoch": 1.3008130081300813, "grad_norm": 0.43051138520240784, "learning_rate": 0.000176834316554922, "loss": 0.0488, "step": 800 },
    { "epoch": 1.3170731707317074, "grad_norm": 0.7541930079460144, "learning_rate": 0.00017614175109532997, "loss": 0.0531, "step": 810 },
    { "epoch": 1.3333333333333333, "grad_norm": 0.46846646070480347, "learning_rate": 0.00017544038410927476, "loss": 0.0632, "step": 820 },
    { "epoch": 1.3495934959349594, "grad_norm": 0.5363613963127136, "learning_rate": 0.00017473029667054298, "loss": 0.0546, "step": 830 },
    { "epoch": 1.3658536585365852, "grad_norm": 0.5677704811096191, "learning_rate": 0.00017401157086095317, "loss": 0.0536, "step": 840 },
    { "epoch": 1.3821138211382114, "grad_norm": 0.4499371647834778, "learning_rate": 0.00017328428976086702, "loss": 0.0549, "step": 850 },
    { "epoch": 1.3983739837398375, "grad_norm": 0.42111602425575256, "learning_rate": 0.00017254853743958642, "loss": 0.0576, "step": 860 },
    { "epoch": 1.4146341463414633, "grad_norm": 0.602150022983551, "learning_rate": 0.00017180439894563497, "loss": 0.0522, "step": 870 },
    { "epoch": 1.4308943089430894, "grad_norm": 0.5597222447395325, "learning_rate": 0.00017105196029692743, "loss": 0.0526, "step": 880 },
    { "epoch": 1.4471544715447155, "grad_norm": 0.3994801342487335, "learning_rate": 0.00017029130847082615, "loss": 0.0451, "step": 890 },
    { "epoch": 1.4634146341463414, "grad_norm": 0.36083677411079407, "learning_rate": 0.00016952253139408723, "loss": 0.0467, "step": 900 },
    { "epoch": 1.4796747967479675, "grad_norm": 0.4908004105091095, "learning_rate": 0.00016874571793269665, "loss": 0.0519, "step": 910 },
    { "epoch": 1.4959349593495934, "grad_norm": 0.3880288600921631, "learning_rate": 0.0001679609578815979, "loss": 0.0603, "step": 920 },
    { "epoch": 1.5121951219512195, "grad_norm": 0.30312085151672363, "learning_rate": 0.00016716834195431223, "loss": 0.0651, "step": 930 },
    { "epoch": 1.5284552845528454, "grad_norm": 0.5956284999847412, "learning_rate": 0.00016636796177245278, "loss": 0.0431, "step": 940 },
    { "epoch": 1.5447154471544715, "grad_norm": 0.4647449553012848, "learning_rate": 0.0001655599098551335, "loss": 0.0455, "step": 950 },
    { "epoch": 1.5609756097560976, "grad_norm": 0.3006104528903961, "learning_rate": 0.00016474427960827472, "loss": 0.0575, "step": 960 },
    { "epoch": 1.5772357723577235, "grad_norm": 0.37439459562301636, "learning_rate": 0.0001639211653138059, "loss": 0.05, "step": 970 },
    { "epoch": 1.5934959349593496, "grad_norm": 0.40915167331695557, "learning_rate": 0.00016309066211876708, "loss": 0.0439, "step": 980 },
    { "epoch": 1.6097560975609757, "grad_norm": 0.4699673652648926, "learning_rate": 0.00016225286602431063, "loss": 0.0475, "step": 990 },
    { "epoch": 1.6260162601626016, "grad_norm": 0.5752647519111633, "learning_rate": 0.00016140787387460405, "loss": 0.0501, "step": 1000 },
    { "epoch": 1.6422764227642277, "grad_norm": 0.294344037771225, "learning_rate": 0.0001605557833456354, "loss": 0.0659, "step": 1010 },
    { "epoch": 1.6585365853658538, "grad_norm": 0.4021753668785095, "learning_rate": 0.0001596966929339224, "loss": 0.0401, "step": 1020 },
    { "epoch": 1.6747967479674797, "grad_norm": 0.5346744656562805, "learning_rate": 0.00015883070194512694, "loss": 0.0428, "step": 1030 },
    { "epoch": 1.6910569105691056, "grad_norm": 0.5238606929779053, "learning_rate": 0.0001579579104825761, "loss": 0.0402, "step": 1040 },
    { "epoch": 1.7073170731707317, "grad_norm": 0.4844679534435272, "learning_rate": 0.00015707841943569037, "loss": 0.0445, "step": 1050 },
    { "epoch": 1.7235772357723578, "grad_norm": 0.603135347366333, "learning_rate": 0.000156192330468322, "loss": 0.0544, "step": 1060 },
    { "epoch": 1.7398373983739837, "grad_norm": 0.40627196431159973, "learning_rate": 0.00015529974600700275, "loss": 0.0461, "step": 1070 },
    { "epoch": 1.7560975609756098, "grad_norm": 0.5749149918556213, "learning_rate": 0.0001544007692291044, "loss": 0.0434, "step": 1080 },
    { "epoch": 1.7723577235772359, "grad_norm": 0.39188066124916077, "learning_rate": 0.0001534955040509119, "loss": 0.0414, "step": 1090 },
    { "epoch": 1.7886178861788617, "grad_norm": 0.22225162386894226, "learning_rate": 0.00015258405511561115, "loss": 0.0363, "step": 1100 },
    { "epoch": 1.8048780487804879, "grad_norm": 0.394105464220047, "learning_rate": 0.0001516665277811932, "loss": 0.0407, "step": 1110 },
    { "epoch": 1.821138211382114, "grad_norm": 0.2789241075515747, "learning_rate": 0.00015074302810827515, "loss": 0.035, "step": 1120 },
    { "epoch": 1.8373983739837398, "grad_norm": 0.5585935115814209, "learning_rate": 0.00014981366284784058, "loss": 0.0433, "step": 1130 },
    { "epoch": 1.8536585365853657, "grad_norm": 0.7364455461502075, "learning_rate": 0.00014887853942889927, "loss": 0.0371, "step": 1140 },
    { "epoch": 1.8699186991869918, "grad_norm": 0.46286317706108093, "learning_rate": 0.0001479377659460695, "loss": 0.0492, "step": 1150 },
    { "epoch": 1.886178861788618, "grad_norm": 0.470198392868042, "learning_rate": 0.00014699145114708287, "loss": 0.038, "step": 1160 },
    { "epoch": 1.9024390243902438, "grad_norm": 0.5939562320709229, "learning_rate": 0.00014603970442021347, "loss": 0.0507, "step": 1170 },
    { "epoch": 1.91869918699187, "grad_norm": 0.4777078926563263, "learning_rate": 0.00014508263578163358, "loss": 0.0443, "step": 1180 },
    { "epoch": 1.934959349593496, "grad_norm": 0.4176855981349945, "learning_rate": 0.00014412035586269624, "loss": 0.0463, "step": 1190 },
    { "epoch": 1.951219512195122, "grad_norm": 0.493206262588501, "learning_rate": 0.0001431529758971471, "loss": 0.0557, "step": 1200 },
    { "epoch": 1.967479674796748, "grad_norm": 0.44188496470451355, "learning_rate": 0.00014218060770826636, "loss": 0.0561, "step": 1210 },
    { "epoch": 1.9837398373983741, "grad_norm": 0.5256350636482239, "learning_rate": 0.00014120336369594265, "loss": 0.0504, "step": 1220 },
    { "epoch": 2.0, "grad_norm": 0.2594618499279022, "learning_rate": 0.0001402213568236804, "loss": 0.0403, "step": 1230 },
    { "epoch": 2.016260162601626, "grad_norm": 0.36164024472236633, "learning_rate": 0.00013923470060554186, "loss": 0.047, "step": 1240 },
    { "epoch": 2.032520325203252, "grad_norm": 0.44333750009536743, "learning_rate": 0.00013824350909302552, "loss": 0.0368, "step": 1250 },
    { "epoch": 2.048780487804878, "grad_norm": 0.6669727563858032, "learning_rate": 0.00013724789686188267, "loss": 0.0305, "step": 1260 },
    { "epoch": 2.065040650406504, "grad_norm": 0.32740581035614014, "learning_rate": 0.0001362479789988727, "loss": 0.0347, "step": 1270 },
    { "epoch": 2.08130081300813, "grad_norm": 0.5777595043182373, "learning_rate": 0.00013524387108846032, "loss": 0.0476, "step": 1280 },
    { "epoch": 2.097560975609756, "grad_norm": 0.30891892313957214, "learning_rate": 0.0001342356891994542, "loss": 0.0437, "step": 1290 },
    { "epoch": 2.113821138211382, "grad_norm": 0.4573158919811249, "learning_rate": 0.00013322354987159045, "loss": 0.0421, "step": 1300 },
    { "epoch": 2.130081300813008, "grad_norm": 0.42825648188591003, "learning_rate": 0.00013220757010206112, "loss": 0.0405, "step": 1310 },
    { "epoch": 2.1463414634146343, "grad_norm": 0.26400211453437805, "learning_rate": 0.00013118786733199014, "loss": 0.0385, "step": 1320 },
    { "epoch": 2.16260162601626, "grad_norm": 0.42198774218559265, "learning_rate": 0.00013016455943285786, "loss": 0.0464, "step": 1330 },
    { "epoch": 2.178861788617886, "grad_norm": 0.41942107677459717, "learning_rate": 0.0001291377646928757, "loss": 0.0514, "step": 1340 },
    { "epoch": 2.1951219512195124, "grad_norm": 0.2822849154472351, "learning_rate": 0.00012810760180331288, "loss": 0.0365, "step": 1350 },
    { "epoch": 2.2113821138211383, "grad_norm": 0.41634827852249146, "learning_rate": 0.00012707418984477637, "loss": 0.0497, "step": 1360 },
    { "epoch": 2.227642276422764, "grad_norm": 0.3224164545536041, "learning_rate": 0.0001260376482734458, "loss": 0.0379, "step": 1370 },
    { "epoch": 2.2439024390243905, "grad_norm": 0.34206849336624146, "learning_rate": 0.00012499809690726522, "loss": 0.0338, "step": 1380 },
    { "epoch": 2.2601626016260163, "grad_norm": 0.364622563123703, "learning_rate": 0.00012395565591209273, "loss": 0.0422, "step": 1390 },
    { "epoch": 2.2764227642276422, "grad_norm": 0.28372102975845337, "learning_rate": 0.00012291044578781015, "loss": 0.0369, "step": 1400 },
    { "epoch": 2.292682926829268, "grad_norm": 0.6621326804161072, "learning_rate": 0.00012186258735439379, "loss": 0.0547, "step": 1410 },
    { "epoch": 2.3089430894308944, "grad_norm": 0.3451598286628723, "learning_rate": 0.00012081220173794863, "loss": 0.0411, "step": 1420 },
    { "epoch": 2.3252032520325203, "grad_norm": 0.36769524216651917, "learning_rate": 0.00011975941035670664, "loss": 0.0428, "step": 1430 },
    { "epoch": 2.341463414634146, "grad_norm": 0.38125452399253845, "learning_rate": 0.00011870433490699166, "loss": 0.0363, "step": 1440 },
    { "epoch": 2.3577235772357725, "grad_norm": 0.44016361236572266, "learning_rate": 0.00011764709734915218, "loss": 0.038, "step": 1450 },
    { "epoch": 2.3739837398373984, "grad_norm": 0.2804912328720093, "learning_rate": 0.0001165878198934632, "loss": 0.0385, "step": 1460 },
    { "epoch": 2.3902439024390243, "grad_norm": 0.31978878378868103, "learning_rate": 0.00011552662498599958, "loss": 0.0269, "step": 1470 },
    { "epoch": 2.40650406504065, "grad_norm": 0.2746686339378357, "learning_rate": 0.00011446363529448209, "loss": 0.0365, "step": 1480 },
    { "epoch": 2.4227642276422765, "grad_norm": 0.35929563641548157, "learning_rate": 0.00011339897369409774, "loss": 0.0399, "step": 1490 },
    { "epoch": 2.4390243902439024, "grad_norm": 0.25356271862983704, "learning_rate": 0.00011233276325329596, "loss": 0.0381, "step": 1500 },
    { "epoch": 2.4552845528455283, "grad_norm": 0.27866753935813904, "learning_rate": 0.00011126512721956289, "loss": 0.0273, "step": 1510 },
    { "epoch": 2.4715447154471546, "grad_norm": 0.3891955018043518, "learning_rate": 0.00011019618900517455, "loss": 0.0367, "step": 1520 },
    { "epoch": 2.4878048780487805, "grad_norm": 0.26467326283454895, "learning_rate": 0.0001091260721729312, "loss": 0.0311, "step": 1530 },
    { "epoch": 2.5040650406504064, "grad_norm": 0.21822647750377655, "learning_rate": 0.0001080549004218742, "loss": 0.0296, "step": 1540 },
    { "epoch": 2.5203252032520327, "grad_norm": 0.3661019206047058, "learning_rate": 0.00010698279757298715, "loss": 0.0395, "step": 1550 },
    { "epoch": 2.5365853658536586, "grad_norm": 0.15822070837020874, "learning_rate": 0.00010590988755488292, "loss": 0.0265, "step": 1560 },
    { "epoch": 2.5528455284552845, "grad_norm": 0.25284048914909363, "learning_rate": 0.00010483629438947826, "loss": 0.0307, "step": 1570 },
    { "epoch": 2.569105691056911, "grad_norm": 0.3222500681877136, "learning_rate": 0.00010376214217765772, "loss": 0.0361, "step": 1580 },
    { "epoch": 2.5853658536585367, "grad_norm": 0.3294614255428314, "learning_rate": 0.00010268755508492804, "loss": 0.0354, "step": 1590 },
    { "epoch": 2.6016260162601625, "grad_norm": 0.4046204388141632, "learning_rate": 0.0001016126573270658, "loss": 0.0297, "step": 1600 },
    { "epoch": 2.617886178861789, "grad_norm": 0.37592411041259766, "learning_rate": 0.0001005375731557584, "loss": 0.0326, "step": 1610 },
    { "epoch": 2.6341463414634148, "grad_norm": 0.4793863594532013, "learning_rate": 9.946242684424162e-05, "loss": 0.0315, "step": 1620 },
    { "epoch": 2.6504065040650406, "grad_norm": 0.31578221917152405, "learning_rate": 9.838734267293421e-05, "loss": 0.0316, "step": 1630 },
    { "epoch": 2.6666666666666665, "grad_norm": 0.4366059899330139, "learning_rate": 9.731244491507197e-05, "loss": 0.0286, "step": 1640 },
    { "epoch": 2.682926829268293, "grad_norm": 0.3444521725177765, "learning_rate": 9.623785782234234e-05, "loss": 0.0278, "step": 1650 },
    { "epoch": 2.6991869918699187, "grad_norm": 0.5460310578346252, "learning_rate": 9.516370561052174e-05, "loss": 0.0444, "step": 1660 },
    { "epoch": 2.7154471544715446, "grad_norm": 0.360115110874176, "learning_rate": 9.409011244511712e-05, "loss": 0.0288, "step": 1670 },
    { "epoch": 2.7317073170731705, "grad_norm": 0.3582008183002472, "learning_rate": 9.30172024270129e-05, "loss": 0.0311, "step": 1680 },
    { "epoch": 2.747967479674797, "grad_norm": 0.4152023494243622, "learning_rate": 9.194509957812582e-05, "loss": 0.0463, "step": 1690 },
    { "epoch": 2.7642276422764227, "grad_norm": 0.2088373750448227, "learning_rate": 9.087392782706883e-05, "loss": 0.0331, "step": 1700 },
    { "epoch": 2.7804878048780486, "grad_norm": 0.5220609307289124, "learning_rate": 8.980381099482546e-05, "loss": 0.0311, "step": 1710 },
    { "epoch": 2.796747967479675, "grad_norm": 0.49769702553749084, "learning_rate": 8.873487278043712e-05, "loss": 0.0336, "step": 1720 },
    { "epoch": 2.813008130081301, "grad_norm": 0.3503866195678711, "learning_rate": 8.766723674670407e-05, "loss": 0.0272, "step": 1730 },
    { "epoch": 2.8292682926829267, "grad_norm": 0.24078375101089478, "learning_rate": 8.660102630590227e-05, "loss": 0.0365, "step": 1740 },
    { "epoch": 2.845528455284553, "grad_norm": 0.27272090315818787, "learning_rate": 8.553636470551792e-05, "loss": 0.0252, "step": 1750 },
    { "epoch": 2.861788617886179, "grad_norm": 0.25519323348999023, "learning_rate": 8.447337501400047e-05, "loss": 0.0286, "step": 1760 },
    { "epoch": 2.8780487804878048, "grad_norm": 0.27832046151161194, "learning_rate": 8.341218010653684e-05, "loss": 0.0306, "step": 1770 },
    { "epoch": 2.894308943089431, "grad_norm": 0.3626549243927002, "learning_rate": 8.235290265084785e-05, "loss": 0.0266, "step": 1780 },
    { "epoch": 2.910569105691057, "grad_norm": 0.2994539737701416, "learning_rate": 8.129566509300835e-05, "loss": 0.0338, "step": 1790 },
    { "epoch": 2.926829268292683, "grad_norm": 0.32438531517982483, "learning_rate": 8.02405896432934e-05, "loss": 0.0308, "step": 1800 },
    { "epoch": 2.943089430894309, "grad_norm": 0.4146476686000824, "learning_rate": 7.91877982620514e-05, "loss": 0.0258, "step": 1810 },
    { "epoch": 2.959349593495935, "grad_norm": 0.3459465503692627, "learning_rate": 7.813741264560622e-05, "loss": 0.025, "step": 1820 },
    { "epoch": 2.975609756097561, "grad_norm": 0.3172384202480316, "learning_rate": 7.708955421218986e-05, "loss": 0.0327, "step": 1830 },
    { "epoch": 2.991869918699187, "grad_norm": 0.5268030166625977, "learning_rate": 7.604434408790729e-05, "loss": 0.033, "step": 1840 },
    { "epoch": 3.008130081300813, "grad_norm": 0.2980897128582001, "learning_rate": 7.50019030927348e-05, "loss": 0.0307, "step": 1850 },
    { "epoch": 3.024390243902439, "grad_norm": 0.3045070767402649, "learning_rate": 7.396235172655422e-05, "loss": 0.025, "step": 1860 },
    { "epoch": 3.040650406504065, "grad_norm": 0.2718939185142517, "learning_rate": 7.292581015522363e-05, "loss": 0.0229, "step": 1870 },
    { "epoch": 3.0569105691056913, "grad_norm": 0.5520108938217163, "learning_rate": 7.189239819668711e-05, "loss": 0.0305, "step": 1880 },
    { "epoch": 3.073170731707317, "grad_norm": 0.2900155186653137, "learning_rate": 7.086223530712433e-05, "loss": 0.0301, "step": 1890 },
    { "epoch": 3.089430894308943, "grad_norm": 0.31466805934906006, "learning_rate": 6.983544056714215e-05, "loss": 0.0343, "step": 1900 },
    { "epoch": 3.105691056910569, "grad_norm": 0.4351949989795685, "learning_rate": 6.881213266800988e-05, "loss": 0.0287, "step": 1910 },
    { "epoch": 3.1219512195121952, "grad_norm": 0.29242777824401855, "learning_rate": 6.779242989793892e-05, "loss": 0.0214, "step": 1920 },
    { "epoch": 3.138211382113821, "grad_norm": 0.27103492617607117, "learning_rate": 6.677645012840956e-05, "loss": 0.0262, "step": 1930 },
    { "epoch": 3.154471544715447, "grad_norm": 0.46795374155044556, "learning_rate": 6.576431080054581e-05, "loss": 0.0264, "step": 1940 },
    { "epoch": 3.1707317073170733, "grad_norm": 0.2594567537307739, "learning_rate": 6.475612891153969e-05, "loss": 0.0318, "step": 1950 },
    { "epoch": 3.186991869918699, "grad_norm": 0.2888941168785095, "learning_rate": 6.375202100112729e-05, "loss": 0.0318, "step": 1960 },
    { "epoch": 3.203252032520325, "grad_norm": 0.3916606307029724, "learning_rate": 6.275210313811739e-05, "loss": 0.0293, "step": 1970 },
    { "epoch": 3.2195121951219514, "grad_norm": 0.22073742747306824, "learning_rate": 6.175649090697448e-05, "loss": 0.0321, "step": 1980 },
    { "epoch": 3.2357723577235773, "grad_norm": 0.3153645694255829, "learning_rate": 6.0765299394458185e-05, "loss": 0.0265, "step": 1990 },
    { "epoch": 3.252032520325203, "grad_norm": 0.5098276138305664, "learning_rate": 5.977864317631965e-05, "loss": 0.0255, "step": 2000 },
    { "epoch": 3.2682926829268295, "grad_norm": 0.2877456843852997, "learning_rate": 5.879663630405736e-05, "loss": 0.0296, "step": 2010 },
    { "epoch": 3.2845528455284554, "grad_norm": 0.1817747801542282, "learning_rate": 5.781939229173365e-05, "loss": 0.0225, "step": 2020 },
    { "epoch": 3.3008130081300813, "grad_norm": 0.31750866770744324, "learning_rate": 5.684702410285292e-05, "loss": 0.0237, "step": 2030 },
    { "epoch": 3.317073170731707, "grad_norm": 0.2775154411792755, "learning_rate": 5.5879644137303797e-05, "loss": 0.0241, "step": 2040 },
    { "epoch": 3.3333333333333335, "grad_norm": 0.30817779898643494, "learning_rate": 5.491736421836646e-05, "loss": 0.0217, "step": 2050 },
    { "epoch": 3.3495934959349594, "grad_norm": 0.2524275481700897, "learning_rate": 5.396029557978657e-05, "loss": 0.0278, "step": 2060 },
    { "epoch": 3.3658536585365852, "grad_norm": 0.21641696989536285, "learning_rate": 5.3008548852917194e-05, "loss": 0.0266, "step": 2070 },
    { "epoch": 3.3821138211382116, "grad_norm": 0.32717615365982056, "learning_rate": 5.2062234053930504e-05, "loss": 0.027, "step": 2080 },
    { "epoch": 3.3983739837398375, "grad_norm": 0.41103601455688477, "learning_rate": 5.1121460571100776e-05, "loss": 0.0186, "step": 2090 },
    { "epoch": 3.4146341463414633, "grad_norm": 0.14661918580532074, "learning_rate": 5.018633715215948e-05, "loss": 0.0211, "step": 2100 },
    { "epoch": 3.430894308943089, "grad_norm": 0.22353650629520416, "learning_rate": 4.9256971891724845e-05, "loss": 0.0253, "step": 2110 },
    { "epoch": 3.4471544715447155, "grad_norm": 0.3574526906013489, "learning_rate": 4.833347221880685e-05, "loss": 0.0308, "step": 2120 },
    { "epoch": 3.4634146341463414, "grad_norm": 0.24301108717918396, "learning_rate": 4.7415944884388904e-05, "loss": 0.0295, "step": 2130 },
    { "epoch": 3.4796747967479673, "grad_norm": 0.28461310267448425, "learning_rate": 4.6504495949088154e-05, "loss": 0.024, "step": 2140 },
    { "epoch": 3.4959349593495936, "grad_norm": 0.2199145406484604, "learning_rate": 4.559923077089564e-05, "loss": 0.0188, "step": 2150 },
    { "epoch": 3.5121951219512195, "grad_norm": 0.24626000225543976, "learning_rate": 4.470025399299728e-05, "loss": 0.0295, "step": 2160 },
    { "epoch": 3.5284552845528454, "grad_norm": 0.1908273845911026, "learning_rate": 4.3807669531678054e-05, "loss": 0.0172, "step": 2170 },
    { "epoch": 3.5447154471544717, "grad_norm": 0.36190247535705566, "learning_rate": 4.292158056430966e-05, "loss": 0.0156, "step": 2180 },
    { "epoch": 3.5609756097560976, "grad_norm": 0.33335763216018677, "learning_rate": 4.204208951742393e-05, "loss": 0.0187, "step": 2190 },
    { "epoch": 3.5772357723577235, "grad_norm": 0.2627616226673126, "learning_rate": 4.116929805487307e-05, "loss": 0.022, "step": 2200 },
    { "epoch": 3.59349593495935, "grad_norm": 0.33589500188827515, "learning_rate": 4.0303307066077653e-05, "loss": 0.0251, "step": 2210 },
    { "epoch": 3.6097560975609757, "grad_norm": 0.2956457734107971, "learning_rate": 3.944421665436462e-05, "loss": 0.0214, "step": 2220 },
    { "epoch": 3.6260162601626016, "grad_norm": 0.1858706772327423, "learning_rate": 3.859212612539597e-05, "loss": 0.0269, "step": 2230 },
    { "epoch": 3.642276422764228, "grad_norm": 0.21992811560630798, "learning_rate": 3.7747133975689385e-05, "loss": 0.0231, "step": 2240 },
    { "epoch": 3.658536585365854, "grad_norm": 0.2564171850681305, "learning_rate": 3.690933788123296e-05, "loss": 0.0224, "step": 2250 },
    { "epoch": 3.6747967479674797, "grad_norm": 0.2475014477968216, "learning_rate": 3.607883468619414e-05, "loss": 0.0279, "step": 2260 },
    { "epoch": 3.6910569105691056, "grad_norm": 0.3992771506309509, "learning_rate": 3.525572039172528e-05, "loss": 0.0294, "step": 2270 },
    { "epoch": 3.7073170731707314, "grad_norm": 0.2070523500442505, "learning_rate": 3.4440090144866536e-05, "loss": 0.0255, "step": 2280 },
    { "epoch": 3.7235772357723578, "grad_norm": 0.3437440097332001, "learning_rate": 3.363203822754728e-05, "loss": 0.0238, "step": 2290 },
    { "epoch": 3.7398373983739837, "grad_norm": 0.2936142385005951, "learning_rate": 3.283165804568778e-05, "loss": 0.0222, "step": 2300 },
    { "epoch": 3.7560975609756095, "grad_norm": 0.41532281041145325, "learning_rate": 3.203904211840213e-05, "loss": 0.0194, "step": 2310 },
    { "epoch": 3.772357723577236, "grad_norm": 0.20870675146579742, "learning_rate": 3.1254282067303355e-05, "loss": 0.0226, "step": 2320 },
    { "epoch": 3.7886178861788617, "grad_norm": 0.22978569567203522, "learning_rate": 3.0477468605912784e-05, "loss": 0.0245, "step": 2330 },
    { "epoch": 3.8048780487804876, "grad_norm": 0.15536998212337494, "learning_rate": 2.970869152917387e-05, "loss": 0.0183, "step": 2340 },
    { "epoch": 3.821138211382114, "grad_norm": 0.2645808458328247, "learning_rate": 2.894803970307257e-05, "loss": 0.019, "step": 2350 },
    { "epoch": 3.83739837398374, "grad_norm": 0.17497260868549347, "learning_rate": 2.819560105436504e-05, "loss": 0.019, "step": 2360 },
    { "epoch": 3.8536585365853657, "grad_norm": 0.3548007905483246, "learning_rate": 2.7451462560413633e-05, "loss": 0.0233, "step": 2370 },
    { "epoch": 3.869918699186992, "grad_norm": 0.1929531693458557, "learning_rate": 2.6715710239132973e-05, "loss": 0.0196, "step": 2380 },
    { "epoch": 3.886178861788618, "grad_norm": 0.2864411175251007, "learning_rate": 2.5988429139046865e-05, "loss": 0.025, "step": 2390 },
    { "epoch": 3.902439024390244, "grad_norm": 0.11372525990009308, "learning_rate": 2.5269703329456996e-05, "loss": 0.0163, "step": 2400 },
    { "epoch": 3.91869918699187, "grad_norm": 0.23025590181350708, "learning_rate": 2.4559615890725284e-05, "loss": 0.0199, "step": 2410 },
    { "epoch": 3.934959349593496, "grad_norm": 0.14693354070186615, "learning_rate": 2.3858248904670056e-05, "loss": 0.0168, "step": 2420 },
    { "epoch": 3.951219512195122, "grad_norm": 0.18080760538578033, "learning_rate": 2.316568344507799e-05, "loss": 0.0145, "step": 2430 },
    { "epoch": 3.9674796747967482, "grad_norm": 0.2424541711807251, "learning_rate": 2.248199956833258e-05, "loss": 0.0181, "step": 2440 },
    { "epoch": 3.983739837398374, "grad_norm": 0.24186311662197113, "learning_rate": 2.1807276304159873e-05, "loss": 0.0235, "step": 2450 },
    { "epoch": 4.0, "grad_norm": 0.5643988847732544, "learning_rate": 2.1141591646493275e-05, "loss": 0.0222, "step": 2460 },
    { "epoch": 4.016260162601626, "grad_norm": 0.21844126284122467, "learning_rate": 2.0485022544457933e-05, "loss": 0.0151, "step": 2470 },
    { "epoch": 4.032520325203252, "grad_norm": 0.23713727295398712, "learning_rate": 1.983764489347574e-05, "loss": 0.0224, "step": 2480 },
    { "epoch": 4.048780487804878, "grad_norm": 0.16830649971961975, "learning_rate": 1.9199533526492463e-05, "loss": 0.0243, "step": 2490 },
    { "epoch": 4.065040650406504, "grad_norm": 0.31435462832450867, "learning_rate": 1.857076220532734e-05, "loss": 0.0229, "step": 2500 },
    { "epoch": 4.08130081300813, "grad_norm": 0.4781794548034668, "learning_rate": 1.7951403612146744e-05, "loss": 0.0186, "step": 2510 },
    { "epoch": 4.097560975609756, "grad_norm": 0.21629083156585693, "learning_rate": 1.7341529341062623e-05, "loss": 0.0168, "step": 2520 },
    { "epoch": 4.1138211382113825, "grad_norm": 0.14135605096817017, "learning_rate": 1.6741209889856513e-05, "loss": 0.0178, "step": 2530 },
    { "epoch": 4.130081300813008, "grad_norm": 0.1877690851688385, "learning_rate": 1.6150514651830483e-05, "loss": 0.0252, "step": 2540 },
    { "epoch": 4.146341463414634, "grad_norm": 0.5082934498786926, "learning_rate": 1.5569511907785783e-05, "loss": 0.0164, "step": 2550 },
    { "epoch": 4.16260162601626, "grad_norm": 0.20046326518058777, "learning_rate": 1.4998268818129802e-05, "loss": 0.0209, "step": 2560 },
    { "epoch": 4.178861788617886, "grad_norm": 0.15702232718467712, "learning_rate": 1.443685141511294e-05, "loss": 0.0263, "step": 2570 },
    { "epoch": 4.195121951219512, "grad_norm": 0.2763630449771881, "learning_rate": 1.3885324595195493e-05, "loss": 0.0218, "step": 2580 },
    { "epoch": 4.211382113821138, "grad_norm": 0.1242925375699997, "learning_rate": 1.3343752111546138e-05, "loss": 0.0139, "step": 2590 },
    { "epoch": 4.227642276422764, "grad_norm": 0.23633089661598206, "learning_rate": 1.2812196566672441e-05, "loss": 0.0192, "step": 2600 },
    { "epoch": 4.2439024390243905, "grad_norm": 0.1781703680753708, "learning_rate": 1.2290719405184291e-05, "loss": 0.0182, "step": 2610 },
    { "epoch": 4.260162601626016, "grad_norm": 0.20754830539226532, "learning_rate": 1.1779380906691361e-05, "loss": 0.0162, "step": 2620 },
    { "epoch": 4.276422764227642, "grad_norm": 0.3521351218223572, "learning_rate": 1.1278240178835197e-05, "loss": 0.0233, "step": 2630 },
    { "epoch": 4.2926829268292686, "grad_norm": 0.2133350968360901, "learning_rate": 1.0787355150456546e-05, "loss": 0.019, "step": 2640 },
    { "epoch": 4.308943089430894, "grad_norm": 0.22535590827465057, "learning_rate": 1.0306782564899352e-05, "loss": 0.0174, "step": 2650 },
    { "epoch": 4.32520325203252, "grad_norm": 0.361722856760025, "learning_rate": 9.83657797345141e-06, "loss": 0.016, "step": 2660 },
    { "epoch": 4.341463414634147, "grad_norm": 0.19668163359165192, "learning_rate": 9.376795728923016e-06, "loss": 0.0239, "step": 2670 },
    { "epoch": 4.357723577235772, "grad_norm": 0.1902504563331604, "learning_rate": 8.927488979364184e-06, "loss": 0.0148, "step": 2680 },
    { "epoch": 4.373983739837398, "grad_norm": 0.344314306974411, "learning_rate": 8.488709661920946e-06, "loss": 0.0132, "step": 2690 },
    { "epoch": 4.390243902439025, "grad_norm": 0.12004625797271729, "learning_rate": 8.060508496831743e-06, "loss": 0.0161, "step": 2700 },
    { "epoch": 4.40650406504065, "grad_norm": 0.22964505851268768, "learning_rate": 7.642934981564609e-06, "loss": 0.014, "step": 2710 },
    { "epoch": 4.4227642276422765, "grad_norm": 0.27576199173927307, "learning_rate": 7.23603738509534e-06, "loss": 0.0229, "step": 2720 },
    { "epoch": 4.439024390243903, "grad_norm": 0.12927354872226715, "learning_rate": 6.839862742328107e-06, "loss": 0.0104, "step": 2730 },
    { "epoch": 4.455284552845528, "grad_norm": 0.25259020924568176, "learning_rate": 6.454456848658363e-06, "loss": 0.0112, "step": 2740 },
    { "epoch": 4.471544715447155, "grad_norm": 0.22319383919239044, "learning_rate": 6.079864254679191e-06, "loss": 0.0149, "step": 2750 },
    { "epoch": 4.487804878048781, "grad_norm": 0.19844867289066315, "learning_rate": 5.716128261031628e-06, "loss": 0.0168, "step": 2760 },
    { "epoch": 4.504065040650406, "grad_norm": 0.23324230313301086, "learning_rate": 5.363290913399232e-06, "loss": 0.014, "step": 2770 },
    { "epoch": 4.520325203252033, "grad_norm": 0.22129973769187927, "learning_rate": 5.021392997647933e-06, "loss": 0.0155, "step": 2780 },
    { "epoch": 4.536585365853659, "grad_norm": 0.4064948260784149, "learning_rate": 4.690474035111448e-06, "loss": 0.0202, "step": 2790 },
    { "epoch": 4.5528455284552845, "grad_norm": 0.22724312543869019, "learning_rate": 4.370572278022788e-06, "loss": 0.0164, "step": 2800 },
    { "epoch": 4.569105691056911, "grad_norm": 0.22973546385765076, "learning_rate": 4.061724705092574e-06, "loss": 0.0111, "step": 2810 },
    { "epoch": 4.585365853658536, "grad_norm": 0.1258576661348343, "learning_rate": 3.76396701723456e-06, "loss": 0.0112, "step": 2820 },
    { "epoch": 4.6016260162601625, "grad_norm": 0.2755131423473358, "learning_rate": 3.4773336334387064e-06, "loss": 0.0223, "step": 2830 },
    { "epoch": 4.617886178861789, "grad_norm": 0.21121273934841156, "learning_rate": 3.2018576867926643e-06, "loss": 0.0156, "step": 2840 },
    { "epoch": 4.634146341463414, "grad_norm": 0.0909205749630928, "learning_rate": 2.937571020651775e-06, "loss": 0.0162, "step": 2850 },
    { "epoch": 4.650406504065041, "grad_norm": 0.19245898723602295, "learning_rate": 2.6845041849581033e-06, "loss": 0.0188, "step": 2860 },
    { "epoch": 4.666666666666667, "grad_norm": 0.24272821843624115, "learning_rate": 2.442686432709096e-06, "loss": 0.0191, "step": 2870 },
    { "epoch": 4.682926829268292, "grad_norm": 0.1934613585472107, "learning_rate": 2.212145716576142e-06, "loss": 0.0158, "step": 2880 },
    { "epoch": 4.699186991869919, "grad_norm": 0.19412636756896973, "learning_rate": 1.9929086856733447e-06, "loss": 0.0164, "step": 2890 },
    { "epoch": 4.715447154471545, "grad_norm": 0.26893916726112366, "learning_rate": 1.7850006824771004e-06, "loss": 0.0194, "step": 2900 },
    { "epoch": 4.7317073170731705, "grad_norm": 0.20123203098773956, "learning_rate": 1.5884457398966046e-06, "loss": 0.0188, "step": 2910 },
    { "epoch": 4.747967479674797, "grad_norm": 0.13909615576267242, "learning_rate": 1.4032665784958877e-06, "loss": 0.0162, "step": 2920 },
    { "epoch": 4.764227642276423, "grad_norm": 0.17143380641937256, "learning_rate": 1.229484603867348e-06, "loss": 0.0165, "step": 2930 },
    { "epoch": 4.780487804878049, "grad_norm": 0.11365949362516403, "learning_rate": 1.0671199041574542e-06, "loss": 0.0221, "step": 2940 },
    { "epoch": 4.796747967479675, "grad_norm": 0.22491714358329773, "learning_rate": 9.161912477446688e-07, "loss": 0.0252, "step": 2950 },
    { "epoch": 4.8130081300813, "grad_norm": 0.3459233343601227, "learning_rate": 7.76716081069917e-07, "loss": 0.0167, "step": 2960 },
    { "epoch": 4.829268292682927, "grad_norm": 0.23336616158485413, "learning_rate": 6.487105266199333e-07, "loss": 0.0156, "step": 2970 },
    { "epoch": 4.845528455284553, "grad_norm": 0.13937050104141235, "learning_rate": 5.321893810635525e-07, "loss": 0.0148, "step": 2980 },
    { "epoch": 4.861788617886178, "grad_norm": 0.34142452478408813, "learning_rate": 4.2716611354127747e-07, "loss": 0.0156, "step": 2990 },
    { "epoch": 4.878048780487805, "grad_norm": 0.1981387436389923, "learning_rate": 3.3365286410842465e-07, "loss": 0.029, "step": 3000 },
    { "epoch": 4.894308943089431, "grad_norm": 0.12347165495157242, "learning_rate": 2.516604423317248e-07, "loss": 0.016, "step": 3010 },
    { "epoch": 4.9105691056910565, "grad_norm": 0.17062854766845703, "learning_rate": 1.81198326039822e-07, "loss": 0.0165, "step": 3020 },
    { "epoch": 4.926829268292683, "grad_norm": 0.16616424918174744, "learning_rate": 1.2227466022770584e-07, "loss": 0.0167, "step": 3030 },
    { "epoch": 4.943089430894309, "grad_norm": 0.22849665582180023, "learning_rate": 7.489625611519779e-08, "loss": 0.0173, "step": 3040 },
    { "epoch": 4.959349593495935, "grad_norm": 0.3534289598464966, "learning_rate": 3.906859035960331e-08, "loss": 0.0238, "step": 3050 },
    { "epoch": 4.975609756097561, "grad_norm": 0.19245095551013947, "learning_rate": 1.4795804422651494e-08, "loss": 0.0134, "step": 3060 },
    { "epoch": 4.991869918699187, "grad_norm": 0.15183484554290771, "learning_rate": 2.080704091766972e-09, "loss": 0.0118, "step": 3070 },
    { "epoch": 5.001626016260163, "step": 3076, "total_flos": 1.078733099791872e+17, "train_loss": 0.05316672712002526, "train_runtime": 1399.9912, "train_samples_per_second": 35.155, "train_steps_per_second": 2.197 }
  ],
  "logging_steps": 10,
  "max_steps": 3076,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 6,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1.078733099791872e+17,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}
|
|