|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9940119760479041, |
|
"eval_steps": 500, |
|
"global_step": 83, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"completion_length": 1190.78125, |
|
"epoch": 0.011976047904191617, |
|
"grad_norm": 36.3756103515625, |
|
"kl": 0.0, |
|
"learning_rate": 1.111111111111111e-07, |
|
"loss": 0.0, |
|
"reward": 0.6835937760770321, |
|
"reward_std": 0.11635640449821949, |
|
"rewards/accuracy_reward": 0.6302083432674408, |
|
"rewards/format_reward": 0.0, |
|
"rewards/tag_count_reward": 0.0533854179084301, |
|
"step": 1 |
|
}, |
|
{ |
|
"completion_length": 1470.078125, |
|
"epoch": 0.023952095808383235, |
|
"grad_norm": 27.217134475708008, |
|
"kl": 0.0, |
|
"learning_rate": 2.222222222222222e-07, |
|
"loss": 0.0, |
|
"reward": 0.5325521007180214, |
|
"reward_std": 0.13405859377235174, |
|
"rewards/accuracy_reward": 0.4947916716337204, |
|
"rewards/format_reward": 0.0, |
|
"rewards/tag_count_reward": 0.0377604179084301, |
|
"step": 2 |
|
}, |
|
{ |
|
"completion_length": 1473.5, |
|
"epoch": 0.03592814371257485, |
|
"grad_norm": 12.41641902923584, |
|
"kl": 0.0007615089416503906, |
|
"learning_rate": 3.333333333333333e-07, |
|
"loss": 0.0, |
|
"reward": 0.4713541716337204, |
|
"reward_std": 0.11533699464052916, |
|
"rewards/accuracy_reward": 0.42187501257285476, |
|
"rewards/format_reward": 0.0, |
|
"rewards/tag_count_reward": 0.0494791679084301, |
|
"step": 3 |
|
}, |
|
{ |
|
"completion_length": 896.5625, |
|
"epoch": 0.04790419161676647, |
|
"grad_norm": 30.794960021972656, |
|
"kl": 0.0019729137420654297, |
|
"learning_rate": 4.444444444444444e-07, |
|
"loss": 0.0001, |
|
"reward": 0.5976562760770321, |
|
"reward_std": 0.13709542341530323, |
|
"rewards/accuracy_reward": 0.5625000074505806, |
|
"rewards/format_reward": 0.0, |
|
"rewards/tag_count_reward": 0.03515625197906047, |
|
"step": 4 |
|
}, |
|
{ |
|
"completion_length": 1053.5520935058594, |
|
"epoch": 0.059880239520958084, |
|
"grad_norm": 14.93273639678955, |
|
"kl": 0.0015816688537597656, |
|
"learning_rate": 5.555555555555555e-07, |
|
"loss": 0.0001, |
|
"reward": 0.5976562611758709, |
|
"reward_std": 0.16129255667328835, |
|
"rewards/accuracy_reward": 0.5625000111758709, |
|
"rewards/format_reward": 0.0, |
|
"rewards/tag_count_reward": 0.03515625, |
|
"step": 5 |
|
}, |
|
{ |
|
"completion_length": 1321.125, |
|
"epoch": 0.0718562874251497, |
|
"grad_norm": 20.902679443359375, |
|
"kl": 0.00043714046478271484, |
|
"learning_rate": 6.666666666666666e-07, |
|
"loss": 0.0, |
|
"reward": 0.5429687525611371, |
|
"reward_std": 0.07764231134206057, |
|
"rewards/accuracy_reward": 0.5000000074505806, |
|
"rewards/format_reward": 0.0, |
|
"rewards/tag_count_reward": 0.04296875069849193, |
|
"step": 6 |
|
}, |
|
{ |
|
"completion_length": 996.9166717529297, |
|
"epoch": 0.08383233532934131, |
|
"grad_norm": 32.750858306884766, |
|
"kl": 0.006168365478515625, |
|
"learning_rate": 7.777777777777778e-07, |
|
"loss": 0.0002, |
|
"reward": 0.7981771156191826, |
|
"reward_std": 0.12683826312422752, |
|
"rewards/accuracy_reward": 0.7239583507180214, |
|
"rewards/format_reward": 0.0, |
|
"rewards/tag_count_reward": 0.0742187537252903, |
|
"step": 7 |
|
}, |
|
{ |
|
"completion_length": 1024.3125, |
|
"epoch": 0.09580838323353294, |
|
"grad_norm": 18.83184051513672, |
|
"kl": 0.016448974609375, |
|
"learning_rate": 8.888888888888888e-07, |
|
"loss": 0.0007, |
|
"reward": 0.720052108168602, |
|
"reward_std": 0.15430233627557755, |
|
"rewards/accuracy_reward": 0.6718750149011612, |
|
"rewards/format_reward": 0.0, |
|
"rewards/tag_count_reward": 0.048177084885537624, |
|
"step": 8 |
|
}, |
|
{ |
|
"completion_length": 1155.125, |
|
"epoch": 0.10778443113772455, |
|
"grad_norm": 16.292612075805664, |
|
"kl": 0.025983810424804688, |
|
"learning_rate": 1e-06, |
|
"loss": 0.001, |
|
"reward": 0.5546875074505806, |
|
"reward_std": 0.10253959987312555, |
|
"rewards/accuracy_reward": 0.5052083432674408, |
|
"rewards/format_reward": 0.0, |
|
"rewards/tag_count_reward": 0.049479166977107525, |
|
"step": 9 |
|
}, |
|
{ |
|
"completion_length": 1780.625, |
|
"epoch": 0.11976047904191617, |
|
"grad_norm": 4.369594097137451, |
|
"kl": 0.03295087814331055, |
|
"learning_rate": 9.995945347921067e-07, |
|
"loss": 0.0013, |
|
"reward": 0.3697916716337204, |
|
"reward_std": 0.13915570452809334, |
|
"rewards/accuracy_reward": 0.3489583507180214, |
|
"rewards/format_reward": 0.0, |
|
"rewards/tag_count_reward": 0.02083333395421505, |
|
"step": 10 |
|
}, |
|
{ |
|
"completion_length": 1026.125, |
|
"epoch": 0.1317365269461078, |
|
"grad_norm": 19.12957763671875, |
|
"kl": 0.23905563354492188, |
|
"learning_rate": 9.983788698441369e-07, |
|
"loss": 0.0096, |
|
"reward": 0.6992187798023224, |
|
"reward_std": 0.12939812522381544, |
|
"rewards/accuracy_reward": 0.6406250074505806, |
|
"rewards/format_reward": 0.0, |
|
"rewards/tag_count_reward": 0.05859375256113708, |
|
"step": 11 |
|
}, |
|
{ |
|
"completion_length": 1603.2135620117188, |
|
"epoch": 0.1437125748502994, |
|
"grad_norm": 18.756772994995117, |
|
"kl": 0.11810016632080078, |
|
"learning_rate": 9.963551958664945e-07, |
|
"loss": 0.0047, |
|
"reward": 0.3658854365348816, |
|
"reward_std": 0.0706010814756155, |
|
"rewards/accuracy_reward": 0.3333333432674408, |
|
"rewards/format_reward": 0.0, |
|
"rewards/tag_count_reward": 0.03255208441987634, |
|
"step": 12 |
|
}, |
|
{ |
|
"completion_length": 1135.5885620117188, |
|
"epoch": 0.15568862275449102, |
|
"grad_norm": 11.572704315185547, |
|
"kl": 0.27099609375, |
|
"learning_rate": 9.935271596564688e-07, |
|
"loss": 0.0108, |
|
"reward": 0.7382812723517418, |
|
"reward_std": 0.11970062833279371, |
|
"rewards/accuracy_reward": 0.6770833432674408, |
|
"rewards/format_reward": 0.0, |
|
"rewards/tag_count_reward": 0.0611979179084301, |
|
"step": 13 |
|
}, |
|
{ |
|
"completion_length": 1584.75, |
|
"epoch": 0.16766467065868262, |
|
"grad_norm": 4.483678817749023, |
|
"kl": 0.29352617263793945, |
|
"learning_rate": 9.898998575264588e-07, |
|
"loss": 0.0117, |
|
"reward": 0.4557291716337204, |
|
"reward_std": 0.07522482145577669, |
|
"rewards/accuracy_reward": 0.416666679084301, |
|
"rewards/format_reward": 0.0, |
|
"rewards/tag_count_reward": 0.03906250186264515, |
|
"step": 14 |
|
}, |
|
{ |
|
"completion_length": 1438.75, |
|
"epoch": 0.17964071856287425, |
|
"grad_norm": 4.186062812805176, |
|
"kl": 0.42242431640625, |
|
"learning_rate": 9.854798261200746e-07, |
|
"loss": 0.0169, |
|
"reward": 0.502604179084301, |
|
"reward_std": 0.1203515324741602, |
|
"rewards/accuracy_reward": 0.43750001303851604, |
|
"rewards/format_reward": 0.0, |
|
"rewards/tag_count_reward": 0.06510416697710752, |
|
"step": 15 |
|
}, |
|
{ |
|
"completion_length": 1288.34375, |
|
"epoch": 0.19161676646706588, |
|
"grad_norm": 15.665771484375, |
|
"kl": 0.84912109375, |
|
"learning_rate": 9.80275030632663e-07, |
|
"loss": 0.034, |
|
"reward": 0.6640625298023224, |
|
"reward_std": 0.10871894843876362, |
|
"rewards/accuracy_reward": 0.5885416865348816, |
|
"rewards/format_reward": 0.0, |
|
"rewards/tag_count_reward": 0.07552083674818277, |
|
"step": 16 |
|
}, |
|
{ |
|
"completion_length": 1175.25, |
|
"epoch": 0.20359281437125748, |
|
"grad_norm": 8.767546653747559, |
|
"kl": 0.7672920227050781, |
|
"learning_rate": 9.742948504574879e-07, |
|
"loss": 0.0306, |
|
"reward": 0.6406250149011612, |
|
"reward_std": 0.13799083977937698, |
|
"rewards/accuracy_reward": 0.5572916753590107, |
|
"rewards/format_reward": 0.0, |
|
"rewards/tag_count_reward": 0.08333333488553762, |
|
"step": 17 |
|
}, |
|
{ |
|
"completion_length": 1315.5, |
|
"epoch": 0.2155688622754491, |
|
"grad_norm": 2.373502016067505, |
|
"kl": 0.21795654296875, |
|
"learning_rate": 9.675500622834293e-07, |
|
"loss": 0.0087, |
|
"reward": 0.42578125, |
|
"reward_std": 0.15298314206302166, |
|
"rewards/accuracy_reward": 0.3645833507180214, |
|
"rewards/format_reward": 0.0, |
|
"rewards/tag_count_reward": 0.061197918839752674, |
|
"step": 18 |
|
}, |
|
{ |
|
"completion_length": 1290.0625, |
|
"epoch": 0.2275449101796407, |
|
"grad_norm": 8.432291984558105, |
|
"kl": 0.6476497650146484, |
|
"learning_rate": 9.60052820674661e-07, |
|
"loss": 0.0259, |
|
"reward": 0.673177108168602, |
|
"reward_std": 0.12136406265199184, |
|
"rewards/accuracy_reward": 0.6041666772216558, |
|
"rewards/format_reward": 0.0, |
|
"rewards/tag_count_reward": 0.06901041883975267, |
|
"step": 19 |
|
}, |
|
{ |
|
"completion_length": 1698.1666870117188, |
|
"epoch": 0.23952095808383234, |
|
"grad_norm": 7.2242431640625, |
|
"kl": 0.6529922485351562, |
|
"learning_rate": 9.518166361673058e-07, |
|
"loss": 0.0261, |
|
"reward": 0.38151043420657516, |
|
"reward_std": 0.08616631850600243, |
|
"rewards/accuracy_reward": 0.3333333432674408, |
|
"rewards/format_reward": 0.0, |
|
"rewards/tag_count_reward": 0.04817708535119891, |
|
"step": 20 |
|
}, |
|
{ |
|
"completion_length": 1424.734375, |
|
"epoch": 0.25149700598802394, |
|
"grad_norm": 6.204524993896484, |
|
"kl": 0.6253471374511719, |
|
"learning_rate": 9.428563509225346e-07, |
|
"loss": 0.0251, |
|
"reward": 0.4765625149011612, |
|
"reward_std": 0.08804207853972912, |
|
"rewards/accuracy_reward": 0.416666679084301, |
|
"rewards/format_reward": 0.0, |
|
"rewards/tag_count_reward": 0.059895834885537624, |
|
"step": 21 |
|
}, |
|
{ |
|
"completion_length": 1133.75, |
|
"epoch": 0.2634730538922156, |
|
"grad_norm": 3.5956451892852783, |
|
"kl": 0.60986328125, |
|
"learning_rate": 9.3318811197999e-07, |
|
"loss": 0.0244, |
|
"reward": 0.6497396156191826, |
|
"reward_std": 0.106993043795228, |
|
"rewards/accuracy_reward": 0.5937500074505806, |
|
"rewards/format_reward": 0.0, |
|
"rewards/tag_count_reward": 0.055989584885537624, |
|
"step": 22 |
|
}, |
|
{ |
|
"completion_length": 2152.4479370117188, |
|
"epoch": 0.2754491017964072, |
|
"grad_norm": 1.9209452867507935, |
|
"kl": 0.004711151123046875, |
|
"learning_rate": 9.228293421597289e-07, |
|
"loss": 0.0002, |
|
"reward": 0.2018229179084301, |
|
"reward_std": 0.0970163643360138, |
|
"rewards/accuracy_reward": 0.17187500512227416, |
|
"rewards/format_reward": 0.0, |
|
"rewards/tag_count_reward": 0.029947917442768812, |
|
"step": 23 |
|
}, |
|
{ |
|
"completion_length": 1283.875, |
|
"epoch": 0.2874251497005988, |
|
"grad_norm": 3.2163443565368652, |
|
"kl": 0.5288314819335938, |
|
"learning_rate": 9.117987086651232e-07, |
|
"loss": 0.0211, |
|
"reward": 0.5130208432674408, |
|
"reward_std": 0.13065862283110619, |
|
"rewards/accuracy_reward": 0.44791667675599456, |
|
"rewards/format_reward": 0.0, |
|
"rewards/tag_count_reward": 0.06510416883975267, |
|
"step": 24 |
|
}, |
|
{ |
|
"completion_length": 1132.5625, |
|
"epoch": 0.2994011976047904, |
|
"grad_norm": 3.4035706520080566, |
|
"kl": 1.2806243896484375, |
|
"learning_rate": 9.001160894432978e-07, |
|
"loss": 0.0513, |
|
"reward": 0.6106770932674408, |
|
"reward_std": 0.13533879444003105, |
|
"rewards/accuracy_reward": 0.5312500149011612, |
|
"rewards/format_reward": 0.0, |
|
"rewards/tag_count_reward": 0.07942708488553762, |
|
"step": 25 |
|
}, |
|
{ |
|
"completion_length": 1262.109375, |
|
"epoch": 0.31137724550898205, |
|
"grad_norm": 6.233963966369629, |
|
"kl": 0.724578857421875, |
|
"learning_rate": 8.878025373637259e-07, |
|
"loss": 0.029, |
|
"reward": 0.6315104318782687, |
|
"reward_std": 0.13147221505641937, |
|
"rewards/accuracy_reward": 0.5625000149011612, |
|
"rewards/format_reward": 0.0, |
|
"rewards/tag_count_reward": 0.06901041883975267, |
|
"step": 26 |
|
}, |
|
{ |
|
"completion_length": 1111.1875, |
|
"epoch": 0.32335329341317365, |
|
"grad_norm": 11.196812629699707, |
|
"kl": 1.4609375, |
|
"learning_rate": 8.748802422795359e-07, |
|
"loss": 0.0584, |
|
"reward": 0.7526042014360428, |
|
"reward_std": 0.11228201538324356, |
|
"rewards/accuracy_reward": 0.666666679084301, |
|
"rewards/format_reward": 0.0, |
|
"rewards/tag_count_reward": 0.08593750279396772, |
|
"step": 27 |
|
}, |
|
{ |
|
"completion_length": 1422.9375, |
|
"epoch": 0.33532934131736525, |
|
"grad_norm": 3.3535704612731934, |
|
"kl": 0.4104576110839844, |
|
"learning_rate": 8.613724910398959e-07, |
|
"loss": 0.0164, |
|
"reward": 0.6901042014360428, |
|
"reward_std": 0.15475903172045946, |
|
"rewards/accuracy_reward": 0.6093750149011612, |
|
"rewards/format_reward": 0.0, |
|
"rewards/tag_count_reward": 0.08072917046956718, |
|
"step": 28 |
|
}, |
|
{ |
|
"completion_length": 1568.6198120117188, |
|
"epoch": 0.3473053892215569, |
|
"grad_norm": 2.9321556091308594, |
|
"kl": 0.41168212890625, |
|
"learning_rate": 8.473036255255366e-07, |
|
"loss": 0.0165, |
|
"reward": 0.3984375111758709, |
|
"reward_std": 0.19856118597090244, |
|
"rewards/accuracy_reward": 0.338541679084301, |
|
"rewards/format_reward": 0.0, |
|
"rewards/tag_count_reward": 0.05989583395421505, |
|
"step": 29 |
|
}, |
|
{ |
|
"completion_length": 1832.75, |
|
"epoch": 0.3592814371257485, |
|
"grad_norm": 4.1487956047058105, |
|
"kl": 0.36643218994140625, |
|
"learning_rate": 8.32698998783039e-07, |
|
"loss": 0.0147, |
|
"reward": 0.3750000149011612, |
|
"reward_std": 0.17254010029137135, |
|
"rewards/accuracy_reward": 0.3333333432674408, |
|
"rewards/format_reward": 0.0, |
|
"rewards/tag_count_reward": 0.0416666679084301, |
|
"step": 30 |
|
}, |
|
{ |
|
"completion_length": 1245.125, |
|
"epoch": 0.3712574850299401, |
|
"grad_norm": 2.372616767883301, |
|
"kl": 1.181640625, |
|
"learning_rate": 8.17584929336929e-07, |
|
"loss": 0.0472, |
|
"reward": 0.604166679084301, |
|
"reward_std": 0.1132371760904789, |
|
"rewards/accuracy_reward": 0.5052083432674408, |
|
"rewards/format_reward": 0.0, |
|
"rewards/tag_count_reward": 0.09895833674818277, |
|
"step": 31 |
|
}, |
|
{ |
|
"completion_length": 983.375, |
|
"epoch": 0.38323353293413176, |
|
"grad_norm": 6.431793212890625, |
|
"kl": 1.734375, |
|
"learning_rate": 8.019886537619179e-07, |
|
"loss": 0.0694, |
|
"reward": 0.614583358168602, |
|
"reward_std": 0.13955211825668812, |
|
"rewards/accuracy_reward": 0.5468750223517418, |
|
"rewards/format_reward": 0.0, |
|
"rewards/tag_count_reward": 0.06770833488553762, |
|
"step": 32 |
|
}, |
|
{ |
|
"completion_length": 1390.5625, |
|
"epoch": 0.39520958083832336, |
|
"grad_norm": 3.662283420562744, |
|
"kl": 0.57391357421875, |
|
"learning_rate": 7.859382776007543e-07, |
|
"loss": 0.023, |
|
"reward": 0.48828126303851604, |
|
"reward_std": 0.11240259557962418, |
|
"rewards/accuracy_reward": 0.4166666716337204, |
|
"rewards/format_reward": 0.0, |
|
"rewards/tag_count_reward": 0.0716145858168602, |
|
"step": 33 |
|
}, |
|
{ |
|
"completion_length": 1514.8958740234375, |
|
"epoch": 0.40718562874251496, |
|
"grad_norm": 4.9103546142578125, |
|
"kl": 1.3856163024902344, |
|
"learning_rate": 7.694627247161356e-07, |
|
"loss": 0.0553, |
|
"reward": 0.5234375055879354, |
|
"reward_std": 0.10020329616963863, |
|
"rewards/accuracy_reward": 0.416666679084301, |
|
"rewards/format_reward": 0.0, |
|
"rewards/tag_count_reward": 0.10677083767950535, |
|
"step": 34 |
|
}, |
|
{ |
|
"completion_length": 1381.875, |
|
"epoch": 0.41916167664670656, |
|
"grad_norm": 15.807221412658691, |
|
"kl": 1.150360107421875, |
|
"learning_rate": 7.525916851679529e-07, |
|
"loss": 0.0461, |
|
"reward": 0.4739583432674408, |
|
"reward_std": 0.09151106514036655, |
|
"rewards/accuracy_reward": 0.416666679084301, |
|
"rewards/format_reward": 0.0, |
|
"rewards/tag_count_reward": 0.057291668839752674, |
|
"step": 35 |
|
}, |
|
{ |
|
"completion_length": 1381.5, |
|
"epoch": 0.4311377245508982, |
|
"grad_norm": 2.794029712677002, |
|
"kl": 0.950286865234375, |
|
"learning_rate": 7.353555617097967e-07, |
|
"loss": 0.038, |
|
"reward": 0.5833333386108279, |
|
"reward_std": 0.08740208484232426, |
|
"rewards/accuracy_reward": 0.5000000149011612, |
|
"rewards/format_reward": 0.0, |
|
"rewards/tag_count_reward": 0.08333333674818277, |
|
"step": 36 |
|
}, |
|
{ |
|
"completion_length": 1376.0625, |
|
"epoch": 0.4431137724550898, |
|
"grad_norm": 6.0238356590271, |
|
"kl": 0.79541015625, |
|
"learning_rate": 7.177854150011389e-07, |
|
"loss": 0.0318, |
|
"reward": 0.549479179084301, |
|
"reward_std": 0.11059301160275936, |
|
"rewards/accuracy_reward": 0.4739583432674408, |
|
"rewards/format_reward": 0.0, |
|
"rewards/tag_count_reward": 0.07552083488553762, |
|
"step": 37 |
|
}, |
|
{ |
|
"completion_length": 1776.8958740234375, |
|
"epoch": 0.4550898203592814, |
|
"grad_norm": 1.405756950378418, |
|
"kl": 0.7037200927734375, |
|
"learning_rate": 6.999129076339259e-07, |
|
"loss": 0.028, |
|
"reward": 0.39843751629814506, |
|
"reward_std": 0.1277984417974949, |
|
"rewards/accuracy_reward": 0.3489583386108279, |
|
"rewards/format_reward": 0.0, |
|
"rewards/tag_count_reward": 0.04947916744276881, |
|
"step": 38 |
|
}, |
|
{ |
|
"completion_length": 1105.6875, |
|
"epoch": 0.46706586826347307, |
|
"grad_norm": 3.609495162963867, |
|
"kl": 1.002105712890625, |
|
"learning_rate": 6.817702470744477e-07, |
|
"loss": 0.0401, |
|
"reward": 0.5859375149011612, |
|
"reward_std": 0.10204238072037697, |
|
"rewards/accuracy_reward": 0.5000000074505806, |
|
"rewards/format_reward": 0.0, |
|
"rewards/tag_count_reward": 0.0859375037252903, |
|
"step": 39 |
|
}, |
|
{ |
|
"completion_length": 1078.125, |
|
"epoch": 0.47904191616766467, |
|
"grad_norm": 9.784010887145996, |
|
"kl": 1.2890625, |
|
"learning_rate": 6.633901276233064e-07, |
|
"loss": 0.0517, |
|
"reward": 0.673177108168602, |
|
"reward_std": 0.11213659681379795, |
|
"rewards/accuracy_reward": 0.5885416865348816, |
|
"rewards/format_reward": 0.0, |
|
"rewards/tag_count_reward": 0.08463541977107525, |
|
"step": 40 |
|
}, |
|
{ |
|
"completion_length": 1378.8229370117188, |
|
"epoch": 0.49101796407185627, |
|
"grad_norm": 1.9631364345550537, |
|
"kl": 0.4476318359375, |
|
"learning_rate": 6.448056714980767e-07, |
|
"loss": 0.0179, |
|
"reward": 0.4648437676951289, |
|
"reward_std": 0.12703735567629337, |
|
"rewards/accuracy_reward": 0.3906250149011612, |
|
"rewards/format_reward": 0.0, |
|
"rewards/tag_count_reward": 0.07421875093132257, |
|
"step": 41 |
|
}, |
|
{ |
|
"completion_length": 1081.8125, |
|
"epoch": 0.5029940119760479, |
|
"grad_norm": 3.6088380813598633, |
|
"kl": 1.1484375, |
|
"learning_rate": 6.260503691448321e-07, |
|
"loss": 0.046, |
|
"reward": 0.8606771230697632, |
|
"reward_std": 0.1145353289321065, |
|
"rewards/accuracy_reward": 0.7552083432674408, |
|
"rewards/format_reward": 0.0, |
|
"rewards/tag_count_reward": 0.1054687537252903, |
|
"step": 42 |
|
}, |
|
{ |
|
"completion_length": 1213.71875, |
|
"epoch": 0.5149700598802395, |
|
"grad_norm": 6.979413032531738, |
|
"kl": 0.8447265625, |
|
"learning_rate": 6.071580188860954e-07, |
|
"loss": 0.0339, |
|
"reward": 0.5833333432674408, |
|
"reward_std": 0.11912628076970577, |
|
"rewards/accuracy_reward": 0.5052083507180214, |
|
"rewards/format_reward": 0.0, |
|
"rewards/tag_count_reward": 0.07812500186264515, |
|
"step": 43 |
|
}, |
|
{ |
|
"completion_length": 1243.1823120117188, |
|
"epoch": 0.5269461077844312, |
|
"grad_norm": 4.323620319366455, |
|
"kl": 0.6096343994140625, |
|
"learning_rate": 5.881626660139791e-07, |
|
"loss": 0.0245, |
|
"reward": 0.5872395960614085, |
|
"reward_std": 0.12293908558785915, |
|
"rewards/accuracy_reward": 0.5052083432674408, |
|
"rewards/format_reward": 0.0, |
|
"rewards/tag_count_reward": 0.08203125186264515, |
|
"step": 44 |
|
}, |
|
{ |
|
"completion_length": 1625.1875, |
|
"epoch": 0.5389221556886228, |
|
"grad_norm": 6.710347652435303, |
|
"kl": 0.7031707763671875, |
|
"learning_rate": 5.690985414382668e-07, |
|
"loss": 0.0281, |
|
"reward": 0.5312500149011612, |
|
"reward_std": 0.12612489983439445, |
|
"rewards/accuracy_reward": 0.463541679084301, |
|
"rewards/format_reward": 0.0, |
|
"rewards/tag_count_reward": 0.06770833535119891, |
|
"step": 45 |
|
}, |
|
{ |
|
"completion_length": 1616.2864685058594, |
|
"epoch": 0.5508982035928144, |
|
"grad_norm": 5.034140110015869, |
|
"kl": 0.63177490234375, |
|
"learning_rate": 5.5e-07, |
|
"loss": 0.0253, |
|
"reward": 0.38932292722165585, |
|
"reward_std": 0.09617834351956844, |
|
"rewards/accuracy_reward": 0.3333333432674408, |
|
"rewards/format_reward": 0.0, |
|
"rewards/tag_count_reward": 0.0559895858168602, |
|
"step": 46 |
|
}, |
|
{ |
|
"completion_length": 1878.4010620117188, |
|
"epoch": 0.562874251497006, |
|
"grad_norm": 6.528008460998535, |
|
"kl": 0.196319580078125, |
|
"learning_rate": 5.309014585617334e-07, |
|
"loss": 0.0079, |
|
"reward": 0.21614583837799728, |
|
"reward_std": 0.08288709167391062, |
|
"rewards/accuracy_reward": 0.1666666716337204, |
|
"rewards/format_reward": 0.0, |
|
"rewards/tag_count_reward": 0.04947916860692203, |
|
"step": 47 |
|
}, |
|
{ |
|
"completion_length": 1067.484375, |
|
"epoch": 0.5748502994011976, |
|
"grad_norm": 4.622894287109375, |
|
"kl": 1.21875, |
|
"learning_rate": 5.11837333986021e-07, |
|
"loss": 0.0487, |
|
"reward": 0.9218750298023224, |
|
"reward_std": 0.09687121585011482, |
|
"rewards/accuracy_reward": 0.8333333432674408, |
|
"rewards/format_reward": 0.0, |
|
"rewards/tag_count_reward": 0.08854166977107525, |
|
"step": 48 |
|
}, |
|
{ |
|
"completion_length": 1743.0573120117188, |
|
"epoch": 0.5868263473053892, |
|
"grad_norm": 5.265244007110596, |
|
"kl": 0.7179183959960938, |
|
"learning_rate": 4.928419811139045e-07, |
|
"loss": 0.0287, |
|
"reward": 0.4114583358168602, |
|
"reward_std": 0.11536262556910515, |
|
"rewards/accuracy_reward": 0.3333333432674408, |
|
"rewards/format_reward": 0.0, |
|
"rewards/tag_count_reward": 0.07812500186264515, |
|
"step": 49 |
|
}, |
|
{ |
|
"completion_length": 1336.625, |
|
"epoch": 0.5988023952095808, |
|
"grad_norm": 5.1192240715026855, |
|
"kl": 1.0614166259765625, |
|
"learning_rate": 4.739496308551679e-07, |
|
"loss": 0.0425, |
|
"reward": 0.6015625204890966, |
|
"reward_std": 0.11182517930865288, |
|
"rewards/accuracy_reward": 0.5000000074505806, |
|
"rewards/format_reward": 0.0, |
|
"rewards/tag_count_reward": 0.10156250558793545, |
|
"step": 50 |
|
}, |
|
{ |
|
"completion_length": 1601.1458435058594, |
|
"epoch": 0.6107784431137725, |
|
"grad_norm": 7.076495170593262, |
|
"kl": 1.0777587890625, |
|
"learning_rate": 4.551943285019233e-07, |
|
"loss": 0.0433, |
|
"reward": 0.4830729365348816, |
|
"reward_std": 0.09971196111291647, |
|
"rewards/accuracy_reward": 0.416666679084301, |
|
"rewards/format_reward": 0.0, |
|
"rewards/tag_count_reward": 0.06640625093132257, |
|
"step": 51 |
|
}, |
|
{ |
|
"completion_length": 1865.421875, |
|
"epoch": 0.6227544910179641, |
|
"grad_norm": 1.5869081020355225, |
|
"kl": 0.4460277557373047, |
|
"learning_rate": 4.3660987237669377e-07, |
|
"loss": 0.0178, |
|
"reward": 0.3958333507180214, |
|
"reward_std": 0.08472462091594934, |
|
"rewards/accuracy_reward": 0.3333333432674408, |
|
"rewards/format_reward": 0.0, |
|
"rewards/tag_count_reward": 0.06250000186264515, |
|
"step": 52 |
|
}, |
|
{ |
|
"completion_length": 1454.328125, |
|
"epoch": 0.6347305389221557, |
|
"grad_norm": 7.47260046005249, |
|
"kl": 1.51171875, |
|
"learning_rate": 4.182297529255524e-07, |
|
"loss": 0.0607, |
|
"reward": 0.4934896007180214, |
|
"reward_std": 0.10642260871827602, |
|
"rewards/accuracy_reward": 0.416666679084301, |
|
"rewards/format_reward": 0.0, |
|
"rewards/tag_count_reward": 0.0768229179084301, |
|
"step": 53 |
|
}, |
|
{ |
|
"completion_length": 1444.4427185058594, |
|
"epoch": 0.6467065868263473, |
|
"grad_norm": 4.976149559020996, |
|
"kl": 1.0859375, |
|
"learning_rate": 4.0008709236607405e-07, |
|
"loss": 0.0434, |
|
"reward": 0.4921875074505806, |
|
"reward_std": 0.10759196057915688, |
|
"rewards/accuracy_reward": 0.416666679084301, |
|
"rewards/format_reward": 0.0, |
|
"rewards/tag_count_reward": 0.0755208358168602, |
|
"step": 54 |
|
}, |
|
{ |
|
"completion_length": 1585.125, |
|
"epoch": 0.6586826347305389, |
|
"grad_norm": 2.437457323074341, |
|
"kl": 1.212890625, |
|
"learning_rate": 3.8221458499886115e-07, |
|
"loss": 0.0486, |
|
"reward": 0.6250000102445483, |
|
"reward_std": 0.12861231248825788, |
|
"rewards/accuracy_reward": 0.5520833432674408, |
|
"rewards/format_reward": 0.0, |
|
"rewards/tag_count_reward": 0.07291666883975267, |
|
"step": 55 |
|
}, |
|
{ |
|
"completion_length": 1848.125, |
|
"epoch": 0.6706586826347305, |
|
"grad_norm": 2.6519415378570557, |
|
"kl": 0.339874267578125, |
|
"learning_rate": 3.646444382902033e-07, |
|
"loss": 0.0136, |
|
"reward": 0.3164062574505806, |
|
"reward_std": 0.10790392756462097, |
|
"rewards/accuracy_reward": 0.2500000074505806, |
|
"rewards/format_reward": 0.0, |
|
"rewards/tag_count_reward": 0.06640625, |
|
"step": 56 |
|
}, |
|
{ |
|
"completion_length": 1311.75, |
|
"epoch": 0.6826347305389222, |
|
"grad_norm": 4.369971752166748, |
|
"kl": 1.4095611572265625, |
|
"learning_rate": 3.474083148320469e-07, |
|
"loss": 0.0565, |
|
"reward": 0.6640625149011612, |
|
"reward_std": 0.14868063479661942, |
|
"rewards/accuracy_reward": 0.5520833432674408, |
|
"rewards/format_reward": 0.0, |
|
"rewards/tag_count_reward": 0.11197916883975267, |
|
"step": 57 |
|
}, |
|
{ |
|
"completion_length": 1429.4219207763672, |
|
"epoch": 0.6946107784431138, |
|
"grad_norm": 6.277844429016113, |
|
"kl": 0.970916748046875, |
|
"learning_rate": 3.3053727528386457e-07, |
|
"loss": 0.0389, |
|
"reward": 0.5846354365348816, |
|
"reward_std": 0.1101480070501566, |
|
"rewards/accuracy_reward": 0.5000000149011612, |
|
"rewards/format_reward": 0.0, |
|
"rewards/tag_count_reward": 0.0846354179084301, |
|
"step": 58 |
|
}, |
|
{ |
|
"completion_length": 911.0625, |
|
"epoch": 0.7065868263473054, |
|
"grad_norm": 5.268196105957031, |
|
"kl": 1.7060546875, |
|
"learning_rate": 3.140617223992458e-07, |
|
"loss": 0.0683, |
|
"reward": 0.856770858168602, |
|
"reward_std": 0.12339456751942635, |
|
"rewards/accuracy_reward": 0.7500000149011612, |
|
"rewards/format_reward": 0.0, |
|
"rewards/tag_count_reward": 0.1067708395421505, |
|
"step": 59 |
|
}, |
|
{ |
|
"completion_length": 1568.5625, |
|
"epoch": 0.718562874251497, |
|
"grad_norm": 3.4410088062286377, |
|
"kl": 0.6736679077148438, |
|
"learning_rate": 2.980113462380821e-07, |
|
"loss": 0.027, |
|
"reward": 0.4453125223517418, |
|
"reward_std": 0.13059347681701183, |
|
"rewards/accuracy_reward": 0.3750000037252903, |
|
"rewards/format_reward": 0.0, |
|
"rewards/tag_count_reward": 0.07031250139698386, |
|
"step": 60 |
|
}, |
|
{ |
|
"completion_length": 1302.6875, |
|
"epoch": 0.7305389221556886, |
|
"grad_norm": 5.513269901275635, |
|
"kl": 1.0953369140625, |
|
"learning_rate": 2.82415070663071e-07, |
|
"loss": 0.0437, |
|
"reward": 0.6171875223517418, |
|
"reward_std": 0.11416286043822765, |
|
"rewards/accuracy_reward": 0.5000000149011612, |
|
"rewards/format_reward": 0.0, |
|
"rewards/tag_count_reward": 0.11718750186264515, |
|
"step": 61 |
|
}, |
|
{ |
|
"completion_length": 1684.984375, |
|
"epoch": 0.7425149700598802, |
|
"grad_norm": 3.3617103099823, |
|
"kl": 0.6853790283203125, |
|
"learning_rate": 2.673010012169609e-07, |
|
"loss": 0.0274, |
|
"reward": 0.4010416744276881, |
|
"reward_std": 0.08693839982151985, |
|
"rewards/accuracy_reward": 0.3333333432674408, |
|
"rewards/format_reward": 0.0, |
|
"rewards/tag_count_reward": 0.06770833488553762, |
|
"step": 62 |
|
}, |
|
{ |
|
"completion_length": 1815.5, |
|
"epoch": 0.7544910179640718, |
|
"grad_norm": 3.155616283416748, |
|
"kl": 0.395294189453125, |
|
"learning_rate": 2.5269637447446345e-07, |
|
"loss": 0.0158, |
|
"reward": 0.3828125074505806, |
|
"reward_std": 0.08006503619253635, |
|
"rewards/accuracy_reward": 0.3333333432674408, |
|
"rewards/format_reward": 0.0, |
|
"rewards/tag_count_reward": 0.0494791679084301, |
|
"step": 63 |
|
}, |
|
{ |
|
"completion_length": 1800.3333435058594, |
|
"epoch": 0.7664670658682635, |
|
"grad_norm": 1.9823267459869385, |
|
"kl": 0.34970855712890625, |
|
"learning_rate": 2.3862750896010425e-07, |
|
"loss": 0.014, |
|
"reward": 0.4036458432674408, |
|
"reward_std": 0.08622701931744814, |
|
"rewards/accuracy_reward": 0.3333333432674408, |
|
"rewards/format_reward": 0.0, |
|
"rewards/tag_count_reward": 0.07031250279396772, |
|
"step": 64 |
|
}, |
|
{ |
|
"completion_length": 1160.375, |
|
"epoch": 0.7784431137724551, |
|
"grad_norm": 5.189390659332275, |
|
"kl": 1.3760986328125, |
|
"learning_rate": 2.25119757720464e-07, |
|
"loss": 0.0551, |
|
"reward": 0.8632812798023224, |
|
"reward_std": 0.11284597590565681, |
|
"rewards/accuracy_reward": 0.7500000074505806, |
|
"rewards/format_reward": 0.0, |
|
"rewards/tag_count_reward": 0.11328125279396772, |
|
"step": 65 |
|
}, |
|
{ |
|
"completion_length": 1668.265625, |
|
"epoch": 0.7904191616766467, |
|
"grad_norm": 3.109102249145508, |
|
"kl": 0.5458221435546875, |
|
"learning_rate": 2.12197462636274e-07, |
|
"loss": 0.0218, |
|
"reward": 0.5104166828095913, |
|
"reward_std": 0.12134900130331516, |
|
"rewards/accuracy_reward": 0.4218750074505806, |
|
"rewards/format_reward": 0.0, |
|
"rewards/tag_count_reward": 0.08854166977107525, |
|
"step": 66 |
|
}, |
|
{ |
|
"completion_length": 1529.2083435058594, |
|
"epoch": 0.8023952095808383, |
|
"grad_norm": 6.05462646484375, |
|
"kl": 1.633209228515625, |
|
"learning_rate": 1.998839105567023e-07, |
|
"loss": 0.0652, |
|
"reward": 0.4231770932674408, |
|
"reward_std": 0.12092401646077633, |
|
"rewards/accuracy_reward": 0.3333333432674408, |
|
"rewards/format_reward": 0.0, |
|
"rewards/tag_count_reward": 0.0898437537252903, |
|
"step": 67 |
|
}, |
|
{ |
|
"completion_length": 2048.0, |
|
"epoch": 0.8143712574850299, |
|
"grad_norm": 0.8508380651473999, |
|
"kl": 0.00394439697265625, |
|
"learning_rate": 1.882012913348768e-07, |
|
"loss": 0.0002, |
|
"reward": 0.3033854253590107, |
|
"reward_std": 0.12076857313513756, |
|
"rewards/accuracy_reward": 0.1666666716337204, |
|
"rewards/format_reward": 0.0, |
|
"rewards/tag_count_reward": 0.13671875558793545, |
|
"step": 68 |
|
}, |
|
{ |
|
"completion_length": 2048.0, |
|
"epoch": 0.8263473053892215, |
|
"grad_norm": 0.6287813782691956, |
|
"kl": 0.00579071044921875, |
|
"learning_rate": 1.7717065784027108e-07, |
|
"loss": 0.0002, |
|
"reward": 0.5859375260770321, |
|
"reward_std": 0.11029668338596821, |
|
"rewards/accuracy_reward": 0.416666679084301, |
|
"rewards/format_reward": 0.0, |
|
"rewards/tag_count_reward": 0.1692708395421505, |
|
"step": 69 |
|
}, |
|
{ |
|
"completion_length": 2048.0, |
|
"epoch": 0.8383233532934131, |
|
"grad_norm": 0.8328803777694702, |
|
"kl": 0.00489044189453125, |
|
"learning_rate": 1.6681188802000992e-07, |
|
"loss": 0.0002, |
|
"reward": 0.6432291939854622, |
|
"reward_std": 0.13170602917671204, |
|
"rewards/accuracy_reward": 0.5000000149011612, |
|
"rewards/format_reward": 0.0, |
|
"rewards/tag_count_reward": 0.1432291716337204, |
|
"step": 70 |
|
}, |
|
{ |
|
"completion_length": 2032.9375, |
|
"epoch": 0.8502994011976048, |
|
"grad_norm": 0.7211276292800903, |
|
"kl": 0.00424957275390625, |
|
"learning_rate": 1.5714364907746534e-07, |
|
"loss": 0.0002, |
|
"reward": 0.44661460630595684, |
|
"reward_std": 0.11995729431509972, |
|
"rewards/accuracy_reward": 0.3333333358168602, |
|
"rewards/format_reward": 0.0, |
|
"rewards/tag_count_reward": 0.11328125186264515, |
|
"step": 71 |
|
}, |
|
{ |
|
"completion_length": 2048.0, |
|
"epoch": 0.8622754491017964, |
|
"grad_norm": 0.795598566532135, |
|
"kl": 0.00417327880859375, |
|
"learning_rate": 1.4818336383269423e-07, |
|
"loss": 0.0002, |
|
"reward": 0.805989608168602, |
|
"reward_std": 0.12349414266645908, |
|
"rewards/accuracy_reward": 0.666666679084301, |
|
"rewards/format_reward": 0.0, |
|
"rewards/tag_count_reward": 0.1393229216337204, |
|
"step": 72 |
|
}, |
|
{ |
|
"completion_length": 2041.3385620117188, |
|
"epoch": 0.874251497005988, |
|
"grad_norm": 0.6561583876609802, |
|
"kl": 0.00516510009765625, |
|
"learning_rate": 1.3994717932533889e-07, |
|
"loss": 0.0002, |
|
"reward": 0.7122395932674408, |
|
"reward_std": 0.12930710427463055, |
|
"rewards/accuracy_reward": 0.5833333507180214, |
|
"rewards/format_reward": 0.0, |
|
"rewards/tag_count_reward": 0.12890625558793545, |
|
"step": 73 |
|
}, |
|
{ |
|
"completion_length": 2048.0, |
|
"epoch": 0.8862275449101796, |
|
"grad_norm": 0.6364233493804932, |
|
"kl": 0.00426483154296875, |
|
"learning_rate": 1.324499377165708e-07, |
|
"loss": 0.0002, |
|
"reward": 0.5507812574505806, |
|
"reward_std": 0.12330615520477295, |
|
"rewards/accuracy_reward": 0.416666679084301, |
|
"rewards/format_reward": 0.0, |
|
"rewards/tag_count_reward": 0.1341145895421505, |
|
"step": 74 |
|
}, |
|
{ |
|
"completion_length": 2048.0, |
|
"epoch": 0.8982035928143712, |
|
"grad_norm": 0.7673735618591309, |
|
"kl": 0.00435638427734375, |
|
"learning_rate": 1.257051495425121e-07, |
|
"loss": 0.0002, |
|
"reward": 0.5768229365348816, |
|
"reward_std": 0.13856617361307144, |
|
"rewards/accuracy_reward": 0.416666679084301, |
|
"rewards/format_reward": 0.0, |
|
"rewards/tag_count_reward": 0.1601562574505806, |
|
"step": 75 |
|
}, |
|
{ |
|
"completion_length": 2048.0, |
|
"epoch": 0.9101796407185628, |
|
"grad_norm": 0.773383378982544, |
|
"kl": 0.00489044189453125, |
|
"learning_rate": 1.197249693673371e-07, |
|
"loss": 0.0002, |
|
"reward": 0.8151041865348816, |
|
"reward_std": 0.1366959922015667, |
|
"rewards/accuracy_reward": 0.6666666865348816, |
|
"rewards/format_reward": 0.0, |
|
"rewards/tag_count_reward": 0.1484375074505806, |
|
"step": 76 |
|
}, |
|
{ |
|
"completion_length": 2046.9375, |
|
"epoch": 0.9221556886227545, |
|
"grad_norm": 0.6188393235206604, |
|
"kl": 0.0057525634765625, |
|
"learning_rate": 1.145201738799255e-07, |
|
"loss": 0.0002, |
|
"reward": 0.6341145858168602, |
|
"reward_std": 0.13442306593060493, |
|
"rewards/accuracy_reward": 0.5000000074505806, |
|
"rewards/format_reward": 0.0, |
|
"rewards/tag_count_reward": 0.1341145858168602, |
|
"step": 77 |
|
}, |
|
{ |
|
"completion_length": 2048.0, |
|
"epoch": 0.9341317365269461, |
|
"grad_norm": 0.771629273891449, |
|
"kl": 0.004974365234375, |
|
"learning_rate": 1.1010014247354125e-07, |
|
"loss": 0.0002, |
|
"reward": 0.5664062555879354, |
|
"reward_std": 0.1288151517510414, |
|
"rewards/accuracy_reward": 0.416666679084301, |
|
"rewards/format_reward": 0.0, |
|
"rewards/tag_count_reward": 0.14973958767950535, |
|
"step": 78 |
|
}, |
|
{ |
|
"completion_length": 2037.3385620117188, |
|
"epoch": 0.9461077844311377, |
|
"grad_norm": 1.8673304319381714, |
|
"kl": 0.01363372802734375, |
|
"learning_rate": 1.064728403435312e-07, |
|
"loss": 0.0005, |
|
"reward": 0.6250000223517418, |
|
"reward_std": 0.12153633683919907, |
|
"rewards/accuracy_reward": 0.5000000149011612, |
|
"rewards/format_reward": 0.0, |
|
"rewards/tag_count_reward": 0.1250000037252903, |
|
"step": 79 |
|
}, |
|
{ |
|
"completion_length": 2045.796875, |
|
"epoch": 0.9580838323353293, |
|
"grad_norm": 0.5428643226623535, |
|
"kl": 0.003711700439453125, |
|
"learning_rate": 1.0364480413350543e-07, |
|
"loss": 0.0001, |
|
"reward": 0.8554687947034836, |
|
"reward_std": 0.09259135648608208, |
|
"rewards/accuracy_reward": 0.7500000149011612, |
|
"rewards/format_reward": 0.0, |
|
"rewards/tag_count_reward": 0.1054687537252903, |
|
"step": 80 |
|
}, |
|
{ |
|
"completion_length": 2048.0, |
|
"epoch": 0.9700598802395209, |
|
"grad_norm": 0.6405127048492432, |
|
"kl": 0.00507354736328125, |
|
"learning_rate": 1.0162113015586308e-07, |
|
"loss": 0.0002, |
|
"reward": 0.6484375223517418, |
|
"reward_std": 0.12896526977419853, |
|
"rewards/accuracy_reward": 0.5052083432674408, |
|
"rewards/format_reward": 0.0, |
|
"rewards/tag_count_reward": 0.1432291716337204, |
|
"step": 81 |
|
}, |
|
{ |
|
"completion_length": 2048.0, |
|
"epoch": 0.9820359281437125, |
|
"grad_norm": 0.6773039698600769, |
|
"kl": 0.00487518310546875, |
|
"learning_rate": 1.0040546520789337e-07, |
|
"loss": 0.0002, |
|
"reward": 0.7031250149011612, |
|
"reward_std": 0.11840885132551193, |
|
"rewards/accuracy_reward": 0.5833333507180214, |
|
"rewards/format_reward": 0.0, |
|
"rewards/tag_count_reward": 0.1197916716337204, |
|
"step": 82 |
|
}, |
|
{ |
|
"completion_length": 2048.0, |
|
"epoch": 0.9940119760479041, |
|
"grad_norm": 0.5921207070350647, |
|
"kl": 0.00601959228515625, |
|
"learning_rate": 1e-07, |
|
"loss": 0.0002, |
|
"reward": 0.7174479365348816, |
|
"reward_std": 0.12807989306747913, |
|
"rewards/accuracy_reward": 0.588541679084301, |
|
"rewards/format_reward": 0.0, |
|
"rewards/tag_count_reward": 0.12890625558793545, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.9940119760479041, |
|
"step": 83, |
|
"total_flos": 0.0, |
|
"train_loss": 0.02214584331980233, |
|
"train_runtime": 5149.8835, |
|
"train_samples_per_second": 0.194, |
|
"train_steps_per_second": 0.016 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 83, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|