|
{ |
|
"best_metric": 0.4757327735424042, |
|
"best_model_checkpoint": "miner_id_24/checkpoint-300", |
|
"epoch": 0.09686989162680874, |
|
"eval_steps": 50, |
|
"global_step": 300, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.00032289963875602916, |
|
"grad_norm": 0.8824386596679688, |
|
"learning_rate": 2.9999999999999997e-05, |
|
"loss": 0.9502, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.00032289963875602916, |
|
"eval_loss": 1.3877121210098267, |
|
"eval_runtime": 93.1564, |
|
"eval_samples_per_second": 2.673, |
|
"eval_steps_per_second": 2.673, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0006457992775120583, |
|
"grad_norm": 0.9450064897537231, |
|
"learning_rate": 5.9999999999999995e-05, |
|
"loss": 1.0243, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.0009686989162680874, |
|
"grad_norm": 0.8851878046989441, |
|
"learning_rate": 8.999999999999999e-05, |
|
"loss": 1.0441, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.0012915985550241166, |
|
"grad_norm": 0.9229115843772888, |
|
"learning_rate": 0.00011999999999999999, |
|
"loss": 0.9838, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.0016144981937801458, |
|
"grad_norm": 0.8223733901977539, |
|
"learning_rate": 0.00015, |
|
"loss": 0.9295, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.0019373978325361748, |
|
"grad_norm": 0.608881950378418, |
|
"learning_rate": 0.00017999999999999998, |
|
"loss": 0.8621, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.002260297471292204, |
|
"grad_norm": 0.5287642478942871, |
|
"learning_rate": 0.00020999999999999998, |
|
"loss": 0.7054, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.0025831971100482333, |
|
"grad_norm": 0.5346184372901917, |
|
"learning_rate": 0.00023999999999999998, |
|
"loss": 0.7059, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.0029060967488042625, |
|
"grad_norm": 0.5089557766914368, |
|
"learning_rate": 0.00027, |
|
"loss": 0.697, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.0032289963875602916, |
|
"grad_norm": 0.5970950722694397, |
|
"learning_rate": 0.0003, |
|
"loss": 0.7296, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.0035518960263163203, |
|
"grad_norm": 0.5149356722831726, |
|
"learning_rate": 0.0002999911984174669, |
|
"loss": 0.652, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.0038747956650723495, |
|
"grad_norm": 0.5151774287223816, |
|
"learning_rate": 0.0002999647947027726, |
|
"loss": 0.5781, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.004197695303828379, |
|
"grad_norm": 0.8652517795562744, |
|
"learning_rate": 0.0002999207919545099, |
|
"loss": 0.6031, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.004520594942584408, |
|
"grad_norm": 0.5327635407447815, |
|
"learning_rate": 0.0002998591953365965, |
|
"loss": 0.5459, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.004843494581340437, |
|
"grad_norm": 0.6640207171440125, |
|
"learning_rate": 0.00029978001207766854, |
|
"loss": 0.6103, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.005166394220096467, |
|
"grad_norm": 0.5555705428123474, |
|
"learning_rate": 0.00029968325147023263, |
|
"loss": 0.5229, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.005489293858852495, |
|
"grad_norm": 0.5247602462768555, |
|
"learning_rate": 0.000299568924869575, |
|
"loss": 0.5202, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.005812193497608525, |
|
"grad_norm": 0.42580658197402954, |
|
"learning_rate": 0.00029943704569242917, |
|
"loss": 0.5354, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.006135093136364554, |
|
"grad_norm": 0.43852224946022034, |
|
"learning_rate": 0.0002992876294154013, |
|
"loss": 0.5391, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.006457992775120583, |
|
"grad_norm": 0.5213896632194519, |
|
"learning_rate": 0.00029912069357315393, |
|
"loss": 0.514, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.006780892413876612, |
|
"grad_norm": 0.4253011643886566, |
|
"learning_rate": 0.00029893625775634835, |
|
"loss": 0.5246, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.007103792052632641, |
|
"grad_norm": 0.6179479956626892, |
|
"learning_rate": 0.0002987343436093454, |
|
"loss": 0.5259, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.00742669169138867, |
|
"grad_norm": 0.4869435429573059, |
|
"learning_rate": 0.00029851497482766547, |
|
"loss": 0.4949, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.007749591330144699, |
|
"grad_norm": 0.46306025981903076, |
|
"learning_rate": 0.00029827817715520773, |
|
"loss": 0.5344, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.008072490968900729, |
|
"grad_norm": 0.5446106195449829, |
|
"learning_rate": 0.0002980239783812289, |
|
"loss": 0.6442, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.008395390607656757, |
|
"grad_norm": 0.4747227430343628, |
|
"learning_rate": 0.0002977524083370822, |
|
"loss": 0.5129, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.008718290246412786, |
|
"grad_norm": 0.4436034560203552, |
|
"learning_rate": 0.00029746349889271645, |
|
"loss": 0.5176, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.009041189885168817, |
|
"grad_norm": 0.458401620388031, |
|
"learning_rate": 0.0002971572839529358, |
|
"loss": 0.5984, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.009364089523924845, |
|
"grad_norm": 0.41444727778434753, |
|
"learning_rate": 0.00029683379945342125, |
|
"loss": 0.5384, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.009686989162680874, |
|
"grad_norm": 0.4278819262981415, |
|
"learning_rate": 0.000296493083356513, |
|
"loss": 0.4995, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.010009888801436903, |
|
"grad_norm": 0.5811014175415039, |
|
"learning_rate": 0.00029613517564675565, |
|
"loss": 0.5204, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.010332788440192933, |
|
"grad_norm": 0.43319976329803467, |
|
"learning_rate": 0.0002957601183262058, |
|
"loss": 0.5376, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.010655688078948962, |
|
"grad_norm": 0.5323597192764282, |
|
"learning_rate": 0.000295367955409503, |
|
"loss": 0.5722, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.01097858771770499, |
|
"grad_norm": 0.49440231919288635, |
|
"learning_rate": 0.00029495873291870436, |
|
"loss": 0.519, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.01130148735646102, |
|
"grad_norm": 0.585383951663971, |
|
"learning_rate": 0.0002945324988778834, |
|
"loss": 0.5643, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.01162438699521705, |
|
"grad_norm": 0.6207253336906433, |
|
"learning_rate": 0.00029408930330749477, |
|
"loss": 0.5647, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.011947286633973079, |
|
"grad_norm": 0.5334969162940979, |
|
"learning_rate": 0.0002936291982185036, |
|
"loss": 0.6132, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.012270186272729107, |
|
"grad_norm": 0.5715814828872681, |
|
"learning_rate": 0.00029315223760628217, |
|
"loss": 0.5453, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.012593085911485136, |
|
"grad_norm": 0.5439808964729309, |
|
"learning_rate": 0.00029265847744427303, |
|
"loss": 0.6189, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.012915985550241166, |
|
"grad_norm": 0.4831181466579437, |
|
"learning_rate": 0.00029214797567742035, |
|
"loss": 0.558, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.013238885188997195, |
|
"grad_norm": 0.44526898860931396, |
|
"learning_rate": 0.00029162079221537, |
|
"loss": 0.5486, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.013561784827753224, |
|
"grad_norm": 0.4262256324291229, |
|
"learning_rate": 0.0002910769889254386, |
|
"loss": 0.5953, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.013884684466509253, |
|
"grad_norm": 1.0412601232528687, |
|
"learning_rate": 0.0002905166296253533, |
|
"loss": 0.6746, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.014207584105265281, |
|
"grad_norm": 0.4333534836769104, |
|
"learning_rate": 0.0002899397800757626, |
|
"loss": 0.5598, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.014530483744021312, |
|
"grad_norm": 0.44549164175987244, |
|
"learning_rate": 0.0002893465079725187, |
|
"loss": 0.5453, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.01485338338277734, |
|
"grad_norm": 0.4520653486251831, |
|
"learning_rate": 0.0002887368829387333, |
|
"loss": 0.55, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.01517628302153337, |
|
"grad_norm": 0.4351714849472046, |
|
"learning_rate": 0.0002881109765166071, |
|
"loss": 0.5768, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.015499182660289398, |
|
"grad_norm": 0.49298447370529175, |
|
"learning_rate": 0.00028746886215903387, |
|
"loss": 0.5117, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.01582208229904543, |
|
"grad_norm": 0.5363653302192688, |
|
"learning_rate": 0.00028681061522098047, |
|
"loss": 0.6614, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.016144981937801457, |
|
"grad_norm": 0.6216686367988586, |
|
"learning_rate": 0.0002861363129506435, |
|
"loss": 0.6039, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.016144981937801457, |
|
"eval_loss": 0.5689713358879089, |
|
"eval_runtime": 93.1562, |
|
"eval_samples_per_second": 2.673, |
|
"eval_steps_per_second": 2.673, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.016467881576557486, |
|
"grad_norm": 0.4990479052066803, |
|
"learning_rate": 0.0002854460344803842, |
|
"loss": 0.6526, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.016790781215313515, |
|
"grad_norm": 0.45471203327178955, |
|
"learning_rate": 0.00028473986081744163, |
|
"loss": 0.5901, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.017113680854069543, |
|
"grad_norm": 0.3418448567390442, |
|
"learning_rate": 0.000284017874834426, |
|
"loss": 0.5179, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.017436580492825572, |
|
"grad_norm": 0.5214569568634033, |
|
"learning_rate": 0.0002832801612595937, |
|
"loss": 0.531, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.017759480131581604, |
|
"grad_norm": 0.4000888764858246, |
|
"learning_rate": 0.0002825268066669034, |
|
"loss": 0.5312, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.018082379770337633, |
|
"grad_norm": 0.3511790335178375, |
|
"learning_rate": 0.00028175789946585693, |
|
"loss": 0.5187, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.018405279409093662, |
|
"grad_norm": 0.40245896577835083, |
|
"learning_rate": 0.0002809735298911234, |
|
"loss": 0.5141, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.01872817904784969, |
|
"grad_norm": 0.3479350805282593, |
|
"learning_rate": 0.00028017378999195015, |
|
"loss": 0.5353, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.01905107868660572, |
|
"grad_norm": 0.3354577124118805, |
|
"learning_rate": 0.0002793587736213603, |
|
"loss": 0.5223, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.019373978325361748, |
|
"grad_norm": 0.36033692955970764, |
|
"learning_rate": 0.00027852857642513836, |
|
"loss": 0.535, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.019696877964117777, |
|
"grad_norm": 0.40051642060279846, |
|
"learning_rate": 0.00027768329583060635, |
|
"loss": 0.4658, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.020019777602873805, |
|
"grad_norm": 0.41961464285850525, |
|
"learning_rate": 0.00027682303103518976, |
|
"loss": 0.5517, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.020342677241629838, |
|
"grad_norm": 0.4324147403240204, |
|
"learning_rate": 0.00027594788299477655, |
|
"loss": 0.5352, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.020665576880385866, |
|
"grad_norm": 0.39792558550834656, |
|
"learning_rate": 0.0002750579544118695, |
|
"loss": 0.5369, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.020988476519141895, |
|
"grad_norm": 0.4185834228992462, |
|
"learning_rate": 0.00027415334972353357, |
|
"loss": 0.5323, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.021311376157897924, |
|
"grad_norm": 0.36977943778038025, |
|
"learning_rate": 0.0002732341750891397, |
|
"loss": 0.4811, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.021634275796653953, |
|
"grad_norm": 0.38211461901664734, |
|
"learning_rate": 0.00027230053837790666, |
|
"loss": 0.508, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.02195717543540998, |
|
"grad_norm": 0.3872841000556946, |
|
"learning_rate": 0.0002713525491562421, |
|
"loss": 0.4985, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.02228007507416601, |
|
"grad_norm": 0.3474493622779846, |
|
"learning_rate": 0.0002703903186748843, |
|
"loss": 0.4411, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.02260297471292204, |
|
"grad_norm": 0.3461940586566925, |
|
"learning_rate": 0.00026941395985584653, |
|
"loss": 0.4987, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.022925874351678067, |
|
"grad_norm": 0.3902309238910675, |
|
"learning_rate": 0.00026842358727916524, |
|
"loss": 0.5151, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.0232487739904341, |
|
"grad_norm": 0.34906888008117676, |
|
"learning_rate": 0.0002674193171694533, |
|
"loss": 0.5087, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.02357167362919013, |
|
"grad_norm": 0.40804773569107056, |
|
"learning_rate": 0.0002664012673822609, |
|
"loss": 0.5772, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.023894573267946157, |
|
"grad_norm": 0.3935260474681854, |
|
"learning_rate": 0.0002653695573902443, |
|
"loss": 0.4854, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.024217472906702186, |
|
"grad_norm": 0.3976927101612091, |
|
"learning_rate": 0.0002643243082691454, |
|
"loss": 0.4943, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.024540372545458215, |
|
"grad_norm": 0.4370949864387512, |
|
"learning_rate": 0.0002632656426835831, |
|
"loss": 0.5562, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.024863272184214243, |
|
"grad_norm": 0.37720787525177, |
|
"learning_rate": 0.00026219368487265753, |
|
"loss": 0.4861, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.025186171822970272, |
|
"grad_norm": 0.38937804102897644, |
|
"learning_rate": 0.00026110856063537083, |
|
"loss": 0.4428, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.0255090714617263, |
|
"grad_norm": 0.41925427317619324, |
|
"learning_rate": 0.00026001039731586334, |
|
"loss": 0.5127, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.025831971100482333, |
|
"grad_norm": 0.4304789900779724, |
|
"learning_rate": 0.0002588993237884696, |
|
"loss": 0.4446, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.02615487073923836, |
|
"grad_norm": 0.4581892490386963, |
|
"learning_rate": 0.00025777547044259435, |
|
"loss": 0.5083, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.02647777037799439, |
|
"grad_norm": 0.40676695108413696, |
|
"learning_rate": 0.0002566389691674106, |
|
"loss": 0.4466, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.02680067001675042, |
|
"grad_norm": 0.4280904531478882, |
|
"learning_rate": 0.00025548995333638197, |
|
"loss": 0.5397, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.027123569655506448, |
|
"grad_norm": 0.500979483127594, |
|
"learning_rate": 0.00025432855779161076, |
|
"loss": 0.5162, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.027446469294262477, |
|
"grad_norm": 0.46338576078414917, |
|
"learning_rate": 0.00025315491882801347, |
|
"loss": 0.52, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.027769368933018505, |
|
"grad_norm": 0.4408024251461029, |
|
"learning_rate": 0.00025196917417732615, |
|
"loss": 0.5225, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.028092268571774534, |
|
"grad_norm": 0.5076299905776978, |
|
"learning_rate": 0.0002507714629919409, |
|
"loss": 0.5791, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.028415168210530563, |
|
"grad_norm": 0.4352111220359802, |
|
"learning_rate": 0.0002495619258285757, |
|
"loss": 0.5098, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.028738067849286595, |
|
"grad_norm": 0.5041966438293457, |
|
"learning_rate": 0.0002483407046317794, |
|
"loss": 0.5932, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.029060967488042624, |
|
"grad_norm": 0.452606737613678, |
|
"learning_rate": 0.00024710794271727413, |
|
"loss": 0.606, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.029383867126798652, |
|
"grad_norm": 0.40284180641174316, |
|
"learning_rate": 0.0002458637847551364, |
|
"loss": 0.537, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.02970676676555468, |
|
"grad_norm": 0.4412550628185272, |
|
"learning_rate": 0.00024460837675281926, |
|
"loss": 0.487, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.03002966640431071, |
|
"grad_norm": 0.4605487287044525, |
|
"learning_rate": 0.00024334186603801807, |
|
"loss": 0.5168, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.03035256604306674, |
|
"grad_norm": 0.4492901861667633, |
|
"learning_rate": 0.00024206440124138062, |
|
"loss": 0.5853, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.030675465681822767, |
|
"grad_norm": 0.4852229356765747, |
|
"learning_rate": 0.0002407761322790648, |
|
"loss": 0.5914, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.030998365320578796, |
|
"grad_norm": 0.436937153339386, |
|
"learning_rate": 0.00023947721033514512, |
|
"loss": 0.5557, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.03132126495933483, |
|
"grad_norm": 0.49399349093437195, |
|
"learning_rate": 0.00023816778784387094, |
|
"loss": 0.5293, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.03164416459809086, |
|
"grad_norm": 0.4873245358467102, |
|
"learning_rate": 0.0002368480184717773, |
|
"loss": 0.4905, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.031967064236846886, |
|
"grad_norm": 0.5261349678039551, |
|
"learning_rate": 0.00023551805709965147, |
|
"loss": 0.512, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.032289963875602914, |
|
"grad_norm": 0.6633840799331665, |
|
"learning_rate": 0.00023417805980435736, |
|
"loss": 0.5687, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.032289963875602914, |
|
"eval_loss": 0.549869179725647, |
|
"eval_runtime": 93.2109, |
|
"eval_samples_per_second": 2.671, |
|
"eval_steps_per_second": 2.671, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.03261286351435894, |
|
"grad_norm": 0.5682958960533142, |
|
"learning_rate": 0.00023282818384051866, |
|
"loss": 0.6406, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.03293576315311497, |
|
"grad_norm": 0.4537736773490906, |
|
"learning_rate": 0.00023146858762206489, |
|
"loss": 0.5788, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.033258662791871, |
|
"grad_norm": 0.3675607442855835, |
|
"learning_rate": 0.00023009943070364044, |
|
"loss": 0.5094, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.03358156243062703, |
|
"grad_norm": 0.30980873107910156, |
|
"learning_rate": 0.0002287208737618801, |
|
"loss": 0.5131, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.03390446206938306, |
|
"grad_norm": 0.31538596749305725, |
|
"learning_rate": 0.00022733307857655325, |
|
"loss": 0.4996, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.03422736170813909, |
|
"grad_norm": 0.3792458474636078, |
|
"learning_rate": 0.00022593620801157808, |
|
"loss": 0.5272, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.034550261346895116, |
|
"grad_norm": 0.3597868084907532, |
|
"learning_rate": 0.00022453042599590882, |
|
"loss": 0.5219, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.034873160985651144, |
|
"grad_norm": 0.3305104970932007, |
|
"learning_rate": 0.00022311589750429787, |
|
"loss": 0.4561, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.03519606062440717, |
|
"grad_norm": 0.30127620697021484, |
|
"learning_rate": 0.00022169278853793545, |
|
"loss": 0.4988, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.03551896026316321, |
|
"grad_norm": 0.34320175647735596, |
|
"learning_rate": 0.00022026126610496852, |
|
"loss": 0.5181, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.03584185990191924, |
|
"grad_norm": 0.3284643292427063, |
|
"learning_rate": 0.0002188214982009016, |
|
"loss": 0.5342, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.036164759540675266, |
|
"grad_norm": 0.3455963730812073, |
|
"learning_rate": 0.00021737365378888187, |
|
"loss": 0.4768, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.036487659179431295, |
|
"grad_norm": 0.3220086097717285, |
|
"learning_rate": 0.00021591790277987043, |
|
"loss": 0.4888, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 0.036810558818187324, |
|
"grad_norm": 0.3551287353038788, |
|
"learning_rate": 0.00021445441601270276, |
|
"loss": 0.4567, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.03713345845694335, |
|
"grad_norm": 0.35259413719177246, |
|
"learning_rate": 0.00021298336523403968, |
|
"loss": 0.4779, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.03745635809569938, |
|
"grad_norm": 0.3786124587059021, |
|
"learning_rate": 0.0002115049230782124, |
|
"loss": 0.4885, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.03777925773445541, |
|
"grad_norm": 0.3437775671482086, |
|
"learning_rate": 0.00021001926304696296, |
|
"loss": 0.4335, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.03810215737321144, |
|
"grad_norm": 0.3531520962715149, |
|
"learning_rate": 0.00020852655948908316, |
|
"loss": 0.4604, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.03842505701196747, |
|
"grad_norm": 0.374508261680603, |
|
"learning_rate": 0.0002070269875799538, |
|
"loss": 0.4603, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 0.038747956650723496, |
|
"grad_norm": 0.3716915249824524, |
|
"learning_rate": 0.00020552072330098716, |
|
"loss": 0.4878, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.039070856289479525, |
|
"grad_norm": 0.41532278060913086, |
|
"learning_rate": 0.0002040079434189748, |
|
"loss": 0.5234, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 0.03939375592823555, |
|
"grad_norm": 0.3630126416683197, |
|
"learning_rate": 0.00020248882546534326, |
|
"loss": 0.4509, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.03971665556699158, |
|
"grad_norm": 0.3788343071937561, |
|
"learning_rate": 0.00020096354771531976, |
|
"loss": 0.4989, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 0.04003955520574761, |
|
"grad_norm": 0.34830769896507263, |
|
"learning_rate": 0.00019943228916701104, |
|
"loss": 0.459, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.04036245484450364, |
|
"grad_norm": 0.3595748543739319, |
|
"learning_rate": 0.00019789522952039695, |
|
"loss": 0.4525, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.040685354483259675, |
|
"grad_norm": 0.3879833519458771, |
|
"learning_rate": 0.0001963525491562421, |
|
"loss": 0.483, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.041008254122015704, |
|
"grad_norm": 0.44336938858032227, |
|
"learning_rate": 0.00019480442911492702, |
|
"loss": 0.5148, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 0.04133115376077173, |
|
"grad_norm": 0.4268869161605835, |
|
"learning_rate": 0.00019325105107520263, |
|
"loss": 0.5186, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.04165405339952776, |
|
"grad_norm": 0.40015068650245667, |
|
"learning_rate": 0.00019169259733286913, |
|
"loss": 0.4856, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.04197695303828379, |
|
"grad_norm": 0.3713054656982422, |
|
"learning_rate": 0.00019012925077938314, |
|
"loss": 0.4546, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.04229985267703982, |
|
"grad_norm": 0.45219311118125916, |
|
"learning_rate": 0.0001885611948803941, |
|
"loss": 0.4606, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 0.04262275231579585, |
|
"grad_norm": 0.3732009828090668, |
|
"learning_rate": 0.0001869886136542143, |
|
"loss": 0.502, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.042945651954551876, |
|
"grad_norm": 0.5309696793556213, |
|
"learning_rate": 0.00018541169165022298, |
|
"loss": 0.591, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 0.043268551593307905, |
|
"grad_norm": 0.4451289772987366, |
|
"learning_rate": 0.00018383061392720913, |
|
"loss": 0.5503, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.043591451232063934, |
|
"grad_norm": 0.49492791295051575, |
|
"learning_rate": 0.0001822455660316536, |
|
"loss": 0.6013, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.04391435087081996, |
|
"grad_norm": 0.4255249500274658, |
|
"learning_rate": 0.00018065673397595473, |
|
"loss": 0.5237, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.04423725050957599, |
|
"grad_norm": 0.35570111870765686, |
|
"learning_rate": 0.00017906430421659876, |
|
"loss": 0.4749, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 0.04456015014833202, |
|
"grad_norm": 0.4393974840641022, |
|
"learning_rate": 0.00017746846363227842, |
|
"loss": 0.5613, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.04488304978708805, |
|
"grad_norm": 0.4163194000720978, |
|
"learning_rate": 0.00017586939950196186, |
|
"loss": 0.5197, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 0.04520594942584408, |
|
"grad_norm": 0.40177619457244873, |
|
"learning_rate": 0.00017426729948291474, |
|
"loss": 0.5775, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.045528849064600106, |
|
"grad_norm": 0.38539931178092957, |
|
"learning_rate": 0.00017266235158867752, |
|
"loss": 0.5486, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 0.045851748703356135, |
|
"grad_norm": 0.3907434642314911, |
|
"learning_rate": 0.00017105474416700164, |
|
"loss": 0.52, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.04617464834211217, |
|
"grad_norm": 0.45413726568222046, |
|
"learning_rate": 0.0001694446658777458, |
|
"loss": 0.5052, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 0.0464975479808682, |
|
"grad_norm": 0.3584253489971161, |
|
"learning_rate": 0.00016783230567073596, |
|
"loss": 0.4945, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.04682044761962423, |
|
"grad_norm": 0.38787075877189636, |
|
"learning_rate": 0.00016621785276359127, |
|
"loss": 0.5507, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.04714334725838026, |
|
"grad_norm": 0.4273373484611511, |
|
"learning_rate": 0.0001646014966195185, |
|
"loss": 0.5588, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.047466246897136286, |
|
"grad_norm": 0.3610289692878723, |
|
"learning_rate": 0.00016298342692507763, |
|
"loss": 0.5056, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 0.047789146535892314, |
|
"grad_norm": 0.4982016682624817, |
|
"learning_rate": 0.00016136383356792156, |
|
"loss": 0.6056, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.04811204617464834, |
|
"grad_norm": 0.48546895384788513, |
|
"learning_rate": 0.0001597429066145116, |
|
"loss": 0.6287, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 0.04843494581340437, |
|
"grad_norm": 0.4680643379688263, |
|
"learning_rate": 0.0001581208362878126, |
|
"loss": 0.5451, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.04843494581340437, |
|
"eval_loss": 0.5126909613609314, |
|
"eval_runtime": 93.2305, |
|
"eval_samples_per_second": 2.671, |
|
"eval_steps_per_second": 2.671, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.0487578454521604, |
|
"grad_norm": 0.4066923260688782, |
|
"learning_rate": 0.00015649781294496933, |
|
"loss": 0.5668, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 0.04908074509091643, |
|
"grad_norm": 0.4061080813407898, |
|
"learning_rate": 0.00015487402705496707, |
|
"loss": 0.5461, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.04940364472967246, |
|
"grad_norm": 0.33173874020576477, |
|
"learning_rate": 0.0001532496691762796, |
|
"loss": 0.4937, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 0.04972654436842849, |
|
"grad_norm": 0.3234449625015259, |
|
"learning_rate": 0.00015162492993450597, |
|
"loss": 0.5055, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 0.050049444007184515, |
|
"grad_norm": 0.26888391375541687, |
|
"learning_rate": 0.00015, |
|
"loss": 0.4302, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.050372343645940544, |
|
"grad_norm": 0.2688259482383728, |
|
"learning_rate": 0.00014837507006549403, |
|
"loss": 0.4681, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.05069524328469657, |
|
"grad_norm": 0.29257914423942566, |
|
"learning_rate": 0.00014675033082372038, |
|
"loss": 0.4916, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 0.0510181429234526, |
|
"grad_norm": 0.3311769664287567, |
|
"learning_rate": 0.00014512597294503293, |
|
"loss": 0.4852, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 0.05134104256220863, |
|
"grad_norm": 0.3170052468776703, |
|
"learning_rate": 0.00014350218705503067, |
|
"loss": 0.4772, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 0.051663942200964666, |
|
"grad_norm": 0.34309279918670654, |
|
"learning_rate": 0.00014187916371218736, |
|
"loss": 0.4622, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.051986841839720695, |
|
"grad_norm": 0.32580074667930603, |
|
"learning_rate": 0.00014025709338548836, |
|
"loss": 0.4839, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 0.05230974147847672, |
|
"grad_norm": 0.317793071269989, |
|
"learning_rate": 0.00013863616643207844, |
|
"loss": 0.4923, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.05263264111723275, |
|
"grad_norm": 0.30965691804885864, |
|
"learning_rate": 0.00013701657307492235, |
|
"loss": 0.4769, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 0.05295554075598878, |
|
"grad_norm": 0.3157210052013397, |
|
"learning_rate": 0.00013539850338048154, |
|
"loss": 0.4798, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.05327844039474481, |
|
"grad_norm": 0.35382428765296936, |
|
"learning_rate": 0.00013378214723640876, |
|
"loss": 0.5212, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.05360134003350084, |
|
"grad_norm": 0.29083287715911865, |
|
"learning_rate": 0.00013216769432926404, |
|
"loss": 0.4257, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 0.05392423967225687, |
|
"grad_norm": 0.3195495009422302, |
|
"learning_rate": 0.00013055533412225422, |
|
"loss": 0.4177, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 0.054247139311012896, |
|
"grad_norm": 0.29507699608802795, |
|
"learning_rate": 0.00012894525583299833, |
|
"loss": 0.4311, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.054570038949768924, |
|
"grad_norm": 0.2950059175491333, |
|
"learning_rate": 0.0001273376484113225, |
|
"loss": 0.4188, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 0.05489293858852495, |
|
"grad_norm": 0.34078386425971985, |
|
"learning_rate": 0.0001257327005170853, |
|
"loss": 0.4737, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.05521583822728098, |
|
"grad_norm": 0.3855750262737274, |
|
"learning_rate": 0.00012413060049803814, |
|
"loss": 0.455, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 0.05553873786603701, |
|
"grad_norm": 0.34931278228759766, |
|
"learning_rate": 0.00012253153636772156, |
|
"loss": 0.4584, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.05586163750479304, |
|
"grad_norm": 0.3456253707408905, |
|
"learning_rate": 0.00012093569578340124, |
|
"loss": 0.4152, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 0.05618453714354907, |
|
"grad_norm": 0.3462797999382019, |
|
"learning_rate": 0.00011934326602404528, |
|
"loss": 0.4644, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 0.0565074367823051, |
|
"grad_norm": 0.3225034475326538, |
|
"learning_rate": 0.00011775443396834638, |
|
"loss": 0.4438, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.056830336421061126, |
|
"grad_norm": 0.3485172986984253, |
|
"learning_rate": 0.00011616938607279086, |
|
"loss": 0.4167, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.05715323605981716, |
|
"grad_norm": 0.36885136365890503, |
|
"learning_rate": 0.00011458830834977698, |
|
"loss": 0.4494, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 0.05747613569857319, |
|
"grad_norm": 0.40458542108535767, |
|
"learning_rate": 0.0001130113863457857, |
|
"loss": 0.4847, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 0.05779903533732922, |
|
"grad_norm": 0.3624725043773651, |
|
"learning_rate": 0.00011143880511960584, |
|
"loss": 0.4958, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 0.05812193497608525, |
|
"grad_norm": 0.3824242949485779, |
|
"learning_rate": 0.00010987074922061689, |
|
"loss": 0.4564, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.058444834614841276, |
|
"grad_norm": 0.3851178288459778, |
|
"learning_rate": 0.00010830740266713087, |
|
"loss": 0.4651, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 0.058767734253597305, |
|
"grad_norm": 0.43144652247428894, |
|
"learning_rate": 0.00010674894892479738, |
|
"loss": 0.4815, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 0.059090633892353334, |
|
"grad_norm": 0.389303982257843, |
|
"learning_rate": 0.00010519557088507298, |
|
"loss": 0.5031, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 0.05941353353110936, |
|
"grad_norm": 0.37136152386665344, |
|
"learning_rate": 0.0001036474508437579, |
|
"loss": 0.4521, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 0.05973643316986539, |
|
"grad_norm": 0.3901714086532593, |
|
"learning_rate": 0.00010210477047960302, |
|
"loss": 0.4977, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.06005933280862142, |
|
"grad_norm": 0.4063364863395691, |
|
"learning_rate": 0.00010056771083298893, |
|
"loss": 0.4808, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 0.06038223244737745, |
|
"grad_norm": 0.408845454454422, |
|
"learning_rate": 9.903645228468024e-05, |
|
"loss": 0.4782, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 0.06070513208613348, |
|
"grad_norm": 0.3464532792568207, |
|
"learning_rate": 9.751117453465673e-05, |
|
"loss": 0.4462, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 0.061028031724889506, |
|
"grad_norm": 0.41235268115997314, |
|
"learning_rate": 9.59920565810252e-05, |
|
"loss": 0.4636, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 0.061350931363645535, |
|
"grad_norm": 0.3754219710826874, |
|
"learning_rate": 9.447927669901282e-05, |
|
"loss": 0.5001, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.06167383100240156, |
|
"grad_norm": 0.39120209217071533, |
|
"learning_rate": 9.297301242004618e-05, |
|
"loss": 0.5631, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 0.06199673064115759, |
|
"grad_norm": 0.47471514344215393, |
|
"learning_rate": 9.14734405109168e-05, |
|
"loss": 0.5029, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.06231963027991362, |
|
"grad_norm": 0.3913878798484802, |
|
"learning_rate": 8.998073695303701e-05, |
|
"loss": 0.5068, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 0.06264252991866966, |
|
"grad_norm": 0.4407348334789276, |
|
"learning_rate": 8.849507692178758e-05, |
|
"loss": 0.4856, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 0.06296542955742568, |
|
"grad_norm": 0.41722989082336426, |
|
"learning_rate": 8.70166347659603e-05, |
|
"loss": 0.5372, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.06328832919618171, |
|
"grad_norm": 0.35007795691490173, |
|
"learning_rate": 8.554558398729725e-05, |
|
"loss": 0.4814, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 0.06361122883493774, |
|
"grad_norm": 0.43563127517700195, |
|
"learning_rate": 8.408209722012956e-05, |
|
"loss": 0.5617, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 0.06393412847369377, |
|
"grad_norm": 0.5308802723884583, |
|
"learning_rate": 8.262634621111818e-05, |
|
"loss": 0.5746, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 0.0642570281124498, |
|
"grad_norm": 0.5026018023490906, |
|
"learning_rate": 8.117850179909842e-05, |
|
"loss": 0.6231, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 0.06457992775120583, |
|
"grad_norm": 0.5310789346694946, |
|
"learning_rate": 7.973873389503149e-05, |
|
"loss": 0.6351, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.06457992775120583, |
|
"eval_loss": 0.4887339770793915, |
|
"eval_runtime": 92.9821, |
|
"eval_samples_per_second": 2.678, |
|
"eval_steps_per_second": 2.678, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.06490282738996185, |
|
"grad_norm": 0.318142294883728, |
|
"learning_rate": 7.830721146206451e-05, |
|
"loss": 0.5384, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 0.06522572702871789, |
|
"grad_norm": 0.288631409406662, |
|
"learning_rate": 7.688410249570214e-05, |
|
"loss": 0.5078, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 0.06554862666747392, |
|
"grad_norm": 0.280100554227829, |
|
"learning_rate": 7.54695740040912e-05, |
|
"loss": 0.4788, |
|
"step": 203 |
|
}, |
|
{ |
|
"epoch": 0.06587152630622994, |
|
"grad_norm": 0.279681533575058, |
|
"learning_rate": 7.406379198842189e-05, |
|
"loss": 0.4447, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 0.06619442594498598, |
|
"grad_norm": 0.2892783284187317, |
|
"learning_rate": 7.266692142344672e-05, |
|
"loss": 0.4932, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.066517325583742, |
|
"grad_norm": 0.2658500075340271, |
|
"learning_rate": 7.127912623811993e-05, |
|
"loss": 0.4682, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 0.06684022522249804, |
|
"grad_norm": 0.2946866452693939, |
|
"learning_rate": 6.990056929635957e-05, |
|
"loss": 0.4838, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 0.06716312486125406, |
|
"grad_norm": 0.2683822214603424, |
|
"learning_rate": 6.853141237793506e-05, |
|
"loss": 0.4408, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 0.0674860245000101, |
|
"grad_norm": 0.3225007653236389, |
|
"learning_rate": 6.717181615948126e-05, |
|
"loss": 0.4949, |
|
"step": 209 |
|
}, |
|
{ |
|
"epoch": 0.06780892413876612, |
|
"grad_norm": 0.25332513451576233, |
|
"learning_rate": 6.582194019564266e-05, |
|
"loss": 0.4141, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.06813182377752215, |
|
"grad_norm": 0.2799530625343323, |
|
"learning_rate": 6.448194290034848e-05, |
|
"loss": 0.4445, |
|
"step": 211 |
|
}, |
|
{ |
|
"epoch": 0.06845472341627817, |
|
"grad_norm": 0.27327555418014526, |
|
"learning_rate": 6.315198152822272e-05, |
|
"loss": 0.4138, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 0.06877762305503421, |
|
"grad_norm": 0.3778553903102875, |
|
"learning_rate": 6.183221215612904e-05, |
|
"loss": 0.4804, |
|
"step": 213 |
|
}, |
|
{ |
|
"epoch": 0.06910052269379023, |
|
"grad_norm": 0.3077884614467621, |
|
"learning_rate": 6.052278966485491e-05, |
|
"loss": 0.4657, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 0.06942342233254627, |
|
"grad_norm": 0.29660362005233765, |
|
"learning_rate": 5.922386772093526e-05, |
|
"loss": 0.4297, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.06974632197130229, |
|
"grad_norm": 0.3540116548538208, |
|
"learning_rate": 5.793559875861938e-05, |
|
"loss": 0.466, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 0.07006922161005832, |
|
"grad_norm": 0.2957676351070404, |
|
"learning_rate": 5.6658133961981894e-05, |
|
"loss": 0.4421, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 0.07039212124881435, |
|
"grad_norm": 0.3042965233325958, |
|
"learning_rate": 5.5391623247180744e-05, |
|
"loss": 0.441, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 0.07071502088757038, |
|
"grad_norm": 0.36982765793800354, |
|
"learning_rate": 5.413621524486363e-05, |
|
"loss": 0.4114, |
|
"step": 219 |
|
}, |
|
{ |
|
"epoch": 0.07103792052632642, |
|
"grad_norm": 0.3452307879924774, |
|
"learning_rate": 5.289205728272586e-05, |
|
"loss": 0.4562, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.07136082016508244, |
|
"grad_norm": 0.3854043483734131, |
|
"learning_rate": 5.165929536822059e-05, |
|
"loss": 0.5003, |
|
"step": 221 |
|
}, |
|
{ |
|
"epoch": 0.07168371980383847, |
|
"grad_norm": 0.3237496018409729, |
|
"learning_rate": 5.043807417142436e-05, |
|
"loss": 0.4592, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 0.0720066194425945, |
|
"grad_norm": 0.32223159074783325, |
|
"learning_rate": 4.922853700805909e-05, |
|
"loss": 0.4553, |
|
"step": 223 |
|
}, |
|
{ |
|
"epoch": 0.07232951908135053, |
|
"grad_norm": 0.40129488706588745, |
|
"learning_rate": 4.8030825822673814e-05, |
|
"loss": 0.4276, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 0.07265241872010655, |
|
"grad_norm": 0.34809187054634094, |
|
"learning_rate": 4.684508117198648e-05, |
|
"loss": 0.4856, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.07297531835886259, |
|
"grad_norm": 0.3367185592651367, |
|
"learning_rate": 4.567144220838923e-05, |
|
"loss": 0.4555, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 0.07329821799761861, |
|
"grad_norm": 0.35933539271354675, |
|
"learning_rate": 4.4510046663617996e-05, |
|
"loss": 0.4837, |
|
"step": 227 |
|
}, |
|
{ |
|
"epoch": 0.07362111763637465, |
|
"grad_norm": 0.3718101382255554, |
|
"learning_rate": 4.336103083258942e-05, |
|
"loss": 0.4789, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 0.07394401727513067, |
|
"grad_norm": 0.3542415201663971, |
|
"learning_rate": 4.2224529557405645e-05, |
|
"loss": 0.5075, |
|
"step": 229 |
|
}, |
|
{ |
|
"epoch": 0.0742669169138867, |
|
"grad_norm": 0.3407626748085022, |
|
"learning_rate": 4.1100676211530404e-05, |
|
"loss": 0.4803, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.07458981655264273, |
|
"grad_norm": 0.39396294951438904, |
|
"learning_rate": 3.998960268413666e-05, |
|
"loss": 0.5117, |
|
"step": 231 |
|
}, |
|
{ |
|
"epoch": 0.07491271619139876, |
|
"grad_norm": 0.3785285949707031, |
|
"learning_rate": 3.889143936462914e-05, |
|
"loss": 0.4925, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 0.07523561583015478, |
|
"grad_norm": 0.36613747477531433, |
|
"learning_rate": 3.780631512734241e-05, |
|
"loss": 0.4434, |
|
"step": 233 |
|
}, |
|
{ |
|
"epoch": 0.07555851546891082, |
|
"grad_norm": 0.3978104591369629, |
|
"learning_rate": 3.673435731641691e-05, |
|
"loss": 0.4613, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 0.07588141510766684, |
|
"grad_norm": 0.43552708625793457, |
|
"learning_rate": 3.567569173085454e-05, |
|
"loss": 0.4177, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.07620431474642288, |
|
"grad_norm": 0.3718654215335846, |
|
"learning_rate": 3.463044260975566e-05, |
|
"loss": 0.4611, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 0.07652721438517891, |
|
"grad_norm": 0.41485676169395447, |
|
"learning_rate": 3.3598732617739036e-05, |
|
"loss": 0.5586, |
|
"step": 237 |
|
}, |
|
{ |
|
"epoch": 0.07685011402393493, |
|
"grad_norm": 0.37860673666000366, |
|
"learning_rate": 3.258068283054666e-05, |
|
"loss": 0.4256, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 0.07717301366269097, |
|
"grad_norm": 0.4362449645996094, |
|
"learning_rate": 3.1576412720834746e-05, |
|
"loss": 0.5763, |
|
"step": 239 |
|
}, |
|
{ |
|
"epoch": 0.07749591330144699, |
|
"grad_norm": 0.3914451003074646, |
|
"learning_rate": 3.058604014415343e-05, |
|
"loss": 0.4739, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.07781881294020303, |
|
"grad_norm": 0.3677349388599396, |
|
"learning_rate": 2.960968132511567e-05, |
|
"loss": 0.4716, |
|
"step": 241 |
|
}, |
|
{ |
|
"epoch": 0.07814171257895905, |
|
"grad_norm": 0.3888345956802368, |
|
"learning_rate": 2.8647450843757897e-05, |
|
"loss": 0.5218, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 0.07846461221771509, |
|
"grad_norm": 0.37700045108795166, |
|
"learning_rate": 2.7699461622093304e-05, |
|
"loss": 0.4978, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 0.0787875118564711, |
|
"grad_norm": 0.41537439823150635, |
|
"learning_rate": 2.67658249108603e-05, |
|
"loss": 0.4907, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 0.07911041149522714, |
|
"grad_norm": 0.40000054240226746, |
|
"learning_rate": 2.584665027646643e-05, |
|
"loss": 0.488, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.07943331113398316, |
|
"grad_norm": 0.395548552274704, |
|
"learning_rate": 2.49420455881305e-05, |
|
"loss": 0.4847, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 0.0797562107727392, |
|
"grad_norm": 0.4183206558227539, |
|
"learning_rate": 2.4052117005223455e-05, |
|
"loss": 0.5261, |
|
"step": 247 |
|
}, |
|
{ |
|
"epoch": 0.08007911041149522, |
|
"grad_norm": 0.37241002917289734, |
|
"learning_rate": 2.317696896481024e-05, |
|
"loss": 0.499, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 0.08040201005025126, |
|
"grad_norm": 0.4700750410556793, |
|
"learning_rate": 2.231670416939364e-05, |
|
"loss": 0.435, |
|
"step": 249 |
|
}, |
|
{ |
|
"epoch": 0.08072490968900728, |
|
"grad_norm": 0.47890686988830566, |
|
"learning_rate": 2.147142357486164e-05, |
|
"loss": 0.6928, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.08072490968900728, |
|
"eval_loss": 0.4805048406124115, |
|
"eval_runtime": 93.118, |
|
"eval_samples_per_second": 2.674, |
|
"eval_steps_per_second": 2.674, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.08104780932776331, |
|
"grad_norm": 0.3123357892036438, |
|
"learning_rate": 2.0641226378639715e-05, |
|
"loss": 0.5109, |
|
"step": 251 |
|
}, |
|
{ |
|
"epoch": 0.08137070896651935, |
|
"grad_norm": 0.30325785279273987, |
|
"learning_rate": 1.9826210008049785e-05, |
|
"loss": 0.498, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 0.08169360860527537, |
|
"grad_norm": 0.2983933389186859, |
|
"learning_rate": 1.902647010887655e-05, |
|
"loss": 0.508, |
|
"step": 253 |
|
}, |
|
{ |
|
"epoch": 0.08201650824403141, |
|
"grad_norm": 0.29377394914627075, |
|
"learning_rate": 1.8242100534143062e-05, |
|
"loss": 0.486, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 0.08233940788278743, |
|
"grad_norm": 0.28709226846694946, |
|
"learning_rate": 1.7473193333096575e-05, |
|
"loss": 0.4685, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.08266230752154347, |
|
"grad_norm": 0.2827620804309845, |
|
"learning_rate": 1.671983874040631e-05, |
|
"loss": 0.4801, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 0.08298520716029949, |
|
"grad_norm": 0.3168405294418335, |
|
"learning_rate": 1.598212516557394e-05, |
|
"loss": 0.4902, |
|
"step": 257 |
|
}, |
|
{ |
|
"epoch": 0.08330810679905552, |
|
"grad_norm": 0.3135143518447876, |
|
"learning_rate": 1.526013918255836e-05, |
|
"loss": 0.5243, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 0.08363100643781154, |
|
"grad_norm": 0.25695309042930603, |
|
"learning_rate": 1.4553965519615723e-05, |
|
"loss": 0.4216, |
|
"step": 259 |
|
}, |
|
{ |
|
"epoch": 0.08395390607656758, |
|
"grad_norm": 0.2938316762447357, |
|
"learning_rate": 1.3863687049356464e-05, |
|
"loss": 0.4577, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.0842768057153236, |
|
"grad_norm": 0.2999093234539032, |
|
"learning_rate": 1.3189384779019535e-05, |
|
"loss": 0.4935, |
|
"step": 261 |
|
}, |
|
{ |
|
"epoch": 0.08459970535407964, |
|
"grad_norm": 0.3224240839481354, |
|
"learning_rate": 1.25311378409661e-05, |
|
"loss": 0.4744, |
|
"step": 262 |
|
}, |
|
{ |
|
"epoch": 0.08492260499283566, |
|
"grad_norm": 0.29576462507247925, |
|
"learning_rate": 1.1889023483392879e-05, |
|
"loss": 0.4506, |
|
"step": 263 |
|
}, |
|
{ |
|
"epoch": 0.0852455046315917, |
|
"grad_norm": 0.2991703450679779, |
|
"learning_rate": 1.1263117061266675e-05, |
|
"loss": 0.4842, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 0.08556840427034772, |
|
"grad_norm": 0.3080856502056122, |
|
"learning_rate": 1.0653492027481286e-05, |
|
"loss": 0.4486, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.08589130390910375, |
|
"grad_norm": 0.2527904510498047, |
|
"learning_rate": 1.0060219924237379e-05, |
|
"loss": 0.3798, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 0.08621420354785977, |
|
"grad_norm": 0.2680191397666931, |
|
"learning_rate": 9.48337037464666e-06, |
|
"loss": 0.4122, |
|
"step": 267 |
|
}, |
|
{ |
|
"epoch": 0.08653710318661581, |
|
"grad_norm": 0.29812344908714294, |
|
"learning_rate": 8.923011074561404e-06, |
|
"loss": 0.4546, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 0.08686000282537185, |
|
"grad_norm": 0.3110487163066864, |
|
"learning_rate": 8.379207784630004e-06, |
|
"loss": 0.4445, |
|
"step": 269 |
|
}, |
|
{ |
|
"epoch": 0.08718290246412787, |
|
"grad_norm": 0.32935261726379395, |
|
"learning_rate": 7.852024322579648e-06, |
|
"loss": 0.482, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.0875058021028839, |
|
"grad_norm": 0.30921775102615356, |
|
"learning_rate": 7.34152255572697e-06, |
|
"loss": 0.4362, |
|
"step": 271 |
|
}, |
|
{ |
|
"epoch": 0.08782870174163993, |
|
"grad_norm": 0.3837946951389313, |
|
"learning_rate": 6.847762393717782e-06, |
|
"loss": 0.433, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 0.08815160138039596, |
|
"grad_norm": 0.2926897406578064, |
|
"learning_rate": 6.370801781496326e-06, |
|
"loss": 0.4659, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 0.08847450101915198, |
|
"grad_norm": 0.35898199677467346, |
|
"learning_rate": 5.910696692505201e-06, |
|
"loss": 0.506, |
|
"step": 274 |
|
}, |
|
{ |
|
"epoch": 0.08879740065790802, |
|
"grad_norm": 0.3298279345035553, |
|
"learning_rate": 5.467501122116563e-06, |
|
"loss": 0.5052, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.08912030029666404, |
|
"grad_norm": 0.34559693932533264, |
|
"learning_rate": 5.0412670812956465e-06, |
|
"loss": 0.4997, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 0.08944319993542008, |
|
"grad_norm": 0.2868078947067261, |
|
"learning_rate": 4.6320445904969475e-06, |
|
"loss": 0.4047, |
|
"step": 277 |
|
}, |
|
{ |
|
"epoch": 0.0897660995741761, |
|
"grad_norm": 0.3573528528213501, |
|
"learning_rate": 4.239881673794165e-06, |
|
"loss": 0.481, |
|
"step": 278 |
|
}, |
|
{ |
|
"epoch": 0.09008899921293213, |
|
"grad_norm": 0.3438877463340759, |
|
"learning_rate": 3.864824353244367e-06, |
|
"loss": 0.5199, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 0.09041189885168815, |
|
"grad_norm": 0.3259707987308502, |
|
"learning_rate": 3.506916643487001e-06, |
|
"loss": 0.4441, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.09073479849044419, |
|
"grad_norm": 0.36126869916915894, |
|
"learning_rate": 3.166200546578718e-06, |
|
"loss": 0.4598, |
|
"step": 281 |
|
}, |
|
{ |
|
"epoch": 0.09105769812920021, |
|
"grad_norm": 0.29352861642837524, |
|
"learning_rate": 2.8427160470641253e-06, |
|
"loss": 0.4116, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 0.09138059776795625, |
|
"grad_norm": 0.390318363904953, |
|
"learning_rate": 2.5365011072835117e-06, |
|
"loss": 0.457, |
|
"step": 283 |
|
}, |
|
{ |
|
"epoch": 0.09170349740671227, |
|
"grad_norm": 0.34145522117614746, |
|
"learning_rate": 2.2475916629177415e-06, |
|
"loss": 0.4275, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 0.0920263970454683, |
|
"grad_norm": 0.3860124945640564, |
|
"learning_rate": 1.9760216187710787e-06, |
|
"loss": 0.5023, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.09234929668422434, |
|
"grad_norm": 0.36518341302871704, |
|
"learning_rate": 1.7218228447922867e-06, |
|
"loss": 0.4925, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 0.09267219632298036, |
|
"grad_norm": 0.3913903832435608, |
|
"learning_rate": 1.4850251723345196e-06, |
|
"loss": 0.4858, |
|
"step": 287 |
|
}, |
|
{ |
|
"epoch": 0.0929950959617364, |
|
"grad_norm": 0.35096660256385803, |
|
"learning_rate": 1.2656563906545902e-06, |
|
"loss": 0.4196, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 0.09331799560049242, |
|
"grad_norm": 0.4638069272041321, |
|
"learning_rate": 1.0637422436516274e-06, |
|
"loss": 0.5741, |
|
"step": 289 |
|
}, |
|
{ |
|
"epoch": 0.09364089523924846, |
|
"grad_norm": 0.40387284755706787, |
|
"learning_rate": 8.793064268460604e-07, |
|
"loss": 0.4867, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.09396379487800448, |
|
"grad_norm": 0.39819347858428955, |
|
"learning_rate": 7.123705845987093e-07, |
|
"loss": 0.4803, |
|
"step": 291 |
|
}, |
|
{ |
|
"epoch": 0.09428669451676051, |
|
"grad_norm": 0.3998761773109436, |
|
"learning_rate": 5.629543075708176e-07, |
|
"loss": 0.4755, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 0.09460959415551654, |
|
"grad_norm": 0.3553345799446106, |
|
"learning_rate": 4.310751304249738e-07, |
|
"loss": 0.5079, |
|
"step": 293 |
|
}, |
|
{ |
|
"epoch": 0.09493249379427257, |
|
"grad_norm": 0.35981830954551697, |
|
"learning_rate": 3.167485297673411e-07, |
|
"loss": 0.4487, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 0.09525539343302859, |
|
"grad_norm": 0.38175123929977417, |
|
"learning_rate": 2.1998792233142714e-07, |
|
"loss": 0.5537, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.09557829307178463, |
|
"grad_norm": 0.42106011509895325, |
|
"learning_rate": 1.4080466340349316e-07, |
|
"loss": 0.4318, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 0.09590119271054065, |
|
"grad_norm": 0.37637245655059814, |
|
"learning_rate": 7.92080454900701e-08, |
|
"loss": 0.5632, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 0.09622409234929669, |
|
"grad_norm": 0.3730914890766144, |
|
"learning_rate": 3.5205297227380855e-08, |
|
"loss": 0.4611, |
|
"step": 298 |
|
}, |
|
{ |
|
"epoch": 0.09654699198805271, |
|
"grad_norm": 0.5511401891708374, |
|
"learning_rate": 8.801582533035644e-09, |
|
"loss": 0.5325, |
|
"step": 299 |
|
}, |
|
{ |
|
"epoch": 0.09686989162680874, |
|
"grad_norm": 0.5235540270805359, |
|
"learning_rate": 0.0, |
|
"loss": 0.6263, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.09686989162680874, |
|
"eval_loss": 0.4757327735424042, |
|
"eval_runtime": 92.9521, |
|
"eval_samples_per_second": 2.679, |
|
"eval_steps_per_second": 2.679, |
|
"step": 300 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 300, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 50, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 3, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.7681879996301312e+17, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|